@fabriccode/weave 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +309 -0
  3. package/dist/agents/agent-builder.d.ts +24 -0
  4. package/dist/agents/builtin-agents.d.ts +32 -0
  5. package/dist/agents/custom-agent-factory.d.ts +24 -0
  6. package/dist/agents/dynamic-prompt-builder.d.ts +35 -0
  7. package/dist/agents/index.d.ts +12 -0
  8. package/dist/agents/loom/default.d.ts +2 -0
  9. package/dist/agents/loom/index.d.ts +11 -0
  10. package/dist/agents/loom/prompt-composer.d.ts +35 -0
  11. package/dist/agents/model-resolution.d.ts +27 -0
  12. package/dist/agents/pattern/default.d.ts +2 -0
  13. package/dist/agents/pattern/index.d.ts +2 -0
  14. package/dist/agents/prompt-loader.d.ts +9 -0
  15. package/dist/agents/prompt-utils.d.ts +2 -0
  16. package/dist/agents/shuttle/default.d.ts +2 -0
  17. package/dist/agents/shuttle/index.d.ts +2 -0
  18. package/dist/agents/spindle/default.d.ts +2 -0
  19. package/dist/agents/spindle/index.d.ts +2 -0
  20. package/dist/agents/tapestry/default.d.ts +2 -0
  21. package/dist/agents/tapestry/index.d.ts +9 -0
  22. package/dist/agents/tapestry/prompt-composer.d.ts +24 -0
  23. package/dist/agents/thread/default.d.ts +2 -0
  24. package/dist/agents/thread/index.d.ts +2 -0
  25. package/dist/agents/types.d.ts +82 -0
  26. package/dist/agents/warp/default.d.ts +2 -0
  27. package/dist/agents/warp/index.d.ts +2 -0
  28. package/dist/agents/weft/default.d.ts +2 -0
  29. package/dist/agents/weft/index.d.ts +2 -0
  30. package/dist/config/index.d.ts +3 -0
  31. package/dist/config/loader.d.ts +2 -0
  32. package/dist/config/merge.d.ts +3 -0
  33. package/dist/config/schema.d.ts +274 -0
  34. package/dist/create-managers.d.ts +21 -0
  35. package/dist/create-tools.d.ts +16 -0
  36. package/dist/features/analytics/adherence.d.ts +10 -0
  37. package/dist/features/analytics/fingerprint.d.ts +33 -0
  38. package/dist/features/analytics/format-metrics.d.ts +10 -0
  39. package/dist/features/analytics/generate-metrics-report.d.ts +17 -0
  40. package/dist/features/analytics/git-diff.d.ts +7 -0
  41. package/dist/features/analytics/index.d.ts +29 -0
  42. package/dist/features/analytics/plan-parser.d.ts +7 -0
  43. package/dist/features/analytics/plan-token-aggregator.d.ts +11 -0
  44. package/dist/features/analytics/session-tracker.d.ts +68 -0
  45. package/dist/features/analytics/storage.d.ts +40 -0
  46. package/dist/features/analytics/suggestions.d.ts +10 -0
  47. package/dist/features/analytics/token-report.d.ts +14 -0
  48. package/dist/features/analytics/types.d.ts +194 -0
  49. package/dist/features/builtin-commands/commands.d.ts +2 -0
  50. package/dist/features/builtin-commands/index.d.ts +2 -0
  51. package/dist/features/builtin-commands/templates/metrics.d.ts +1 -0
  52. package/dist/features/builtin-commands/templates/run-workflow.d.ts +1 -0
  53. package/dist/features/builtin-commands/templates/start-work.d.ts +1 -0
  54. package/dist/features/builtin-commands/types.d.ts +16 -0
  55. package/dist/features/evals/baseline.d.ts +4 -0
  56. package/dist/features/evals/evaluators/deterministic.d.ts +2 -0
  57. package/dist/features/evals/evaluators/llm-judge.d.ts +2 -0
  58. package/dist/features/evals/executors/model-response.d.ts +2 -0
  59. package/dist/features/evals/executors/prompt-renderer.d.ts +2 -0
  60. package/dist/features/evals/index.d.ts +24 -0
  61. package/dist/features/evals/loader.d.ts +8 -0
  62. package/dist/features/evals/reporter.d.ts +2 -0
  63. package/dist/features/evals/runner.d.ts +7 -0
  64. package/dist/features/evals/schema.d.ts +478 -0
  65. package/dist/features/evals/storage.d.ts +7 -0
  66. package/dist/features/evals/targets/builtin-agent-target.d.ts +2 -0
  67. package/dist/features/evals/types.d.ts +223 -0
  68. package/dist/features/skill-loader/discovery.d.ts +12 -0
  69. package/dist/features/skill-loader/fabric-client.d.ts +2 -0
  70. package/dist/features/skill-loader/index.d.ts +6 -0
  71. package/dist/features/skill-loader/loader.d.ts +7 -0
  72. package/dist/features/skill-loader/resolver.d.ts +6 -0
  73. package/dist/features/skill-loader/types.d.ts +18 -0
  74. package/dist/features/work-state/constants.d.ts +8 -0
  75. package/dist/features/work-state/index.d.ts +5 -0
  76. package/dist/features/work-state/storage.d.ts +53 -0
  77. package/dist/features/work-state/types.d.ts +35 -0
  78. package/dist/features/work-state/validation-types.d.ts +26 -0
  79. package/dist/features/work-state/validation.d.ts +9 -0
  80. package/dist/features/workflow/commands.d.ts +17 -0
  81. package/dist/features/workflow/completion.d.ts +31 -0
  82. package/dist/features/workflow/constants.d.ts +12 -0
  83. package/dist/features/workflow/context.d.ts +16 -0
  84. package/dist/features/workflow/discovery.d.ts +19 -0
  85. package/dist/features/workflow/engine.d.ts +49 -0
  86. package/dist/features/workflow/hook.d.ts +47 -0
  87. package/dist/features/workflow/index.d.ts +15 -0
  88. package/dist/features/workflow/schema.d.ts +118 -0
  89. package/dist/features/workflow/storage.d.ts +51 -0
  90. package/dist/features/workflow/types.d.ts +142 -0
  91. package/dist/hooks/context-window-monitor.d.ts +19 -0
  92. package/dist/hooks/create-hooks.d.ts +40 -0
  93. package/dist/hooks/first-message-variant.d.ts +5 -0
  94. package/dist/hooks/index.d.ts +14 -0
  95. package/dist/hooks/keyword-detector.d.ts +8 -0
  96. package/dist/hooks/pattern-md-only.d.ts +13 -0
  97. package/dist/hooks/rules-injector.d.ts +6 -0
  98. package/dist/hooks/session-token-state.d.ts +42 -0
  99. package/dist/hooks/start-work-hook.d.ts +25 -0
  100. package/dist/hooks/verification-reminder.d.ts +22 -0
  101. package/dist/hooks/work-continuation.d.ts +26 -0
  102. package/dist/hooks/write-existing-file-guard.d.ts +14 -0
  103. package/dist/index.d.ts +5 -0
  104. package/dist/index.js +5585 -0
  105. package/dist/managers/background-manager.d.ts +88 -0
  106. package/dist/managers/config-handler.d.ts +54 -0
  107. package/dist/managers/index.d.ts +6 -0
  108. package/dist/managers/skill-mcp-manager.d.ts +30 -0
  109. package/dist/plugin/index.d.ts +1 -0
  110. package/dist/plugin/plugin-interface.d.ts +17 -0
  111. package/dist/plugin/types.d.ts +5 -0
  112. package/dist/shared/agent-display-names.d.ts +31 -0
  113. package/dist/shared/index.d.ts +5 -0
  114. package/dist/shared/log.d.ts +11 -0
  115. package/dist/shared/types.d.ts +6 -0
  116. package/dist/shared/version.d.ts +5 -0
  117. package/dist/tools/index.d.ts +4 -0
  118. package/dist/tools/permissions.d.ts +18 -0
  119. package/dist/tools/registry.d.ts +29 -0
  120. package/package.json +55 -0
@@ -0,0 +1,40 @@
1
+ import type { SessionSummary, ProjectFingerprint, MetricsReport } from "./types";
2
+ /** Maximum number of session summary entries to keep in the JSONL file */
3
+ export declare const MAX_SESSION_ENTRIES = 1000;
4
+ /**
5
+ * Ensure the analytics directory exists, creating it if needed.
6
+ * Returns the absolute path to the analytics directory.
7
+ */
8
+ export declare function ensureAnalyticsDir(directory: string): string;
9
+ /**
10
+ * Append a session summary to the JSONL file.
11
+ * Auto-creates the analytics directory if needed.
12
+ * Rotates the file to at most MAX_SESSION_ENTRIES entries when the threshold is exceeded. NOTE(review): the meaning of the boolean result is not documented here; presumably true on success, confirm against the implementation.
13
+ */
14
+ export declare function appendSessionSummary(directory: string, summary: SessionSummary): boolean;
15
+ /**
16
+ * Read all session summaries from the JSONL file.
17
+ * Returns an empty array if the file doesn't exist or is unparseable.
18
+ */
19
+ export declare function readSessionSummaries(directory: string): SessionSummary[];
20
+ /**
21
+ * Write a project fingerprint to the analytics directory.
22
+ * Auto-creates the analytics directory if needed.
23
+ */
24
+ export declare function writeFingerprint(directory: string, fingerprint: ProjectFingerprint): boolean;
25
+ /**
26
+ * Read the project fingerprint from the analytics directory.
27
+ * Returns null if the file doesn't exist or is unparseable.
28
+ */
29
+ export declare function readFingerprint(directory: string): ProjectFingerprint | null;
30
+ /**
31
+ * Append a metrics report to the JSONL file.
32
+ * Auto-creates the analytics directory if needed.
33
+ * Rotates the file if it exceeds MAX_METRICS_ENTRIES. NOTE(review): the meaning of the boolean result is not documented here; presumably true on success, confirm against the implementation.
34
+ */
35
+ export declare function writeMetricsReport(directory: string, report: MetricsReport): boolean;
36
+ /**
37
+ * Read all metrics reports from the JSONL file.
38
+ * Returns an empty array if the file doesn't exist or is unparseable.
39
+ */
40
+ export declare function readMetricsReports(directory: string): MetricsReport[];
@@ -0,0 +1,10 @@
1
+ import type { SessionSummary, Suggestion } from "./types";
2
+ /**
3
+ * Generate suggestions based on session history.
4
+ * Analyzes tool usage patterns, delegation frequency, and workflow patterns. NOTE(review): Suggestion.category also allows "token-usage"; confirm whether token usage is analyzed here as well.
5
+ */
6
+ export declare function generateSuggestions(summaries: SessionSummary[]): Suggestion[];
7
+ /**
8
+ * Generate suggestions from stored session summaries for a project.
9
+ */
10
+ export declare function getSuggestionsForProject(directory: string): Suggestion[];
@@ -0,0 +1,14 @@
1
+ import type { SessionSummary } from "./types";
2
+ /**
3
+ * Generate a human-readable token usage and cost report from session summaries.
4
+ *
5
+ * Sections:
6
+ * 1. Overall Totals — aggregate metrics across all sessions
7
+ * 2. Per-Agent Breakdown — grouped by agentName, sorted by total cost
8
+ * 3. Top 5 Costliest Sessions — most expensive individual sessions
9
+ */
10
+ export declare function generateTokenReport(summaries: SessionSummary[]): string;
11
+ /**
12
+ * Convenience function: read summaries from disk and generate the report.
13
+ */
14
+ export declare function getTokenReport(directory: string): string;
@@ -0,0 +1,194 @@
1
+ /**
2
+ * Analytics types for session intelligence and learning.
3
+ * All analytics data is stored under `.weave/analytics/`.
4
+ */
5
+ /** Directory where analytics data is stored (relative to project root) */
6
+ export declare const ANALYTICS_DIR = ".weave/analytics";
7
+ /** File name for session summaries (JSONL format) */
8
+ export declare const SESSION_SUMMARIES_FILE = "session-summaries.jsonl";
9
+ /** File name for project fingerprint */
10
+ export declare const FINGERPRINT_FILE = "fingerprint.json";
11
+ /** A single tool invocation recorded during a session */
12
+ export interface ToolUsageEntry {
13
+ /** Tool name (e.g., "read", "write", "task") */
14
+ tool: string;
15
+ /** Number of times this tool was invoked */
16
+ count: number;
17
+ }
18
+ /** A delegation to a sub-agent recorded during a session */
19
+ export interface DelegationEntry {
20
+ /** Sub-agent type (e.g., "thread", "pattern", "weft") */
21
+ agent: string;
22
+ /** Tool call ID that started this delegation */
23
+ toolCallId: string;
24
+ /** Duration in milliseconds (if completed) */
25
+ durationMs?: number;
26
+ }
27
+ /** Accumulated token usage across all messages in a session */
28
+ export interface TokenUsage {
29
+ /** Total input tokens consumed */
30
+ inputTokens: number;
31
+ /** Total output tokens generated */
32
+ outputTokens: number;
33
+ /** Total reasoning tokens used */
34
+ reasoningTokens: number;
35
+ /** Total cache read tokens */
36
+ cacheReadTokens: number;
37
+ /** Total cache write tokens */
38
+ cacheWriteTokens: number;
39
+ /** Total number of assistant messages processed */
40
+ totalMessages: number;
41
+ }
42
+ /** Summary of a completed session, appended as a JSONL line */
43
+ export interface SessionSummary {
44
+ /** Unique session identifier */
45
+ sessionId: string;
46
+ /** ISO timestamp when session started */
47
+ startedAt: string;
48
+ /** ISO timestamp when session ended */
49
+ endedAt: string;
50
+ /** Duration in milliseconds */
51
+ durationMs: number;
52
+ /** Tools used during the session */
53
+ toolUsage: ToolUsageEntry[];
54
+ /** Delegations made during the session */
55
+ delegations: DelegationEntry[];
56
+ /** Total number of tool calls */
57
+ totalToolCalls: number;
58
+ /** Total number of delegations */
59
+ totalDelegations: number;
60
+ /** Display name of the agent that ran this session (e.g., "Loom (Main Orchestrator)") */
61
+ agentName?: string;
62
+ /** Total dollar cost accumulated across all messages */
63
+ totalCost?: number;
64
+ /** Aggregated token usage across all messages (absent for old entries or sessions with no messages) */
65
+ tokenUsage?: TokenUsage;
66
+ }
67
+ /** A language or framework detected in the project */
68
+ export interface DetectedStack {
69
+ /** Language or framework name (e.g., "typescript", "react", "bun") */
70
+ name: string;
71
+ /** Detection confidence: "high" if found in a lockfile or config file, "medium" if found in dependencies */
72
+ confidence: "high" | "medium";
73
+ /** Human-readable evidence for the detection (e.g., "tsconfig.json exists") */
74
+ evidence: string;
75
+ }
76
+ /** Project fingerprint — captures the tech stack and structure */
77
+ export interface ProjectFingerprint {
78
+ /** ISO timestamp when fingerprint was generated */
79
+ generatedAt: string;
80
+ /** Detected technology stack entries */
81
+ stack: DetectedStack[];
82
+ /** Whether a monorepo structure was detected */
83
+ isMonorepo: boolean;
84
+ /** Package manager detected (e.g., "bun", "npm", "yarn", "pnpm") */
85
+ packageManager?: string;
86
+ /** Primary language detected */
87
+ primaryLanguage?: string;
88
+ /** Operating system (e.g., "darwin", "win32", "linux") */
89
+ os?: string;
90
+ /** CPU architecture (e.g., "arm64", "x64") */
91
+ arch?: string;
92
+ /** Weave version that generated this fingerprint (e.g., "0.6.3") */
93
+ weaveVersion?: string;
94
+ }
95
+ /** A suggestion generated from session analytics */
96
+ export interface Suggestion {
97
+ /** Unique identifier for deduplication */
98
+ id: string;
99
+ /** Human-readable suggestion text */
100
+ text: string;
101
+ /** Category of suggestion */
102
+ category: "tool-usage" | "delegation" | "workflow" | "token-usage";
103
+ /** Confidence level */
104
+ confidence: "high" | "medium" | "low";
105
+ }
106
+ /** File name for metrics reports (JSONL format) */
107
+ export declare const METRICS_REPORTS_FILE = "metrics-reports.jsonl";
108
+ /** Maximum number of metrics report entries to keep in the JSONL file */
109
+ export declare const MAX_METRICS_ENTRIES = 100;
110
+ /** Token usage for metrics reports (simplified field names vs session TokenUsage) */
111
+ export interface MetricsTokenUsage {
112
+ /** Total input tokens consumed */
113
+ input: number;
114
+ /** Total output tokens generated */
115
+ output: number;
116
+ /** Total reasoning tokens used */
117
+ reasoning: number;
118
+ /** Total cache read tokens */
119
+ cacheRead: number;
120
+ /** Total cache write tokens */
121
+ cacheWrite: number;
122
+ }
123
+ /** Create a zero-valued MetricsTokenUsage */
124
+ export declare function zeroTokenUsage(): MetricsTokenUsage;
125
+ /** Plan execution adherence metrics */
126
+ export interface AdherenceReport {
127
+ /** Proportion of planned files that actually changed (0-1) */
128
+ coverage: number;
129
+ /** Proportion of actual changes that were planned (0-1) */
130
+ precision: number;
131
+ /** Planned files that actually changed */
132
+ plannedFilesChanged: string[];
133
+ /** Files changed but not in the plan */
134
+ unplannedChanges: string[];
135
+ /** Planned files that did not change */
136
+ missedFiles: string[];
137
+ /** Total number of files in the plan */
138
+ totalPlannedFiles: number;
139
+ /** Total number of files actually changed */
140
+ totalActualFiles: number;
141
+ }
142
+ /** Metrics report for a completed plan */
143
+ export interface MetricsReport {
144
+ /** Plan name (from plan file basename) */
145
+ planName: string;
146
+ /** ISO timestamp when report was generated */
147
+ generatedAt: string;
148
+ /** Plan adherence metrics (coverage, precision, and the planned/unplanned/missed file lists) */
149
+ adherence: AdherenceReport;
150
+ /** Code quality score (Phase 2 — undefined in Phase 1) */
151
+ quality?: unknown;
152
+ /** Quality gaps (Phase 2 — undefined in Phase 1) */
153
+ gaps?: unknown;
154
+ /** Token usage across all sessions */
155
+ tokenUsage: MetricsTokenUsage;
156
+ /** Total duration of all sessions in milliseconds */
157
+ durationMs: number;
158
+ /** Number of sessions that worked on this plan */
159
+ sessionCount: number;
160
+ /** Git HEAD SHA when work started (optional) */
161
+ startSha?: string;
162
+ /** Git HEAD SHA when work ended (optional) */
163
+ endSha?: string;
164
+ /** Session IDs that contributed to this report */
165
+ sessionIds: string[];
166
+ }
167
+ /** A single in-flight tool call, tracked for duration measurement */
168
+ export interface InFlightToolCall {
169
+ /** Tool name */
170
+ tool: string;
171
+ /** Start timestamp (ms since epoch) */
172
+ startedAt: number;
173
+ /** Sub-agent type if this is a task delegation (absent otherwise) */
174
+ agent?: string;
175
+ }
176
+ /** Active session being tracked */
177
+ export interface TrackedSession {
178
+ /** Session ID */
179
+ sessionId: string;
180
+ /** ISO timestamp when tracking started */
181
+ startedAt: string;
182
+ /** Tool usage counts keyed by tool name */
183
+ toolCounts: Record<string, number>;
184
+ /** Completed delegations */
185
+ delegations: DelegationEntry[];
186
+ /** In-flight tool calls keyed by callID */
187
+ inFlight: Record<string, InFlightToolCall>;
188
+ /** Display name of the agent running this session */
189
+ agentName?: string;
190
+ /** Accumulated dollar cost across all messages */
191
+ totalCost: number;
192
+ /** Cumulative token usage across all messages */
193
+ tokenUsage: TokenUsage;
194
+ }
@@ -0,0 +1,2 @@
1
+ import type { BuiltinCommand, BuiltinCommandName } from "./types";
2
+ export declare const BUILTIN_COMMANDS: Record<BuiltinCommandName, BuiltinCommand>;
@@ -0,0 +1,2 @@
1
+ export { BUILTIN_COMMANDS } from "./commands";
2
+ export type { BuiltinCommand, BuiltinCommandName } from "./types";
@@ -0,0 +1 @@
1
+ export declare const METRICS_TEMPLATE = "You are being activated by the /metrics command to present Weave analytics data to the user.\n\n## Your Mission\nPresent the injected metrics data in a clear, readable format. The data has already been loaded and formatted by the command hook \u2014 simply relay it to the user.\n\n## Instructions\n\n1. **Read the injected context below** \u2014 it contains pre-formatted metrics markdown\n2. **Present it to the user** as-is \u2014 do NOT re-fetch or recalculate anything\n3. **Answer follow-up questions** about the data if the user asks\n4. If the data indicates analytics is disabled or no data exists, relay that message directly";
@@ -0,0 +1 @@
1
+ export declare const RUN_WORKFLOW_TEMPLATE = "You are being activated by the /run-workflow command to execute a multi-step workflow.\n\n## Your Mission\nThe workflow engine will inject context below with:\n- The workflow definition to use\n- The user's goal for this workflow instance\n- The current step and its prompt\n- Context from any previously completed steps\n\nFollow the injected step prompt. When the step is complete, the workflow engine will\nautomatically advance you to the next step.\n\n## Rules\n- Focus on the current step's task only\n- Signal completion clearly (the workflow engine detects it)\n- Do NOT skip ahead to future steps\n- If you need user input, ask for it and wait";
@@ -0,0 +1 @@
1
+ export declare const START_WORK_TEMPLATE = "You are being activated by the /start-work command to execute a Weave plan.\n\n## Your Mission\nRead and execute the work plan, completing each task systematically.\n\n## Startup Procedure\n\n1. **Check for active work state**: Read `.weave/state.json` to see if there's a plan already in progress.\n2. **If resuming**: The system has injected context below with the active plan path and progress. Read the plan file, find the first unchecked `- [ ]` task, and continue from there.\n3. **If starting fresh**: The system has selected a plan and created work state. Read the plan file and begin from the first task.\n\n## Execution Loop\n\nFor each unchecked `- [ ]` task in the plan:\n\n1. **Read** the task description, acceptance criteria, and any references\n2. **Execute** the task \u2014 write code, run commands, create files as needed\n3. **Verify** the work \u2014 run tests, check for errors, validate acceptance criteria\n4. **Mark complete** \u2014 use the Edit tool to change `- [ ]` to `- [x]` in the plan file\n5. **Move on** \u2014 find the next unchecked task and repeat\n\n## Rules\n\n- Work through tasks **top to bottom** unless dependencies require a different order\n- **Verify every task** before marking it complete\n- If blocked on a task, document the reason as a comment in the plan and move to the next unblocked task\n- Report progress after each task: \"Completed task N/M: [title]\"\n- Do NOT stop until all checkboxes are checked or you are explicitly told to stop\n- After all tasks are complete, report a final summary";
@@ -0,0 +1,16 @@
1
+ /**
2
+ * A built-in command that can be invoked via /command-name in the chat.
3
+ */
4
+ export interface BuiltinCommand {
5
+ /** Command name matching the key in BUILTIN_COMMANDS (e.g., "start-work"). NOTE(review): typed as plain string rather than BuiltinCommandName; confirm whether the wider type is intentional. */
6
+ name: string;
7
+ /** Human-readable description shown in command list */
8
+ description: string;
9
+ /** Agent to switch to when this command is executed */
10
+ agent: string;
11
+ /** Prompt template with $SESSION_ID, $TIMESTAMP, $ARGUMENTS placeholders */
12
+ template: string;
13
+ /** Hint shown for the argument (e.g., "[plan-name]"); optional */
14
+ argumentHint?: string;
15
+ }
16
+ export type BuiltinCommandName = "start-work" | "token-report" | "metrics" | "run-workflow";
@@ -0,0 +1,4 @@
1
+ import type { BaselineComparison, BaselineComparisonOptions, DeterministicBaseline, EvalRunResult } from "./types";
2
+ export declare function deriveDeterministicBaseline(run: EvalRunResult): DeterministicBaseline;
3
+ export declare function readDeterministicBaseline(filePath: string): DeterministicBaseline;
4
+ export declare function compareDeterministicBaseline(baseline: DeterministicBaseline, run: EvalRunResult, options?: BaselineComparisonOptions): BaselineComparison;
@@ -0,0 +1,2 @@
1
+ import type { AssertionResult, EvalArtifacts, EvaluatorSpec } from "../types";
2
+ export declare function runDeterministicEvaluator(spec: EvaluatorSpec, artifacts: EvalArtifacts): AssertionResult[];
@@ -0,0 +1,2 @@
1
+ import type { AssertionResult, EvalArtifacts, LlmJudgeEvaluator } from "../types";
2
+ export declare function runLlmJudgeEvaluator(spec: LlmJudgeEvaluator, artifacts: EvalArtifacts): AssertionResult[];
@@ -0,0 +1,2 @@
1
+ import type { EvalArtifacts, ExecutionContext, ModelResponseExecutor, ResolvedTarget } from "../types";
2
+ export declare function executeModelResponse(resolvedTarget: ResolvedTarget, executor: ModelResponseExecutor, _context: ExecutionContext): EvalArtifacts;
@@ -0,0 +1,2 @@
1
+ import type { EvalArtifacts, ExecutionContext, ExecutorSpec, ResolvedTarget } from "../types";
2
+ export declare function executePromptRender(resolvedTarget: ResolvedTarget, executor: ExecutorSpec, _context: ExecutionContext): EvalArtifacts;
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Phase 1 eval harness for deterministic prompt-contract coverage.
3
+ *
4
+ * Extension points are intentionally registry-based:
5
+ * - add new target `kind` values in `types.ts` + `schema.ts`
6
+ * - add new executor handlers in `runner.ts`
7
+ * - add new evaluator handlers in `evaluators/`
8
+ * - keep `EvalRunResult` top-level keys stable for future baselines
9
+ *
10
+ * Promptfoo, if adopted later, should plug in behind executor/judge adapters.
11
+ */
12
+ export type { EvalPhase, EvalTarget, ExecutorSpec, EvaluatorSpec, EvalSuiteManifest, EvalCase, LoadedEvalCase, LoadedEvalSuiteManifest, EvalArtifacts, AssertionResult, EvalCaseResult, EvalRunResult, EvalRunSummary, RunEvalSuiteOptions, RunnerFilters, } from "./types";
13
+ export { EvalCaseSchema, EvalSuiteManifestSchema, EvalRunResultSchema } from "./schema";
14
+ export { EvalConfigError, loadEvalSuiteManifest, loadEvalCasesForSuite, resolveSuitePath } from "./loader";
15
+ export { resolveBuiltinAgentTarget } from "./targets/builtin-agent-target";
16
+ export { executePromptRender } from "./executors/prompt-renderer";
17
+ export { executeModelResponse } from "./executors/model-response";
18
+ export { runDeterministicEvaluator } from "./evaluators/deterministic";
19
+ export { runLlmJudgeEvaluator } from "./evaluators/llm-judge";
20
+ export { deriveDeterministicBaseline, readDeterministicBaseline, compareDeterministicBaseline } from "./baseline";
21
+ export { ensureEvalStorageDir, getDefaultEvalRunPath, writeEvalRunResult } from "./storage";
22
+ export { formatEvalSummary } from "./reporter";
23
+ export type { RunEvalSuiteOutput } from "./runner";
24
+ export { runEvalSuite } from "./runner";
@@ -0,0 +1,8 @@
1
+ import type { LoadedEvalCase, LoadedEvalSuiteManifest } from "./types";
2
+ export declare class EvalConfigError extends Error {
3
+ constructor(message: string);
4
+ }
5
+ export declare function resolveSuitePath(directory: string, suite: string): string;
6
+ export declare function loadEvalSuiteManifest(directory: string, suite: string): LoadedEvalSuiteManifest;
7
+ export declare function loadEvalCaseFile(directory: string, filePath: string): LoadedEvalCase;
8
+ export declare function loadEvalCasesForSuite(directory: string, suite: LoadedEvalSuiteManifest): LoadedEvalCase[];
@@ -0,0 +1,2 @@
1
+ import type { EvalRunResult } from "./types";
2
+ export declare function formatEvalSummary(result: EvalRunResult): string;
@@ -0,0 +1,7 @@
1
+ import type { EvalRunResult, RunEvalSuiteOptions } from "./types";
2
+ export interface RunEvalSuiteOutput {
3
+ result: EvalRunResult;
4
+ artifactPath: string;
5
+ consoleSummary: string;
6
+ }
7
+ export declare function runEvalSuite(options: RunEvalSuiteOptions): RunEvalSuiteOutput;