@opencode_weave/weave 0.7.0 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/agent-builder.d.ts +22 -0
- package/dist/config/schema.d.ts +5 -0
- package/dist/features/evals/baseline.d.ts +4 -0
- package/dist/features/evals/evaluators/deterministic.d.ts +2 -0
- package/dist/features/evals/evaluators/llm-judge.d.ts +2 -0
- package/dist/features/evals/executors/model-response.d.ts +2 -0
- package/dist/features/evals/executors/prompt-renderer.d.ts +2 -0
- package/dist/features/evals/index.d.ts +24 -0
- package/dist/features/evals/loader.d.ts +8 -0
- package/dist/features/evals/reporter.d.ts +2 -0
- package/dist/features/evals/runner.d.ts +7 -0
- package/dist/features/evals/schema.d.ts +478 -0
- package/dist/features/evals/storage.d.ts +7 -0
- package/dist/features/evals/targets/builtin-agent-target.d.ts +2 -0
- package/dist/features/evals/types.d.ts +223 -0
- package/dist/features/task-system/index.d.ts +6 -0
- package/dist/features/task-system/storage.d.ts +38 -0
- package/dist/features/task-system/todo-sync.d.ts +38 -0
- package/dist/features/task-system/tools/index.d.ts +3 -0
- package/dist/features/task-system/tools/task-create.d.ts +9 -0
- package/dist/features/task-system/tools/task-list.d.ts +5 -0
- package/dist/features/task-system/tools/task-update.d.ts +7 -0
- package/dist/features/task-system/types.d.ts +63 -0
- package/dist/index.js +471 -124
- package/dist/plugin/plugin-interface.d.ts +1 -0
- package/dist/shared/agent-display-names.d.ts +14 -0
- package/dist/shared/index.d.ts +1 -1
- package/package.json +5 -2
|
@@ -8,12 +8,34 @@ export type BuildAgentOptions = {
|
|
|
8
8
|
resolveSkills?: ResolveSkillsFn;
|
|
9
9
|
disabledAgents?: Set<string>;
|
|
10
10
|
};
|
|
11
|
+
/**
|
|
12
|
+
* Map from agent config key (lowercase) to display name variants that
|
|
13
|
+
* might appear in prompt text. Used by stripDisabledAgentReferences to
|
|
14
|
+
* remove lines that mention disabled agents.
|
|
15
|
+
*
|
|
16
|
+
* Exported for test cleanup — tests that call addBuiltinNameVariant
|
|
17
|
+
* must restore original arrays in afterEach to avoid state pollution.
|
|
18
|
+
*/
|
|
19
|
+
export declare const AGENT_NAME_VARIANTS: Record<string, string[]>;
|
|
20
|
+
/**
|
|
21
|
+
* Reset the mutable name variants map to its initial state.
|
|
22
|
+
* Used by tests to prevent cross-test state pollution.
|
|
23
|
+
*/
|
|
24
|
+
export declare function resetNameVariants(): void;
|
|
11
25
|
/**
|
|
12
26
|
* Register name variants for a custom agent so that
|
|
13
27
|
* `stripDisabledAgentReferences` can strip its references from prompts.
|
|
14
28
|
* Does not override existing (builtin) entries.
|
|
15
29
|
*/
|
|
16
30
|
export declare function registerAgentNameVariants(name: string, variants?: string[]): void;
|
|
31
|
+
/**
|
|
32
|
+
* Add additional name variants for a builtin agent.
|
|
33
|
+
* Used when a user sets a custom display_name — the custom name
|
|
34
|
+
* must be included in variants so stripDisabledAgentReferences
|
|
35
|
+
* can match it when the agent is disabled.
|
|
36
|
+
* No-op if the config key has no existing variant entry or the variant is already present.
|
|
37
|
+
*/
|
|
38
|
+
export declare function addBuiltinNameVariant(configKey: string, variant: string): void;
|
|
17
39
|
/**
|
|
18
40
|
* Remove lines from a prompt that reference disabled agents.
|
|
19
41
|
* Only strips lines where an agent name appears as a standalone concept
|
package/dist/config/schema.d.ts
CHANGED
|
@@ -17,6 +17,7 @@ export declare const AgentOverrideConfigSchema: z.ZodObject<{
|
|
|
17
17
|
all: "all";
|
|
18
18
|
}>>;
|
|
19
19
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
20
|
+
display_name: z.ZodOptional<z.ZodString>;
|
|
20
21
|
}, z.core.$strip>;
|
|
21
22
|
export declare const AgentOverridesSchema: z.ZodRecord<z.ZodString, z.ZodObject<{
|
|
22
23
|
model: z.ZodOptional<z.ZodString>;
|
|
@@ -36,6 +37,7 @@ export declare const AgentOverridesSchema: z.ZodRecord<z.ZodString, z.ZodObject<
|
|
|
36
37
|
all: "all";
|
|
37
38
|
}>>;
|
|
38
39
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
40
|
+
display_name: z.ZodOptional<z.ZodString>;
|
|
39
41
|
}, z.core.$strip>>;
|
|
40
42
|
export declare const CategoryConfigSchema: z.ZodObject<{
|
|
41
43
|
description: z.ZodOptional<z.ZodString>;
|
|
@@ -82,6 +84,7 @@ export declare const ExperimentalConfigSchema: z.ZodObject<{
|
|
|
82
84
|
plugin_load_timeout_ms: z.ZodOptional<z.ZodNumber>;
|
|
83
85
|
context_window_warning_threshold: z.ZodOptional<z.ZodNumber>;
|
|
84
86
|
context_window_critical_threshold: z.ZodOptional<z.ZodNumber>;
|
|
87
|
+
task_system: z.ZodDefault<z.ZodBoolean>;
|
|
85
88
|
}, z.core.$strip>;
|
|
86
89
|
export declare const DelegationTriggerSchema: z.ZodObject<{
|
|
87
90
|
domain: z.ZodString;
|
|
@@ -180,6 +183,7 @@ export declare const WeaveConfigSchema: z.ZodObject<{
|
|
|
180
183
|
all: "all";
|
|
181
184
|
}>>;
|
|
182
185
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
186
|
+
display_name: z.ZodOptional<z.ZodString>;
|
|
183
187
|
}, z.core.$strip>>>;
|
|
184
188
|
custom_agents: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
|
|
185
189
|
prompt: z.ZodOptional<z.ZodString>;
|
|
@@ -255,6 +259,7 @@ export declare const WeaveConfigSchema: z.ZodObject<{
|
|
|
255
259
|
plugin_load_timeout_ms: z.ZodOptional<z.ZodNumber>;
|
|
256
260
|
context_window_warning_threshold: z.ZodOptional<z.ZodNumber>;
|
|
257
261
|
context_window_critical_threshold: z.ZodOptional<z.ZodNumber>;
|
|
262
|
+
task_system: z.ZodDefault<z.ZodBoolean>;
|
|
258
263
|
}, z.core.$strip>>;
|
|
259
264
|
workflows: z.ZodOptional<z.ZodObject<{
|
|
260
265
|
disabled_workflows: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { BaselineComparison, BaselineComparisonOptions, DeterministicBaseline, EvalRunResult } from "./types";
|
|
2
|
+
export declare function deriveDeterministicBaseline(run: EvalRunResult): DeterministicBaseline;
|
|
3
|
+
export declare function readDeterministicBaseline(filePath: string): DeterministicBaseline;
|
|
4
|
+
export declare function compareDeterministicBaseline(baseline: DeterministicBaseline, run: EvalRunResult, options?: BaselineComparisonOptions): BaselineComparison;
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 1 eval harness for deterministic prompt-contract coverage.
|
|
3
|
+
*
|
|
4
|
+
* Extension points are intentionally registry-based:
|
|
5
|
+
* - add new target `kind` values in `types.ts` + `schema.ts`
|
|
6
|
+
* - add new executor handlers in `runner.ts`
|
|
7
|
+
* - add new evaluator handlers in `evaluators/`
|
|
8
|
+
* - keep `EvalRunResult` top-level keys stable for future baselines
|
|
9
|
+
*
|
|
10
|
+
* Promptfoo, if adopted later, should plug in behind executor/judge adapters.
|
|
11
|
+
*/
|
|
12
|
+
export type { EvalPhase, EvalTarget, ExecutorSpec, EvaluatorSpec, EvalSuiteManifest, EvalCase, LoadedEvalCase, LoadedEvalSuiteManifest, EvalArtifacts, AssertionResult, EvalCaseResult, EvalRunResult, EvalRunSummary, RunEvalSuiteOptions, RunnerFilters, } from "./types";
|
|
13
|
+
export { EvalCaseSchema, EvalSuiteManifestSchema, EvalRunResultSchema } from "./schema";
|
|
14
|
+
export { EvalConfigError, loadEvalSuiteManifest, loadEvalCasesForSuite, resolveSuitePath } from "./loader";
|
|
15
|
+
export { resolveBuiltinAgentTarget } from "./targets/builtin-agent-target";
|
|
16
|
+
export { executePromptRender } from "./executors/prompt-renderer";
|
|
17
|
+
export { executeModelResponse } from "./executors/model-response";
|
|
18
|
+
export { runDeterministicEvaluator } from "./evaluators/deterministic";
|
|
19
|
+
export { runLlmJudgeEvaluator } from "./evaluators/llm-judge";
|
|
20
|
+
export { deriveDeterministicBaseline, readDeterministicBaseline, compareDeterministicBaseline } from "./baseline";
|
|
21
|
+
export { ensureEvalStorageDir, getDefaultEvalRunPath, writeEvalRunResult } from "./storage";
|
|
22
|
+
export { formatEvalSummary } from "./reporter";
|
|
23
|
+
export type { RunEvalSuiteOutput } from "./runner";
|
|
24
|
+
export { runEvalSuite } from "./runner";
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { LoadedEvalCase, LoadedEvalSuiteManifest } from "./types";
|
|
2
|
+
export declare class EvalConfigError extends Error {
|
|
3
|
+
constructor(message: string);
|
|
4
|
+
}
|
|
5
|
+
export declare function resolveSuitePath(directory: string, suite: string): string;
|
|
6
|
+
export declare function loadEvalSuiteManifest(directory: string, suite: string): LoadedEvalSuiteManifest;
|
|
7
|
+
export declare function loadEvalCaseFile(directory: string, filePath: string): LoadedEvalCase;
|
|
8
|
+
export declare function loadEvalCasesForSuite(directory: string, suite: LoadedEvalSuiteManifest): LoadedEvalCase[];
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { EvalRunResult, RunEvalSuiteOptions } from "./types";
|
|
2
|
+
export interface RunEvalSuiteOutput {
|
|
3
|
+
result: EvalRunResult;
|
|
4
|
+
artifactPath: string;
|
|
5
|
+
consoleSummary: string;
|
|
6
|
+
}
|
|
7
|
+
export declare function runEvalSuite(options: RunEvalSuiteOptions): RunEvalSuiteOutput;
|
|
@@ -0,0 +1,478 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
export declare const EvalPhaseSchema: z.ZodEnum<{
|
|
3
|
+
phase1: "phase1";
|
|
4
|
+
phase2: "phase2";
|
|
5
|
+
phase3: "phase3";
|
|
6
|
+
phase4: "phase4";
|
|
7
|
+
}>;
|
|
8
|
+
export declare const BuiltinAgentPromptVariantSchema: z.ZodObject<{
|
|
9
|
+
disabledAgents: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
10
|
+
}, z.core.$strip>;
|
|
11
|
+
export declare const BuiltinAgentPromptTargetSchema: z.ZodObject<{
|
|
12
|
+
kind: z.ZodLiteral<"builtin-agent-prompt">;
|
|
13
|
+
agent: z.ZodEnum<{
|
|
14
|
+
pattern: "pattern";
|
|
15
|
+
loom: "loom";
|
|
16
|
+
tapestry: "tapestry";
|
|
17
|
+
thread: "thread";
|
|
18
|
+
spindle: "spindle";
|
|
19
|
+
weft: "weft";
|
|
20
|
+
warp: "warp";
|
|
21
|
+
}>;
|
|
22
|
+
variant: z.ZodOptional<z.ZodObject<{
|
|
23
|
+
disabledAgents: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
24
|
+
}, z.core.$strip>>;
|
|
25
|
+
}, z.core.$strip>;
|
|
26
|
+
export declare const CustomAgentPromptTargetSchema: z.ZodObject<{
|
|
27
|
+
kind: z.ZodLiteral<"custom-agent-prompt">;
|
|
28
|
+
agentId: z.ZodString;
|
|
29
|
+
}, z.core.$strip>;
|
|
30
|
+
export declare const SingleTurnAgentTargetSchema: z.ZodObject<{
|
|
31
|
+
kind: z.ZodLiteral<"single-turn-agent">;
|
|
32
|
+
agent: z.ZodString;
|
|
33
|
+
input: z.ZodOptional<z.ZodString>;
|
|
34
|
+
}, z.core.$strip>;
|
|
35
|
+
export declare const TrajectoryAgentTargetSchema: z.ZodObject<{
|
|
36
|
+
kind: z.ZodLiteral<"trajectory-agent">;
|
|
37
|
+
agent: z.ZodString;
|
|
38
|
+
scenarioRef: z.ZodOptional<z.ZodString>;
|
|
39
|
+
}, z.core.$strip>;
|
|
40
|
+
export declare const EvalTargetSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
41
|
+
kind: z.ZodLiteral<"builtin-agent-prompt">;
|
|
42
|
+
agent: z.ZodEnum<{
|
|
43
|
+
pattern: "pattern";
|
|
44
|
+
loom: "loom";
|
|
45
|
+
tapestry: "tapestry";
|
|
46
|
+
thread: "thread";
|
|
47
|
+
spindle: "spindle";
|
|
48
|
+
weft: "weft";
|
|
49
|
+
warp: "warp";
|
|
50
|
+
}>;
|
|
51
|
+
variant: z.ZodOptional<z.ZodObject<{
|
|
52
|
+
disabledAgents: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
53
|
+
}, z.core.$strip>>;
|
|
54
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
55
|
+
kind: z.ZodLiteral<"custom-agent-prompt">;
|
|
56
|
+
agentId: z.ZodString;
|
|
57
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
58
|
+
kind: z.ZodLiteral<"single-turn-agent">;
|
|
59
|
+
agent: z.ZodString;
|
|
60
|
+
input: z.ZodOptional<z.ZodString>;
|
|
61
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
62
|
+
kind: z.ZodLiteral<"trajectory-agent">;
|
|
63
|
+
agent: z.ZodString;
|
|
64
|
+
scenarioRef: z.ZodOptional<z.ZodString>;
|
|
65
|
+
}, z.core.$strip>], "kind">;
|
|
66
|
+
export declare const PromptRenderExecutorSchema: z.ZodObject<{
|
|
67
|
+
kind: z.ZodLiteral<"prompt-render">;
|
|
68
|
+
}, z.core.$strip>;
|
|
69
|
+
export declare const ModelResponseExecutorSchema: z.ZodObject<{
|
|
70
|
+
kind: z.ZodLiteral<"model-response">;
|
|
71
|
+
provider: z.ZodString;
|
|
72
|
+
model: z.ZodString;
|
|
73
|
+
input: z.ZodString;
|
|
74
|
+
}, z.core.$strip>;
|
|
75
|
+
export declare const TrajectoryRunExecutorSchema: z.ZodObject<{
|
|
76
|
+
kind: z.ZodLiteral<"trajectory-run">;
|
|
77
|
+
scenarioRef: z.ZodString;
|
|
78
|
+
}, z.core.$strip>;
|
|
79
|
+
export declare const ExecutorSpecSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
80
|
+
kind: z.ZodLiteral<"prompt-render">;
|
|
81
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
82
|
+
kind: z.ZodLiteral<"model-response">;
|
|
83
|
+
provider: z.ZodString;
|
|
84
|
+
model: z.ZodString;
|
|
85
|
+
input: z.ZodString;
|
|
86
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
87
|
+
kind: z.ZodLiteral<"trajectory-run">;
|
|
88
|
+
scenarioRef: z.ZodString;
|
|
89
|
+
}, z.core.$strip>], "kind">;
|
|
90
|
+
export declare const ContainsAllEvaluatorSchema: z.ZodObject<{
|
|
91
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
92
|
+
kind: z.ZodLiteral<"contains-all">;
|
|
93
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
94
|
+
}, z.core.$strip>;
|
|
95
|
+
export declare const ContainsAnyEvaluatorSchema: z.ZodObject<{
|
|
96
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
97
|
+
kind: z.ZodLiteral<"contains-any">;
|
|
98
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
99
|
+
}, z.core.$strip>;
|
|
100
|
+
export declare const ExcludesAllEvaluatorSchema: z.ZodObject<{
|
|
101
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
102
|
+
kind: z.ZodLiteral<"excludes-all">;
|
|
103
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
104
|
+
}, z.core.$strip>;
|
|
105
|
+
export declare const SectionContainsAllEvaluatorSchema: z.ZodObject<{
|
|
106
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
107
|
+
kind: z.ZodLiteral<"section-contains-all">;
|
|
108
|
+
section: z.ZodString;
|
|
109
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
110
|
+
}, z.core.$strip>;
|
|
111
|
+
export declare const OrderedContainsEvaluatorSchema: z.ZodObject<{
|
|
112
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
113
|
+
kind: z.ZodLiteral<"ordered-contains">;
|
|
114
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
115
|
+
}, z.core.$strip>;
|
|
116
|
+
export declare const XmlSectionsPresentEvaluatorSchema: z.ZodObject<{
|
|
117
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
118
|
+
kind: z.ZodLiteral<"xml-sections-present">;
|
|
119
|
+
sections: z.ZodArray<z.ZodString>;
|
|
120
|
+
}, z.core.$strip>;
|
|
121
|
+
export declare const ToolPolicyEvaluatorSchema: z.ZodObject<{
|
|
122
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
123
|
+
kind: z.ZodLiteral<"tool-policy">;
|
|
124
|
+
expectations: z.ZodRecord<z.ZodString, z.ZodBoolean>;
|
|
125
|
+
}, z.core.$strip>;
|
|
126
|
+
export declare const MinLengthEvaluatorSchema: z.ZodObject<{
|
|
127
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
128
|
+
kind: z.ZodLiteral<"min-length">;
|
|
129
|
+
min: z.ZodNumber;
|
|
130
|
+
}, z.core.$strip>;
|
|
131
|
+
export declare const LlmJudgeEvaluatorSchema: z.ZodObject<{
|
|
132
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
133
|
+
kind: z.ZodLiteral<"llm-judge">;
|
|
134
|
+
rubricRef: z.ZodOptional<z.ZodString>;
|
|
135
|
+
expectedContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
136
|
+
forbiddenContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
137
|
+
}, z.core.$strip>;
|
|
138
|
+
export declare const BaselineDiffEvaluatorSchema: z.ZodObject<{
|
|
139
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
140
|
+
kind: z.ZodLiteral<"baseline-diff">;
|
|
141
|
+
baselineRef: z.ZodOptional<z.ZodString>;
|
|
142
|
+
}, z.core.$strip>;
|
|
143
|
+
export declare const TrajectoryAssertionEvaluatorSchema: z.ZodObject<{
|
|
144
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
145
|
+
kind: z.ZodLiteral<"trajectory-assertion">;
|
|
146
|
+
assertionRef: z.ZodOptional<z.ZodString>;
|
|
147
|
+
}, z.core.$strip>;
|
|
148
|
+
export declare const EvaluatorSpecSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
149
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
150
|
+
kind: z.ZodLiteral<"contains-all">;
|
|
151
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
152
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
153
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
154
|
+
kind: z.ZodLiteral<"contains-any">;
|
|
155
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
156
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
157
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
158
|
+
kind: z.ZodLiteral<"excludes-all">;
|
|
159
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
160
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
161
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
162
|
+
kind: z.ZodLiteral<"section-contains-all">;
|
|
163
|
+
section: z.ZodString;
|
|
164
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
165
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
166
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
167
|
+
kind: z.ZodLiteral<"ordered-contains">;
|
|
168
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
169
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
170
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
171
|
+
kind: z.ZodLiteral<"xml-sections-present">;
|
|
172
|
+
sections: z.ZodArray<z.ZodString>;
|
|
173
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
174
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
175
|
+
kind: z.ZodLiteral<"tool-policy">;
|
|
176
|
+
expectations: z.ZodRecord<z.ZodString, z.ZodBoolean>;
|
|
177
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
178
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
179
|
+
kind: z.ZodLiteral<"min-length">;
|
|
180
|
+
min: z.ZodNumber;
|
|
181
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
182
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
183
|
+
kind: z.ZodLiteral<"llm-judge">;
|
|
184
|
+
rubricRef: z.ZodOptional<z.ZodString>;
|
|
185
|
+
expectedContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
186
|
+
forbiddenContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
187
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
188
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
189
|
+
kind: z.ZodLiteral<"baseline-diff">;
|
|
190
|
+
baselineRef: z.ZodOptional<z.ZodString>;
|
|
191
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
192
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
193
|
+
kind: z.ZodLiteral<"trajectory-assertion">;
|
|
194
|
+
assertionRef: z.ZodOptional<z.ZodString>;
|
|
195
|
+
}, z.core.$strip>], "kind">;
|
|
196
|
+
export declare const EvalCaseSchema: z.ZodObject<{
|
|
197
|
+
id: z.ZodString;
|
|
198
|
+
title: z.ZodString;
|
|
199
|
+
phase: z.ZodEnum<{
|
|
200
|
+
phase1: "phase1";
|
|
201
|
+
phase2: "phase2";
|
|
202
|
+
phase3: "phase3";
|
|
203
|
+
phase4: "phase4";
|
|
204
|
+
}>;
|
|
205
|
+
target: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
206
|
+
kind: z.ZodLiteral<"builtin-agent-prompt">;
|
|
207
|
+
agent: z.ZodEnum<{
|
|
208
|
+
pattern: "pattern";
|
|
209
|
+
loom: "loom";
|
|
210
|
+
tapestry: "tapestry";
|
|
211
|
+
thread: "thread";
|
|
212
|
+
spindle: "spindle";
|
|
213
|
+
weft: "weft";
|
|
214
|
+
warp: "warp";
|
|
215
|
+
}>;
|
|
216
|
+
variant: z.ZodOptional<z.ZodObject<{
|
|
217
|
+
disabledAgents: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
218
|
+
}, z.core.$strip>>;
|
|
219
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
220
|
+
kind: z.ZodLiteral<"custom-agent-prompt">;
|
|
221
|
+
agentId: z.ZodString;
|
|
222
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
223
|
+
kind: z.ZodLiteral<"single-turn-agent">;
|
|
224
|
+
agent: z.ZodString;
|
|
225
|
+
input: z.ZodOptional<z.ZodString>;
|
|
226
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
227
|
+
kind: z.ZodLiteral<"trajectory-agent">;
|
|
228
|
+
agent: z.ZodString;
|
|
229
|
+
scenarioRef: z.ZodOptional<z.ZodString>;
|
|
230
|
+
}, z.core.$strip>], "kind">;
|
|
231
|
+
executor: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
232
|
+
kind: z.ZodLiteral<"prompt-render">;
|
|
233
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
234
|
+
kind: z.ZodLiteral<"model-response">;
|
|
235
|
+
provider: z.ZodString;
|
|
236
|
+
model: z.ZodString;
|
|
237
|
+
input: z.ZodString;
|
|
238
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
239
|
+
kind: z.ZodLiteral<"trajectory-run">;
|
|
240
|
+
scenarioRef: z.ZodString;
|
|
241
|
+
}, z.core.$strip>], "kind">;
|
|
242
|
+
evaluators: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
243
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
244
|
+
kind: z.ZodLiteral<"contains-all">;
|
|
245
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
246
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
247
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
248
|
+
kind: z.ZodLiteral<"contains-any">;
|
|
249
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
250
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
251
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
252
|
+
kind: z.ZodLiteral<"excludes-all">;
|
|
253
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
254
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
255
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
256
|
+
kind: z.ZodLiteral<"section-contains-all">;
|
|
257
|
+
section: z.ZodString;
|
|
258
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
259
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
260
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
261
|
+
kind: z.ZodLiteral<"ordered-contains">;
|
|
262
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
263
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
264
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
265
|
+
kind: z.ZodLiteral<"xml-sections-present">;
|
|
266
|
+
sections: z.ZodArray<z.ZodString>;
|
|
267
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
268
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
269
|
+
kind: z.ZodLiteral<"tool-policy">;
|
|
270
|
+
expectations: z.ZodRecord<z.ZodString, z.ZodBoolean>;
|
|
271
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
272
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
273
|
+
kind: z.ZodLiteral<"min-length">;
|
|
274
|
+
min: z.ZodNumber;
|
|
275
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
276
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
277
|
+
kind: z.ZodLiteral<"llm-judge">;
|
|
278
|
+
rubricRef: z.ZodOptional<z.ZodString>;
|
|
279
|
+
expectedContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
280
|
+
forbiddenContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
281
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
282
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
283
|
+
kind: z.ZodLiteral<"baseline-diff">;
|
|
284
|
+
baselineRef: z.ZodOptional<z.ZodString>;
|
|
285
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
286
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
287
|
+
kind: z.ZodLiteral<"trajectory-assertion">;
|
|
288
|
+
assertionRef: z.ZodOptional<z.ZodString>;
|
|
289
|
+
}, z.core.$strip>], "kind">>;
|
|
290
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
291
|
+
notes: z.ZodOptional<z.ZodString>;
|
|
292
|
+
}, z.core.$strip>;
|
|
293
|
+
export declare const EvalSuiteManifestSchema: z.ZodObject<{
|
|
294
|
+
id: z.ZodString;
|
|
295
|
+
title: z.ZodString;
|
|
296
|
+
phase: z.ZodEnum<{
|
|
297
|
+
phase1: "phase1";
|
|
298
|
+
phase2: "phase2";
|
|
299
|
+
phase3: "phase3";
|
|
300
|
+
phase4: "phase4";
|
|
301
|
+
}>;
|
|
302
|
+
caseFiles: z.ZodArray<z.ZodString>;
|
|
303
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
304
|
+
}, z.core.$strip>;
|
|
305
|
+
export declare const AssertionResultSchema: z.ZodObject<{
|
|
306
|
+
evaluatorKind: z.ZodEnum<{
|
|
307
|
+
"contains-all": "contains-all";
|
|
308
|
+
"contains-any": "contains-any";
|
|
309
|
+
"excludes-all": "excludes-all";
|
|
310
|
+
"section-contains-all": "section-contains-all";
|
|
311
|
+
"ordered-contains": "ordered-contains";
|
|
312
|
+
"xml-sections-present": "xml-sections-present";
|
|
313
|
+
"tool-policy": "tool-policy";
|
|
314
|
+
"min-length": "min-length";
|
|
315
|
+
"llm-judge": "llm-judge";
|
|
316
|
+
"baseline-diff": "baseline-diff";
|
|
317
|
+
"trajectory-assertion": "trajectory-assertion";
|
|
318
|
+
}>;
|
|
319
|
+
passed: z.ZodBoolean;
|
|
320
|
+
score: z.ZodNumber;
|
|
321
|
+
maxScore: z.ZodNumber;
|
|
322
|
+
message: z.ZodString;
|
|
323
|
+
}, z.core.$strip>;
|
|
324
|
+
export declare const EvalArtifactsSchema: z.ZodObject<{
|
|
325
|
+
renderedPrompt: z.ZodOptional<z.ZodString>;
|
|
326
|
+
agentMetadata: z.ZodOptional<z.ZodObject<{
|
|
327
|
+
agent: z.ZodString;
|
|
328
|
+
description: z.ZodOptional<z.ZodString>;
|
|
329
|
+
sourceKind: z.ZodEnum<{
|
|
330
|
+
default: "default";
|
|
331
|
+
composer: "composer";
|
|
332
|
+
}>;
|
|
333
|
+
}, z.core.$strip>>;
|
|
334
|
+
toolPolicy: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodBoolean>>;
|
|
335
|
+
promptLength: z.ZodOptional<z.ZodNumber>;
|
|
336
|
+
modelOutput: z.ZodOptional<z.ZodString>;
|
|
337
|
+
judgeOutput: z.ZodOptional<z.ZodString>;
|
|
338
|
+
trace: z.ZodOptional<z.ZodUnknown>;
|
|
339
|
+
tokens: z.ZodOptional<z.ZodNumber>;
|
|
340
|
+
cost: z.ZodOptional<z.ZodNumber>;
|
|
341
|
+
baselineDelta: z.ZodOptional<z.ZodUnknown>;
|
|
342
|
+
}, z.core.$strip>;
|
|
343
|
+
export declare const EvalCaseResultSchema: z.ZodObject<{
|
|
344
|
+
caseId: z.ZodString;
|
|
345
|
+
status: z.ZodEnum<{
|
|
346
|
+
error: "error";
|
|
347
|
+
failed: "failed";
|
|
348
|
+
passed: "passed";
|
|
349
|
+
}>;
|
|
350
|
+
score: z.ZodNumber;
|
|
351
|
+
normalizedScore: z.ZodNumber;
|
|
352
|
+
maxScore: z.ZodNumber;
|
|
353
|
+
durationMs: z.ZodNumber;
|
|
354
|
+
artifacts: z.ZodObject<{
|
|
355
|
+
renderedPrompt: z.ZodOptional<z.ZodString>;
|
|
356
|
+
agentMetadata: z.ZodOptional<z.ZodObject<{
|
|
357
|
+
agent: z.ZodString;
|
|
358
|
+
description: z.ZodOptional<z.ZodString>;
|
|
359
|
+
sourceKind: z.ZodEnum<{
|
|
360
|
+
default: "default";
|
|
361
|
+
composer: "composer";
|
|
362
|
+
}>;
|
|
363
|
+
}, z.core.$strip>>;
|
|
364
|
+
toolPolicy: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodBoolean>>;
|
|
365
|
+
promptLength: z.ZodOptional<z.ZodNumber>;
|
|
366
|
+
modelOutput: z.ZodOptional<z.ZodString>;
|
|
367
|
+
judgeOutput: z.ZodOptional<z.ZodString>;
|
|
368
|
+
trace: z.ZodOptional<z.ZodUnknown>;
|
|
369
|
+
tokens: z.ZodOptional<z.ZodNumber>;
|
|
370
|
+
cost: z.ZodOptional<z.ZodNumber>;
|
|
371
|
+
baselineDelta: z.ZodOptional<z.ZodUnknown>;
|
|
372
|
+
}, z.core.$strip>;
|
|
373
|
+
assertionResults: z.ZodArray<z.ZodObject<{
|
|
374
|
+
evaluatorKind: z.ZodEnum<{
|
|
375
|
+
"contains-all": "contains-all";
|
|
376
|
+
"contains-any": "contains-any";
|
|
377
|
+
"excludes-all": "excludes-all";
|
|
378
|
+
"section-contains-all": "section-contains-all";
|
|
379
|
+
"ordered-contains": "ordered-contains";
|
|
380
|
+
"xml-sections-present": "xml-sections-present";
|
|
381
|
+
"tool-policy": "tool-policy";
|
|
382
|
+
"min-length": "min-length";
|
|
383
|
+
"llm-judge": "llm-judge";
|
|
384
|
+
"baseline-diff": "baseline-diff";
|
|
385
|
+
"trajectory-assertion": "trajectory-assertion";
|
|
386
|
+
}>;
|
|
387
|
+
passed: z.ZodBoolean;
|
|
388
|
+
score: z.ZodNumber;
|
|
389
|
+
maxScore: z.ZodNumber;
|
|
390
|
+
message: z.ZodString;
|
|
391
|
+
}, z.core.$strip>>;
|
|
392
|
+
errors: z.ZodArray<z.ZodString>;
|
|
393
|
+
}, z.core.$strip>;
|
|
394
|
+
export declare const EvalRunSummarySchema: z.ZodObject<{
|
|
395
|
+
totalCases: z.ZodNumber;
|
|
396
|
+
passedCases: z.ZodNumber;
|
|
397
|
+
failedCases: z.ZodNumber;
|
|
398
|
+
errorCases: z.ZodNumber;
|
|
399
|
+
totalScore: z.ZodNumber;
|
|
400
|
+
normalizedScore: z.ZodNumber;
|
|
401
|
+
maxScore: z.ZodNumber;
|
|
402
|
+
}, z.core.$strip>;
|
|
403
|
+
export declare const EvalRunResultSchema: z.ZodObject<{
|
|
404
|
+
runId: z.ZodString;
|
|
405
|
+
startedAt: z.ZodString;
|
|
406
|
+
finishedAt: z.ZodString;
|
|
407
|
+
suiteId: z.ZodString;
|
|
408
|
+
phase: z.ZodEnum<{
|
|
409
|
+
phase1: "phase1";
|
|
410
|
+
phase2: "phase2";
|
|
411
|
+
phase3: "phase3";
|
|
412
|
+
phase4: "phase4";
|
|
413
|
+
}>;
|
|
414
|
+
summary: z.ZodObject<{
|
|
415
|
+
totalCases: z.ZodNumber;
|
|
416
|
+
passedCases: z.ZodNumber;
|
|
417
|
+
failedCases: z.ZodNumber;
|
|
418
|
+
errorCases: z.ZodNumber;
|
|
419
|
+
totalScore: z.ZodNumber;
|
|
420
|
+
normalizedScore: z.ZodNumber;
|
|
421
|
+
maxScore: z.ZodNumber;
|
|
422
|
+
}, z.core.$strip>;
|
|
423
|
+
caseResults: z.ZodArray<z.ZodObject<{
|
|
424
|
+
caseId: z.ZodString;
|
|
425
|
+
status: z.ZodEnum<{
|
|
426
|
+
error: "error";
|
|
427
|
+
failed: "failed";
|
|
428
|
+
passed: "passed";
|
|
429
|
+
}>;
|
|
430
|
+
score: z.ZodNumber;
|
|
431
|
+
normalizedScore: z.ZodNumber;
|
|
432
|
+
maxScore: z.ZodNumber;
|
|
433
|
+
durationMs: z.ZodNumber;
|
|
434
|
+
artifacts: z.ZodObject<{
|
|
435
|
+
renderedPrompt: z.ZodOptional<z.ZodString>;
|
|
436
|
+
agentMetadata: z.ZodOptional<z.ZodObject<{
|
|
437
|
+
agent: z.ZodString;
|
|
438
|
+
description: z.ZodOptional<z.ZodString>;
|
|
439
|
+
sourceKind: z.ZodEnum<{
|
|
440
|
+
default: "default";
|
|
441
|
+
composer: "composer";
|
|
442
|
+
}>;
|
|
443
|
+
}, z.core.$strip>>;
|
|
444
|
+
toolPolicy: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodBoolean>>;
|
|
445
|
+
promptLength: z.ZodOptional<z.ZodNumber>;
|
|
446
|
+
modelOutput: z.ZodOptional<z.ZodString>;
|
|
447
|
+
judgeOutput: z.ZodOptional<z.ZodString>;
|
|
448
|
+
trace: z.ZodOptional<z.ZodUnknown>;
|
|
449
|
+
tokens: z.ZodOptional<z.ZodNumber>;
|
|
450
|
+
cost: z.ZodOptional<z.ZodNumber>;
|
|
451
|
+
baselineDelta: z.ZodOptional<z.ZodUnknown>;
|
|
452
|
+
}, z.core.$strip>;
|
|
453
|
+
assertionResults: z.ZodArray<z.ZodObject<{
|
|
454
|
+
evaluatorKind: z.ZodEnum<{
|
|
455
|
+
"contains-all": "contains-all";
|
|
456
|
+
"contains-any": "contains-any";
|
|
457
|
+
"excludes-all": "excludes-all";
|
|
458
|
+
"section-contains-all": "section-contains-all";
|
|
459
|
+
"ordered-contains": "ordered-contains";
|
|
460
|
+
"xml-sections-present": "xml-sections-present";
|
|
461
|
+
"tool-policy": "tool-policy";
|
|
462
|
+
"min-length": "min-length";
|
|
463
|
+
"llm-judge": "llm-judge";
|
|
464
|
+
"baseline-diff": "baseline-diff";
|
|
465
|
+
"trajectory-assertion": "trajectory-assertion";
|
|
466
|
+
}>;
|
|
467
|
+
passed: z.ZodBoolean;
|
|
468
|
+
score: z.ZodNumber;
|
|
469
|
+
maxScore: z.ZodNumber;
|
|
470
|
+
message: z.ZodString;
|
|
471
|
+
}, z.core.$strip>>;
|
|
472
|
+
errors: z.ZodArray<z.ZodString>;
|
|
473
|
+
}, z.core.$strip>>;
|
|
474
|
+
}, z.core.$strip>;
|
|
475
|
+
export declare function formatSchemaIssues(filePath: string, issues: z.ZodIssue[]): string;
|
|
476
|
+
export declare const AllowedEvalTargetKinds: ("builtin-agent-prompt" | "custom-agent-prompt" | "single-turn-agent" | "trajectory-agent")[];
|
|
477
|
+
export declare const AllowedExecutorKinds: ("prompt-render" | "model-response" | "trajectory-run")[];
|
|
478
|
+
export declare const AllowedEvaluatorKinds: ("contains-all" | "contains-any" | "excludes-all" | "section-contains-all" | "ordered-contains" | "xml-sections-present" | "tool-policy" | "min-length" | "llm-judge" | "baseline-diff" | "trajectory-assertion")[];
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { EvalRunResult } from "./types";
|
|
2
|
+
export declare const EVALS_DIR = ".weave/evals";
|
|
3
|
+
export declare const EVAL_RUNS_DIR = ".weave/evals/runs";
|
|
4
|
+
export declare const EVAL_LATEST_FILE = ".weave/evals/latest.json";
|
|
5
|
+
export declare function ensureEvalStorageDir(directory: string): string;
|
|
6
|
+
export declare function getDefaultEvalRunPath(directory: string, runId: string): string;
|
|
7
|
+
export declare function writeEvalRunResult(directory: string, result: EvalRunResult, outputPath?: string): string;
|