@opencode_weave/weave 0.7.0-preview.1 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/agent-builder.d.ts +22 -0
- package/dist/config/schema.d.ts +3 -0
- package/dist/features/evals/baseline.d.ts +4 -0
- package/dist/features/evals/evaluators/deterministic.d.ts +2 -0
- package/dist/features/evals/evaluators/llm-judge.d.ts +2 -0
- package/dist/features/evals/executors/model-response.d.ts +2 -0
- package/dist/features/evals/executors/prompt-renderer.d.ts +2 -0
- package/dist/features/evals/index.d.ts +24 -0
- package/dist/features/evals/loader.d.ts +8 -0
- package/dist/features/evals/reporter.d.ts +2 -0
- package/dist/features/evals/runner.d.ts +7 -0
- package/dist/features/evals/schema.d.ts +478 -0
- package/dist/features/evals/storage.d.ts +7 -0
- package/dist/features/evals/targets/builtin-agent-target.d.ts +2 -0
- package/dist/features/evals/types.d.ts +223 -0
- package/dist/index.js +102 -4
- package/dist/shared/agent-display-names.d.ts +14 -0
- package/dist/shared/index.d.ts +1 -1
- package/package.json +5 -2
|
package/dist/agents/agent-builder.d.ts
CHANGED
|
@@ -8,12 +8,34 @@ export type BuildAgentOptions = {
|
|
|
8
8
|
resolveSkills?: ResolveSkillsFn;
|
|
9
9
|
disabledAgents?: Set<string>;
|
|
10
10
|
};
|
|
11
|
+
/**
|
|
12
|
+
* Map from agent config key (lowercase) to display name variants that
|
|
13
|
+
* might appear in prompt text. Used by stripDisabledAgentReferences to
|
|
14
|
+
* remove lines that mention disabled agents.
|
|
15
|
+
*
|
|
16
|
+
* Exported for test cleanup — tests that call addBuiltinNameVariant
|
|
17
|
+
* must restore original arrays in afterEach to avoid state pollution.
|
|
18
|
+
*/
|
|
19
|
+
export declare const AGENT_NAME_VARIANTS: Record<string, string[]>;
|
|
20
|
+
/**
|
|
21
|
+
* Reset the mutable name variants map to its initial state.
|
|
22
|
+
* Used by tests to prevent cross-test state pollution.
|
|
23
|
+
*/
|
|
24
|
+
export declare function resetNameVariants(): void;
|
|
11
25
|
/**
|
|
12
26
|
* Register name variants for a custom agent so that
|
|
13
27
|
* `stripDisabledAgentReferences` can strip its references from prompts.
|
|
14
28
|
* Does not override existing (builtin) entries.
|
|
15
29
|
*/
|
|
16
30
|
export declare function registerAgentNameVariants(name: string, variants?: string[]): void;
|
|
31
|
+
/**
|
|
32
|
+
* Add additional name variants for a builtin agent.
|
|
33
|
+
* Used when a user sets a custom display_name — the custom name
|
|
34
|
+
* must be included in variants so stripDisabledAgentReferences
|
|
35
|
+
* can match it when the agent is disabled.
|
|
36
|
+
* No-op if the config key has no existing variant entry or the variant is already present.
|
|
37
|
+
*/
|
|
38
|
+
export declare function addBuiltinNameVariant(configKey: string, variant: string): void;
|
|
17
39
|
/**
|
|
18
40
|
* Remove lines from a prompt that reference disabled agents.
|
|
19
41
|
* Only strips lines where an agent name appears as a standalone concept
|
package/dist/config/schema.d.ts
CHANGED
|
@@ -17,6 +17,7 @@ export declare const AgentOverrideConfigSchema: z.ZodObject<{
|
|
|
17
17
|
all: "all";
|
|
18
18
|
}>>;
|
|
19
19
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
20
|
+
display_name: z.ZodOptional<z.ZodString>;
|
|
20
21
|
}, z.core.$strip>;
|
|
21
22
|
export declare const AgentOverridesSchema: z.ZodRecord<z.ZodString, z.ZodObject<{
|
|
22
23
|
model: z.ZodOptional<z.ZodString>;
|
|
@@ -36,6 +37,7 @@ export declare const AgentOverridesSchema: z.ZodRecord<z.ZodString, z.ZodObject<
|
|
|
36
37
|
all: "all";
|
|
37
38
|
}>>;
|
|
38
39
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
40
|
+
display_name: z.ZodOptional<z.ZodString>;
|
|
39
41
|
}, z.core.$strip>>;
|
|
40
42
|
export declare const CategoryConfigSchema: z.ZodObject<{
|
|
41
43
|
description: z.ZodOptional<z.ZodString>;
|
|
@@ -180,6 +182,7 @@ export declare const WeaveConfigSchema: z.ZodObject<{
|
|
|
180
182
|
all: "all";
|
|
181
183
|
}>>;
|
|
182
184
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
185
|
+
display_name: z.ZodOptional<z.ZodString>;
|
|
183
186
|
}, z.core.$strip>>>;
|
|
184
187
|
custom_agents: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
|
|
185
188
|
prompt: z.ZodOptional<z.ZodString>;
|
|
package/dist/features/evals/baseline.d.ts
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { BaselineComparison, BaselineComparisonOptions, DeterministicBaseline, EvalRunResult } from "./types";
|
|
2
|
+
export declare function deriveDeterministicBaseline(run: EvalRunResult): DeterministicBaseline;
|
|
3
|
+
export declare function readDeterministicBaseline(filePath: string): DeterministicBaseline;
|
|
4
|
+
export declare function compareDeterministicBaseline(baseline: DeterministicBaseline, run: EvalRunResult, options?: BaselineComparisonOptions): BaselineComparison;
|
|
package/dist/features/evals/index.d.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 1 eval harness for deterministic prompt-contract coverage.
|
|
3
|
+
*
|
|
4
|
+
* Extension points are intentionally registry-based:
|
|
5
|
+
* - add new target `kind` values in `types.ts` + `schema.ts`
|
|
6
|
+
* - add new executor handlers in `runner.ts`
|
|
7
|
+
* - add new evaluator handlers in `evaluators/`
|
|
8
|
+
* - keep `EvalRunResult` top-level keys stable for future baselines
|
|
9
|
+
*
|
|
10
|
+
* Promptfoo, if adopted later, should plug in behind executor/judge adapters.
|
|
11
|
+
*/
|
|
12
|
+
export type { EvalPhase, EvalTarget, ExecutorSpec, EvaluatorSpec, EvalSuiteManifest, EvalCase, LoadedEvalCase, LoadedEvalSuiteManifest, EvalArtifacts, AssertionResult, EvalCaseResult, EvalRunResult, EvalRunSummary, RunEvalSuiteOptions, RunnerFilters, } from "./types";
|
|
13
|
+
export { EvalCaseSchema, EvalSuiteManifestSchema, EvalRunResultSchema } from "./schema";
|
|
14
|
+
export { EvalConfigError, loadEvalSuiteManifest, loadEvalCasesForSuite, resolveSuitePath } from "./loader";
|
|
15
|
+
export { resolveBuiltinAgentTarget } from "./targets/builtin-agent-target";
|
|
16
|
+
export { executePromptRender } from "./executors/prompt-renderer";
|
|
17
|
+
export { executeModelResponse } from "./executors/model-response";
|
|
18
|
+
export { runDeterministicEvaluator } from "./evaluators/deterministic";
|
|
19
|
+
export { runLlmJudgeEvaluator } from "./evaluators/llm-judge";
|
|
20
|
+
export { deriveDeterministicBaseline, readDeterministicBaseline, compareDeterministicBaseline } from "./baseline";
|
|
21
|
+
export { ensureEvalStorageDir, getDefaultEvalRunPath, writeEvalRunResult } from "./storage";
|
|
22
|
+
export { formatEvalSummary } from "./reporter";
|
|
23
|
+
export type { RunEvalSuiteOutput } from "./runner";
|
|
24
|
+
export { runEvalSuite } from "./runner";
|
|
package/dist/features/evals/loader.d.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { LoadedEvalCase, LoadedEvalSuiteManifest } from "./types";
|
|
2
|
+
export declare class EvalConfigError extends Error {
|
|
3
|
+
constructor(message: string);
|
|
4
|
+
}
|
|
5
|
+
export declare function resolveSuitePath(directory: string, suite: string): string;
|
|
6
|
+
export declare function loadEvalSuiteManifest(directory: string, suite: string): LoadedEvalSuiteManifest;
|
|
7
|
+
export declare function loadEvalCaseFile(directory: string, filePath: string): LoadedEvalCase;
|
|
8
|
+
export declare function loadEvalCasesForSuite(directory: string, suite: LoadedEvalSuiteManifest): LoadedEvalCase[];
|
|
package/dist/features/evals/runner.d.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { EvalRunResult, RunEvalSuiteOptions } from "./types";
|
|
2
|
+
export interface RunEvalSuiteOutput {
|
|
3
|
+
result: EvalRunResult;
|
|
4
|
+
artifactPath: string;
|
|
5
|
+
consoleSummary: string;
|
|
6
|
+
}
|
|
7
|
+
export declare function runEvalSuite(options: RunEvalSuiteOptions): RunEvalSuiteOutput;
|
|
package/dist/features/evals/schema.d.ts
ADDED
|
@@ -0,0 +1,478 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
export declare const EvalPhaseSchema: z.ZodEnum<{
|
|
3
|
+
phase1: "phase1";
|
|
4
|
+
phase2: "phase2";
|
|
5
|
+
phase3: "phase3";
|
|
6
|
+
phase4: "phase4";
|
|
7
|
+
}>;
|
|
8
|
+
export declare const BuiltinAgentPromptVariantSchema: z.ZodObject<{
|
|
9
|
+
disabledAgents: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
10
|
+
}, z.core.$strip>;
|
|
11
|
+
export declare const BuiltinAgentPromptTargetSchema: z.ZodObject<{
|
|
12
|
+
kind: z.ZodLiteral<"builtin-agent-prompt">;
|
|
13
|
+
agent: z.ZodEnum<{
|
|
14
|
+
pattern: "pattern";
|
|
15
|
+
loom: "loom";
|
|
16
|
+
tapestry: "tapestry";
|
|
17
|
+
thread: "thread";
|
|
18
|
+
spindle: "spindle";
|
|
19
|
+
weft: "weft";
|
|
20
|
+
warp: "warp";
|
|
21
|
+
}>;
|
|
22
|
+
variant: z.ZodOptional<z.ZodObject<{
|
|
23
|
+
disabledAgents: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
24
|
+
}, z.core.$strip>>;
|
|
25
|
+
}, z.core.$strip>;
|
|
26
|
+
export declare const CustomAgentPromptTargetSchema: z.ZodObject<{
|
|
27
|
+
kind: z.ZodLiteral<"custom-agent-prompt">;
|
|
28
|
+
agentId: z.ZodString;
|
|
29
|
+
}, z.core.$strip>;
|
|
30
|
+
export declare const SingleTurnAgentTargetSchema: z.ZodObject<{
|
|
31
|
+
kind: z.ZodLiteral<"single-turn-agent">;
|
|
32
|
+
agent: z.ZodString;
|
|
33
|
+
input: z.ZodOptional<z.ZodString>;
|
|
34
|
+
}, z.core.$strip>;
|
|
35
|
+
export declare const TrajectoryAgentTargetSchema: z.ZodObject<{
|
|
36
|
+
kind: z.ZodLiteral<"trajectory-agent">;
|
|
37
|
+
agent: z.ZodString;
|
|
38
|
+
scenarioRef: z.ZodOptional<z.ZodString>;
|
|
39
|
+
}, z.core.$strip>;
|
|
40
|
+
export declare const EvalTargetSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
41
|
+
kind: z.ZodLiteral<"builtin-agent-prompt">;
|
|
42
|
+
agent: z.ZodEnum<{
|
|
43
|
+
pattern: "pattern";
|
|
44
|
+
loom: "loom";
|
|
45
|
+
tapestry: "tapestry";
|
|
46
|
+
thread: "thread";
|
|
47
|
+
spindle: "spindle";
|
|
48
|
+
weft: "weft";
|
|
49
|
+
warp: "warp";
|
|
50
|
+
}>;
|
|
51
|
+
variant: z.ZodOptional<z.ZodObject<{
|
|
52
|
+
disabledAgents: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
53
|
+
}, z.core.$strip>>;
|
|
54
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
55
|
+
kind: z.ZodLiteral<"custom-agent-prompt">;
|
|
56
|
+
agentId: z.ZodString;
|
|
57
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
58
|
+
kind: z.ZodLiteral<"single-turn-agent">;
|
|
59
|
+
agent: z.ZodString;
|
|
60
|
+
input: z.ZodOptional<z.ZodString>;
|
|
61
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
62
|
+
kind: z.ZodLiteral<"trajectory-agent">;
|
|
63
|
+
agent: z.ZodString;
|
|
64
|
+
scenarioRef: z.ZodOptional<z.ZodString>;
|
|
65
|
+
}, z.core.$strip>], "kind">;
|
|
66
|
+
export declare const PromptRenderExecutorSchema: z.ZodObject<{
|
|
67
|
+
kind: z.ZodLiteral<"prompt-render">;
|
|
68
|
+
}, z.core.$strip>;
|
|
69
|
+
export declare const ModelResponseExecutorSchema: z.ZodObject<{
|
|
70
|
+
kind: z.ZodLiteral<"model-response">;
|
|
71
|
+
provider: z.ZodString;
|
|
72
|
+
model: z.ZodString;
|
|
73
|
+
input: z.ZodString;
|
|
74
|
+
}, z.core.$strip>;
|
|
75
|
+
export declare const TrajectoryRunExecutorSchema: z.ZodObject<{
|
|
76
|
+
kind: z.ZodLiteral<"trajectory-run">;
|
|
77
|
+
scenarioRef: z.ZodString;
|
|
78
|
+
}, z.core.$strip>;
|
|
79
|
+
export declare const ExecutorSpecSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
80
|
+
kind: z.ZodLiteral<"prompt-render">;
|
|
81
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
82
|
+
kind: z.ZodLiteral<"model-response">;
|
|
83
|
+
provider: z.ZodString;
|
|
84
|
+
model: z.ZodString;
|
|
85
|
+
input: z.ZodString;
|
|
86
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
87
|
+
kind: z.ZodLiteral<"trajectory-run">;
|
|
88
|
+
scenarioRef: z.ZodString;
|
|
89
|
+
}, z.core.$strip>], "kind">;
|
|
90
|
+
export declare const ContainsAllEvaluatorSchema: z.ZodObject<{
|
|
91
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
92
|
+
kind: z.ZodLiteral<"contains-all">;
|
|
93
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
94
|
+
}, z.core.$strip>;
|
|
95
|
+
export declare const ContainsAnyEvaluatorSchema: z.ZodObject<{
|
|
96
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
97
|
+
kind: z.ZodLiteral<"contains-any">;
|
|
98
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
99
|
+
}, z.core.$strip>;
|
|
100
|
+
export declare const ExcludesAllEvaluatorSchema: z.ZodObject<{
|
|
101
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
102
|
+
kind: z.ZodLiteral<"excludes-all">;
|
|
103
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
104
|
+
}, z.core.$strip>;
|
|
105
|
+
export declare const SectionContainsAllEvaluatorSchema: z.ZodObject<{
|
|
106
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
107
|
+
kind: z.ZodLiteral<"section-contains-all">;
|
|
108
|
+
section: z.ZodString;
|
|
109
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
110
|
+
}, z.core.$strip>;
|
|
111
|
+
export declare const OrderedContainsEvaluatorSchema: z.ZodObject<{
|
|
112
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
113
|
+
kind: z.ZodLiteral<"ordered-contains">;
|
|
114
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
115
|
+
}, z.core.$strip>;
|
|
116
|
+
export declare const XmlSectionsPresentEvaluatorSchema: z.ZodObject<{
|
|
117
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
118
|
+
kind: z.ZodLiteral<"xml-sections-present">;
|
|
119
|
+
sections: z.ZodArray<z.ZodString>;
|
|
120
|
+
}, z.core.$strip>;
|
|
121
|
+
export declare const ToolPolicyEvaluatorSchema: z.ZodObject<{
|
|
122
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
123
|
+
kind: z.ZodLiteral<"tool-policy">;
|
|
124
|
+
expectations: z.ZodRecord<z.ZodString, z.ZodBoolean>;
|
|
125
|
+
}, z.core.$strip>;
|
|
126
|
+
export declare const MinLengthEvaluatorSchema: z.ZodObject<{
|
|
127
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
128
|
+
kind: z.ZodLiteral<"min-length">;
|
|
129
|
+
min: z.ZodNumber;
|
|
130
|
+
}, z.core.$strip>;
|
|
131
|
+
export declare const LlmJudgeEvaluatorSchema: z.ZodObject<{
|
|
132
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
133
|
+
kind: z.ZodLiteral<"llm-judge">;
|
|
134
|
+
rubricRef: z.ZodOptional<z.ZodString>;
|
|
135
|
+
expectedContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
136
|
+
forbiddenContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
137
|
+
}, z.core.$strip>;
|
|
138
|
+
export declare const BaselineDiffEvaluatorSchema: z.ZodObject<{
|
|
139
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
140
|
+
kind: z.ZodLiteral<"baseline-diff">;
|
|
141
|
+
baselineRef: z.ZodOptional<z.ZodString>;
|
|
142
|
+
}, z.core.$strip>;
|
|
143
|
+
export declare const TrajectoryAssertionEvaluatorSchema: z.ZodObject<{
|
|
144
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
145
|
+
kind: z.ZodLiteral<"trajectory-assertion">;
|
|
146
|
+
assertionRef: z.ZodOptional<z.ZodString>;
|
|
147
|
+
}, z.core.$strip>;
|
|
148
|
+
export declare const EvaluatorSpecSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
149
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
150
|
+
kind: z.ZodLiteral<"contains-all">;
|
|
151
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
152
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
153
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
154
|
+
kind: z.ZodLiteral<"contains-any">;
|
|
155
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
156
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
157
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
158
|
+
kind: z.ZodLiteral<"excludes-all">;
|
|
159
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
160
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
161
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
162
|
+
kind: z.ZodLiteral<"section-contains-all">;
|
|
163
|
+
section: z.ZodString;
|
|
164
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
165
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
166
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
167
|
+
kind: z.ZodLiteral<"ordered-contains">;
|
|
168
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
169
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
170
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
171
|
+
kind: z.ZodLiteral<"xml-sections-present">;
|
|
172
|
+
sections: z.ZodArray<z.ZodString>;
|
|
173
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
174
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
175
|
+
kind: z.ZodLiteral<"tool-policy">;
|
|
176
|
+
expectations: z.ZodRecord<z.ZodString, z.ZodBoolean>;
|
|
177
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
178
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
179
|
+
kind: z.ZodLiteral<"min-length">;
|
|
180
|
+
min: z.ZodNumber;
|
|
181
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
182
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
183
|
+
kind: z.ZodLiteral<"llm-judge">;
|
|
184
|
+
rubricRef: z.ZodOptional<z.ZodString>;
|
|
185
|
+
expectedContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
186
|
+
forbiddenContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
187
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
188
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
189
|
+
kind: z.ZodLiteral<"baseline-diff">;
|
|
190
|
+
baselineRef: z.ZodOptional<z.ZodString>;
|
|
191
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
192
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
193
|
+
kind: z.ZodLiteral<"trajectory-assertion">;
|
|
194
|
+
assertionRef: z.ZodOptional<z.ZodString>;
|
|
195
|
+
}, z.core.$strip>], "kind">;
|
|
196
|
+
export declare const EvalCaseSchema: z.ZodObject<{
|
|
197
|
+
id: z.ZodString;
|
|
198
|
+
title: z.ZodString;
|
|
199
|
+
phase: z.ZodEnum<{
|
|
200
|
+
phase1: "phase1";
|
|
201
|
+
phase2: "phase2";
|
|
202
|
+
phase3: "phase3";
|
|
203
|
+
phase4: "phase4";
|
|
204
|
+
}>;
|
|
205
|
+
target: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
206
|
+
kind: z.ZodLiteral<"builtin-agent-prompt">;
|
|
207
|
+
agent: z.ZodEnum<{
|
|
208
|
+
pattern: "pattern";
|
|
209
|
+
loom: "loom";
|
|
210
|
+
tapestry: "tapestry";
|
|
211
|
+
thread: "thread";
|
|
212
|
+
spindle: "spindle";
|
|
213
|
+
weft: "weft";
|
|
214
|
+
warp: "warp";
|
|
215
|
+
}>;
|
|
216
|
+
variant: z.ZodOptional<z.ZodObject<{
|
|
217
|
+
disabledAgents: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
218
|
+
}, z.core.$strip>>;
|
|
219
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
220
|
+
kind: z.ZodLiteral<"custom-agent-prompt">;
|
|
221
|
+
agentId: z.ZodString;
|
|
222
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
223
|
+
kind: z.ZodLiteral<"single-turn-agent">;
|
|
224
|
+
agent: z.ZodString;
|
|
225
|
+
input: z.ZodOptional<z.ZodString>;
|
|
226
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
227
|
+
kind: z.ZodLiteral<"trajectory-agent">;
|
|
228
|
+
agent: z.ZodString;
|
|
229
|
+
scenarioRef: z.ZodOptional<z.ZodString>;
|
|
230
|
+
}, z.core.$strip>], "kind">;
|
|
231
|
+
executor: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
232
|
+
kind: z.ZodLiteral<"prompt-render">;
|
|
233
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
234
|
+
kind: z.ZodLiteral<"model-response">;
|
|
235
|
+
provider: z.ZodString;
|
|
236
|
+
model: z.ZodString;
|
|
237
|
+
input: z.ZodString;
|
|
238
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
239
|
+
kind: z.ZodLiteral<"trajectory-run">;
|
|
240
|
+
scenarioRef: z.ZodString;
|
|
241
|
+
}, z.core.$strip>], "kind">;
|
|
242
|
+
evaluators: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
243
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
244
|
+
kind: z.ZodLiteral<"contains-all">;
|
|
245
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
246
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
247
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
248
|
+
kind: z.ZodLiteral<"contains-any">;
|
|
249
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
250
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
251
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
252
|
+
kind: z.ZodLiteral<"excludes-all">;
|
|
253
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
254
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
255
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
256
|
+
kind: z.ZodLiteral<"section-contains-all">;
|
|
257
|
+
section: z.ZodString;
|
|
258
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
259
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
260
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
261
|
+
kind: z.ZodLiteral<"ordered-contains">;
|
|
262
|
+
patterns: z.ZodArray<z.ZodString>;
|
|
263
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
264
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
265
|
+
kind: z.ZodLiteral<"xml-sections-present">;
|
|
266
|
+
sections: z.ZodArray<z.ZodString>;
|
|
267
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
268
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
269
|
+
kind: z.ZodLiteral<"tool-policy">;
|
|
270
|
+
expectations: z.ZodRecord<z.ZodString, z.ZodBoolean>;
|
|
271
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
272
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
273
|
+
kind: z.ZodLiteral<"min-length">;
|
|
274
|
+
min: z.ZodNumber;
|
|
275
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
276
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
277
|
+
kind: z.ZodLiteral<"llm-judge">;
|
|
278
|
+
rubricRef: z.ZodOptional<z.ZodString>;
|
|
279
|
+
expectedContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
280
|
+
forbiddenContains: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
281
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
282
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
283
|
+
kind: z.ZodLiteral<"baseline-diff">;
|
|
284
|
+
baselineRef: z.ZodOptional<z.ZodString>;
|
|
285
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
286
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
287
|
+
kind: z.ZodLiteral<"trajectory-assertion">;
|
|
288
|
+
assertionRef: z.ZodOptional<z.ZodString>;
|
|
289
|
+
}, z.core.$strip>], "kind">>;
|
|
290
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
291
|
+
notes: z.ZodOptional<z.ZodString>;
|
|
292
|
+
}, z.core.$strip>;
|
|
293
|
+
export declare const EvalSuiteManifestSchema: z.ZodObject<{
|
|
294
|
+
id: z.ZodString;
|
|
295
|
+
title: z.ZodString;
|
|
296
|
+
phase: z.ZodEnum<{
|
|
297
|
+
phase1: "phase1";
|
|
298
|
+
phase2: "phase2";
|
|
299
|
+
phase3: "phase3";
|
|
300
|
+
phase4: "phase4";
|
|
301
|
+
}>;
|
|
302
|
+
caseFiles: z.ZodArray<z.ZodString>;
|
|
303
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
304
|
+
}, z.core.$strip>;
|
|
305
|
+
export declare const AssertionResultSchema: z.ZodObject<{
|
|
306
|
+
evaluatorKind: z.ZodEnum<{
|
|
307
|
+
"contains-all": "contains-all";
|
|
308
|
+
"contains-any": "contains-any";
|
|
309
|
+
"excludes-all": "excludes-all";
|
|
310
|
+
"section-contains-all": "section-contains-all";
|
|
311
|
+
"ordered-contains": "ordered-contains";
|
|
312
|
+
"xml-sections-present": "xml-sections-present";
|
|
313
|
+
"tool-policy": "tool-policy";
|
|
314
|
+
"min-length": "min-length";
|
|
315
|
+
"llm-judge": "llm-judge";
|
|
316
|
+
"baseline-diff": "baseline-diff";
|
|
317
|
+
"trajectory-assertion": "trajectory-assertion";
|
|
318
|
+
}>;
|
|
319
|
+
passed: z.ZodBoolean;
|
|
320
|
+
score: z.ZodNumber;
|
|
321
|
+
maxScore: z.ZodNumber;
|
|
322
|
+
message: z.ZodString;
|
|
323
|
+
}, z.core.$strip>;
|
|
324
|
+
export declare const EvalArtifactsSchema: z.ZodObject<{
|
|
325
|
+
renderedPrompt: z.ZodOptional<z.ZodString>;
|
|
326
|
+
agentMetadata: z.ZodOptional<z.ZodObject<{
|
|
327
|
+
agent: z.ZodString;
|
|
328
|
+
description: z.ZodOptional<z.ZodString>;
|
|
329
|
+
sourceKind: z.ZodEnum<{
|
|
330
|
+
default: "default";
|
|
331
|
+
composer: "composer";
|
|
332
|
+
}>;
|
|
333
|
+
}, z.core.$strip>>;
|
|
334
|
+
toolPolicy: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodBoolean>>;
|
|
335
|
+
promptLength: z.ZodOptional<z.ZodNumber>;
|
|
336
|
+
modelOutput: z.ZodOptional<z.ZodString>;
|
|
337
|
+
judgeOutput: z.ZodOptional<z.ZodString>;
|
|
338
|
+
trace: z.ZodOptional<z.ZodUnknown>;
|
|
339
|
+
tokens: z.ZodOptional<z.ZodNumber>;
|
|
340
|
+
cost: z.ZodOptional<z.ZodNumber>;
|
|
341
|
+
baselineDelta: z.ZodOptional<z.ZodUnknown>;
|
|
342
|
+
}, z.core.$strip>;
|
|
343
|
+
export declare const EvalCaseResultSchema: z.ZodObject<{
|
|
344
|
+
caseId: z.ZodString;
|
|
345
|
+
status: z.ZodEnum<{
|
|
346
|
+
error: "error";
|
|
347
|
+
failed: "failed";
|
|
348
|
+
passed: "passed";
|
|
349
|
+
}>;
|
|
350
|
+
score: z.ZodNumber;
|
|
351
|
+
normalizedScore: z.ZodNumber;
|
|
352
|
+
maxScore: z.ZodNumber;
|
|
353
|
+
durationMs: z.ZodNumber;
|
|
354
|
+
artifacts: z.ZodObject<{
|
|
355
|
+
renderedPrompt: z.ZodOptional<z.ZodString>;
|
|
356
|
+
agentMetadata: z.ZodOptional<z.ZodObject<{
|
|
357
|
+
agent: z.ZodString;
|
|
358
|
+
description: z.ZodOptional<z.ZodString>;
|
|
359
|
+
sourceKind: z.ZodEnum<{
|
|
360
|
+
default: "default";
|
|
361
|
+
composer: "composer";
|
|
362
|
+
}>;
|
|
363
|
+
}, z.core.$strip>>;
|
|
364
|
+
toolPolicy: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodBoolean>>;
|
|
365
|
+
promptLength: z.ZodOptional<z.ZodNumber>;
|
|
366
|
+
modelOutput: z.ZodOptional<z.ZodString>;
|
|
367
|
+
judgeOutput: z.ZodOptional<z.ZodString>;
|
|
368
|
+
trace: z.ZodOptional<z.ZodUnknown>;
|
|
369
|
+
tokens: z.ZodOptional<z.ZodNumber>;
|
|
370
|
+
cost: z.ZodOptional<z.ZodNumber>;
|
|
371
|
+
baselineDelta: z.ZodOptional<z.ZodUnknown>;
|
|
372
|
+
}, z.core.$strip>;
|
|
373
|
+
assertionResults: z.ZodArray<z.ZodObject<{
|
|
374
|
+
evaluatorKind: z.ZodEnum<{
|
|
375
|
+
"contains-all": "contains-all";
|
|
376
|
+
"contains-any": "contains-any";
|
|
377
|
+
"excludes-all": "excludes-all";
|
|
378
|
+
"section-contains-all": "section-contains-all";
|
|
379
|
+
"ordered-contains": "ordered-contains";
|
|
380
|
+
"xml-sections-present": "xml-sections-present";
|
|
381
|
+
"tool-policy": "tool-policy";
|
|
382
|
+
"min-length": "min-length";
|
|
383
|
+
"llm-judge": "llm-judge";
|
|
384
|
+
"baseline-diff": "baseline-diff";
|
|
385
|
+
"trajectory-assertion": "trajectory-assertion";
|
|
386
|
+
}>;
|
|
387
|
+
passed: z.ZodBoolean;
|
|
388
|
+
score: z.ZodNumber;
|
|
389
|
+
maxScore: z.ZodNumber;
|
|
390
|
+
message: z.ZodString;
|
|
391
|
+
}, z.core.$strip>>;
|
|
392
|
+
errors: z.ZodArray<z.ZodString>;
|
|
393
|
+
}, z.core.$strip>;
|
|
394
|
+
export declare const EvalRunSummarySchema: z.ZodObject<{
|
|
395
|
+
totalCases: z.ZodNumber;
|
|
396
|
+
passedCases: z.ZodNumber;
|
|
397
|
+
failedCases: z.ZodNumber;
|
|
398
|
+
errorCases: z.ZodNumber;
|
|
399
|
+
totalScore: z.ZodNumber;
|
|
400
|
+
normalizedScore: z.ZodNumber;
|
|
401
|
+
maxScore: z.ZodNumber;
|
|
402
|
+
}, z.core.$strip>;
|
|
403
|
+
export declare const EvalRunResultSchema: z.ZodObject<{
|
|
404
|
+
runId: z.ZodString;
|
|
405
|
+
startedAt: z.ZodString;
|
|
406
|
+
finishedAt: z.ZodString;
|
|
407
|
+
suiteId: z.ZodString;
|
|
408
|
+
phase: z.ZodEnum<{
|
|
409
|
+
phase1: "phase1";
|
|
410
|
+
phase2: "phase2";
|
|
411
|
+
phase3: "phase3";
|
|
412
|
+
phase4: "phase4";
|
|
413
|
+
}>;
|
|
414
|
+
summary: z.ZodObject<{
|
|
415
|
+
totalCases: z.ZodNumber;
|
|
416
|
+
passedCases: z.ZodNumber;
|
|
417
|
+
failedCases: z.ZodNumber;
|
|
418
|
+
errorCases: z.ZodNumber;
|
|
419
|
+
totalScore: z.ZodNumber;
|
|
420
|
+
normalizedScore: z.ZodNumber;
|
|
421
|
+
maxScore: z.ZodNumber;
|
|
422
|
+
}, z.core.$strip>;
|
|
423
|
+
caseResults: z.ZodArray<z.ZodObject<{
|
|
424
|
+
caseId: z.ZodString;
|
|
425
|
+
status: z.ZodEnum<{
|
|
426
|
+
error: "error";
|
|
427
|
+
failed: "failed";
|
|
428
|
+
passed: "passed";
|
|
429
|
+
}>;
|
|
430
|
+
score: z.ZodNumber;
|
|
431
|
+
normalizedScore: z.ZodNumber;
|
|
432
|
+
maxScore: z.ZodNumber;
|
|
433
|
+
durationMs: z.ZodNumber;
|
|
434
|
+
artifacts: z.ZodObject<{
|
|
435
|
+
renderedPrompt: z.ZodOptional<z.ZodString>;
|
|
436
|
+
agentMetadata: z.ZodOptional<z.ZodObject<{
|
|
437
|
+
agent: z.ZodString;
|
|
438
|
+
description: z.ZodOptional<z.ZodString>;
|
|
439
|
+
sourceKind: z.ZodEnum<{
|
|
440
|
+
default: "default";
|
|
441
|
+
composer: "composer";
|
|
442
|
+
}>;
|
|
443
|
+
}, z.core.$strip>>;
|
|
444
|
+
toolPolicy: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodBoolean>>;
|
|
445
|
+
promptLength: z.ZodOptional<z.ZodNumber>;
|
|
446
|
+
modelOutput: z.ZodOptional<z.ZodString>;
|
|
447
|
+
judgeOutput: z.ZodOptional<z.ZodString>;
|
|
448
|
+
trace: z.ZodOptional<z.ZodUnknown>;
|
|
449
|
+
tokens: z.ZodOptional<z.ZodNumber>;
|
|
450
|
+
cost: z.ZodOptional<z.ZodNumber>;
|
|
451
|
+
baselineDelta: z.ZodOptional<z.ZodUnknown>;
|
|
452
|
+
}, z.core.$strip>;
|
|
453
|
+
assertionResults: z.ZodArray<z.ZodObject<{
|
|
454
|
+
evaluatorKind: z.ZodEnum<{
|
|
455
|
+
"contains-all": "contains-all";
|
|
456
|
+
"contains-any": "contains-any";
|
|
457
|
+
"excludes-all": "excludes-all";
|
|
458
|
+
"section-contains-all": "section-contains-all";
|
|
459
|
+
"ordered-contains": "ordered-contains";
|
|
460
|
+
"xml-sections-present": "xml-sections-present";
|
|
461
|
+
"tool-policy": "tool-policy";
|
|
462
|
+
"min-length": "min-length";
|
|
463
|
+
"llm-judge": "llm-judge";
|
|
464
|
+
"baseline-diff": "baseline-diff";
|
|
465
|
+
"trajectory-assertion": "trajectory-assertion";
|
|
466
|
+
}>;
|
|
467
|
+
passed: z.ZodBoolean;
|
|
468
|
+
score: z.ZodNumber;
|
|
469
|
+
maxScore: z.ZodNumber;
|
|
470
|
+
message: z.ZodString;
|
|
471
|
+
}, z.core.$strip>>;
|
|
472
|
+
errors: z.ZodArray<z.ZodString>;
|
|
473
|
+
}, z.core.$strip>>;
|
|
474
|
+
}, z.core.$strip>;
|
|
475
|
+
export declare function formatSchemaIssues(filePath: string, issues: z.ZodIssue[]): string;
|
|
476
|
+
export declare const AllowedEvalTargetKinds: ("builtin-agent-prompt" | "custom-agent-prompt" | "single-turn-agent" | "trajectory-agent")[];
|
|
477
|
+
export declare const AllowedExecutorKinds: ("prompt-render" | "model-response" | "trajectory-run")[];
|
|
478
|
+
export declare const AllowedEvaluatorKinds: ("contains-all" | "contains-any" | "excludes-all" | "section-contains-all" | "ordered-contains" | "xml-sections-present" | "tool-policy" | "min-length" | "llm-judge" | "baseline-diff" | "trajectory-assertion")[];
|
|
package/dist/features/evals/storage.d.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { EvalRunResult } from "./types";
|
|
2
|
+
export declare const EVALS_DIR = ".weave/evals";
|
|
3
|
+
export declare const EVAL_RUNS_DIR = ".weave/evals/runs";
|
|
4
|
+
export declare const EVAL_LATEST_FILE = ".weave/evals/latest.json";
|
|
5
|
+
export declare function ensureEvalStorageDir(directory: string): string;
|
|
6
|
+
export declare function getDefaultEvalRunPath(directory: string, runId: string): string;
|
|
7
|
+
export declare function writeEvalRunResult(directory: string, result: EvalRunResult, outputPath?: string): string;
|
|
package/dist/features/evals/types.d.ts
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import type { WeaveAgentName } from "../../agents/types";
|
|
2
|
+
export declare const EVAL_PHASES: readonly ["phase1", "phase2", "phase3", "phase4"];
|
|
3
|
+
export type EvalPhase = (typeof EVAL_PHASES)[number];
|
|
4
|
+
export declare const EVAL_TARGET_KINDS: readonly ["builtin-agent-prompt", "custom-agent-prompt", "single-turn-agent", "trajectory-agent"];
|
|
5
|
+
export type EvalTargetKind = (typeof EVAL_TARGET_KINDS)[number];
|
|
6
|
+
export declare const EXECUTOR_KINDS: readonly ["prompt-render", "model-response", "trajectory-run"];
|
|
7
|
+
export type ExecutorKind = (typeof EXECUTOR_KINDS)[number];
|
|
8
|
+
export declare const EVALUATOR_KINDS: readonly ["contains-all", "contains-any", "excludes-all", "section-contains-all", "ordered-contains", "xml-sections-present", "tool-policy", "min-length", "llm-judge", "baseline-diff", "trajectory-assertion"];
|
|
9
|
+
export type EvaluatorKind = (typeof EVALUATOR_KINDS)[number];
|
|
10
|
+
export type BuiltinEvalAgentName = Exclude<WeaveAgentName, "shuttle">;
|
|
11
|
+
export interface BuiltinAgentPromptVariant {
|
|
12
|
+
disabledAgents?: string[];
|
|
13
|
+
}
|
|
14
|
+
export interface BuiltinAgentPromptTarget {
|
|
15
|
+
kind: "builtin-agent-prompt";
|
|
16
|
+
agent: BuiltinEvalAgentName;
|
|
17
|
+
variant?: BuiltinAgentPromptVariant;
|
|
18
|
+
}
|
|
19
|
+
export interface CustomAgentPromptTarget {
|
|
20
|
+
kind: "custom-agent-prompt";
|
|
21
|
+
agentId: string;
|
|
22
|
+
}
|
|
23
|
+
export interface SingleTurnAgentTarget {
|
|
24
|
+
kind: "single-turn-agent";
|
|
25
|
+
agent: string;
|
|
26
|
+
input?: string;
|
|
27
|
+
}
|
|
28
|
+
export interface TrajectoryAgentTarget {
|
|
29
|
+
kind: "trajectory-agent";
|
|
30
|
+
agent: string;
|
|
31
|
+
scenarioRef?: string;
|
|
32
|
+
}
|
|
33
|
+
export type EvalTarget = BuiltinAgentPromptTarget | CustomAgentPromptTarget | SingleTurnAgentTarget | TrajectoryAgentTarget;
|
|
34
|
+
export interface PromptRenderExecutor {
|
|
35
|
+
kind: "prompt-render";
|
|
36
|
+
}
|
|
37
|
+
export interface ModelResponseExecutor {
|
|
38
|
+
kind: "model-response";
|
|
39
|
+
provider: string;
|
|
40
|
+
model: string;
|
|
41
|
+
input: string;
|
|
42
|
+
}
|
|
43
|
+
export interface TrajectoryRunExecutor {
|
|
44
|
+
kind: "trajectory-run";
|
|
45
|
+
scenarioRef: string;
|
|
46
|
+
}
|
|
47
|
+
export type ExecutorSpec = PromptRenderExecutor | ModelResponseExecutor | TrajectoryRunExecutor;
|
|
48
|
+
export interface WeightedEvaluatorSpec {
|
|
49
|
+
weight?: number;
|
|
50
|
+
}
|
|
51
|
+
export interface ContainsAllEvaluator extends WeightedEvaluatorSpec {
|
|
52
|
+
kind: "contains-all";
|
|
53
|
+
patterns: string[];
|
|
54
|
+
}
|
|
55
|
+
export interface ContainsAnyEvaluator extends WeightedEvaluatorSpec {
|
|
56
|
+
kind: "contains-any";
|
|
57
|
+
patterns: string[];
|
|
58
|
+
}
|
|
59
|
+
export interface ExcludesAllEvaluator extends WeightedEvaluatorSpec {
|
|
60
|
+
kind: "excludes-all";
|
|
61
|
+
patterns: string[];
|
|
62
|
+
}
|
|
63
|
+
export interface SectionContainsAllEvaluator extends WeightedEvaluatorSpec {
|
|
64
|
+
kind: "section-contains-all";
|
|
65
|
+
section: string;
|
|
66
|
+
patterns: string[];
|
|
67
|
+
}
|
|
68
|
+
export interface OrderedContainsEvaluator extends WeightedEvaluatorSpec {
|
|
69
|
+
kind: "ordered-contains";
|
|
70
|
+
patterns: string[];
|
|
71
|
+
}
|
|
72
|
+
export interface XmlSectionsPresentEvaluator extends WeightedEvaluatorSpec {
|
|
73
|
+
kind: "xml-sections-present";
|
|
74
|
+
sections: string[];
|
|
75
|
+
}
|
|
76
|
+
export interface ToolPolicyEvaluator extends WeightedEvaluatorSpec {
|
|
77
|
+
kind: "tool-policy";
|
|
78
|
+
expectations: Record<string, boolean>;
|
|
79
|
+
}
|
|
80
|
+
export interface MinLengthEvaluator extends WeightedEvaluatorSpec {
|
|
81
|
+
kind: "min-length";
|
|
82
|
+
min: number;
|
|
83
|
+
}
|
|
84
|
+
export interface LlmJudgeEvaluator extends WeightedEvaluatorSpec {
|
|
85
|
+
kind: "llm-judge";
|
|
86
|
+
rubricRef?: string;
|
|
87
|
+
expectedContains?: string[];
|
|
88
|
+
forbiddenContains?: string[];
|
|
89
|
+
}
|
|
90
|
+
export interface BaselineDiffEvaluator extends WeightedEvaluatorSpec {
|
|
91
|
+
kind: "baseline-diff";
|
|
92
|
+
baselineRef?: string;
|
|
93
|
+
}
|
|
94
|
+
export interface TrajectoryAssertionEvaluator extends WeightedEvaluatorSpec {
|
|
95
|
+
kind: "trajectory-assertion";
|
|
96
|
+
assertionRef?: string;
|
|
97
|
+
}
|
|
98
|
+
export type EvaluatorSpec = ContainsAllEvaluator | ContainsAnyEvaluator | ExcludesAllEvaluator | SectionContainsAllEvaluator | OrderedContainsEvaluator | XmlSectionsPresentEvaluator | ToolPolicyEvaluator | MinLengthEvaluator | LlmJudgeEvaluator | BaselineDiffEvaluator | TrajectoryAssertionEvaluator;
|
|
99
|
+
export interface EvalSuiteManifest {
|
|
100
|
+
id: string;
|
|
101
|
+
title: string;
|
|
102
|
+
phase: EvalPhase;
|
|
103
|
+
caseFiles: string[];
|
|
104
|
+
tags?: string[];
|
|
105
|
+
}
|
|
106
|
+
export interface EvalCase {
|
|
107
|
+
id: string;
|
|
108
|
+
title: string;
|
|
109
|
+
phase: EvalPhase;
|
|
110
|
+
target: EvalTarget;
|
|
111
|
+
executor: ExecutorSpec;
|
|
112
|
+
evaluators: EvaluatorSpec[];
|
|
113
|
+
tags?: string[];
|
|
114
|
+
notes?: string;
|
|
115
|
+
}
|
|
116
|
+
export interface LoadedEvalSuiteManifest extends EvalSuiteManifest {
|
|
117
|
+
filePath: string;
|
|
118
|
+
}
|
|
119
|
+
export interface LoadedEvalCase extends EvalCase {
|
|
120
|
+
filePath: string;
|
|
121
|
+
}
|
|
122
|
+
export interface AgentPromptMetadataArtifact {
|
|
123
|
+
agent: string;
|
|
124
|
+
description?: string;
|
|
125
|
+
sourceKind: "composer" | "default";
|
|
126
|
+
}
|
|
127
|
+
export interface EvalArtifacts {
|
|
128
|
+
renderedPrompt?: string;
|
|
129
|
+
agentMetadata?: AgentPromptMetadataArtifact;
|
|
130
|
+
toolPolicy?: Record<string, boolean>;
|
|
131
|
+
promptLength?: number;
|
|
132
|
+
modelOutput?: string;
|
|
133
|
+
judgeOutput?: string;
|
|
134
|
+
trace?: unknown;
|
|
135
|
+
tokens?: number;
|
|
136
|
+
cost?: number;
|
|
137
|
+
baselineDelta?: unknown;
|
|
138
|
+
}
|
|
139
|
+
export interface AssertionResult {
|
|
140
|
+
evaluatorKind: EvaluatorKind;
|
|
141
|
+
passed: boolean;
|
|
142
|
+
score: number;
|
|
143
|
+
maxScore: number;
|
|
144
|
+
message: string;
|
|
145
|
+
}
|
|
146
|
+
export interface EvalCaseResult {
|
|
147
|
+
caseId: string;
|
|
148
|
+
status: "passed" | "failed" | "error";
|
|
149
|
+
score: number;
|
|
150
|
+
normalizedScore: number;
|
|
151
|
+
maxScore: number;
|
|
152
|
+
durationMs: number;
|
|
153
|
+
artifacts: EvalArtifacts;
|
|
154
|
+
assertionResults: AssertionResult[];
|
|
155
|
+
errors: string[];
|
|
156
|
+
}
|
|
157
|
+
export interface EvalRunSummary {
|
|
158
|
+
totalCases: number;
|
|
159
|
+
passedCases: number;
|
|
160
|
+
failedCases: number;
|
|
161
|
+
errorCases: number;
|
|
162
|
+
totalScore: number;
|
|
163
|
+
normalizedScore: number;
|
|
164
|
+
maxScore: number;
|
|
165
|
+
}
|
|
166
|
+
export interface EvalRunResult {
|
|
167
|
+
runId: string;
|
|
168
|
+
startedAt: string;
|
|
169
|
+
finishedAt: string;
|
|
170
|
+
suiteId: string;
|
|
171
|
+
phase: EvalPhase;
|
|
172
|
+
summary: EvalRunSummary;
|
|
173
|
+
caseResults: EvalCaseResult[];
|
|
174
|
+
}
|
|
175
|
+
export interface ResolvedTarget {
|
|
176
|
+
target: EvalTarget;
|
|
177
|
+
artifacts: EvalArtifacts;
|
|
178
|
+
}
|
|
179
|
+
export interface ExecutionContext {
|
|
180
|
+
mode: "local" | "ci" | "hosted";
|
|
181
|
+
directory: string;
|
|
182
|
+
outputPath?: string;
|
|
183
|
+
}
|
|
184
|
+
export interface RunnerFilters {
|
|
185
|
+
caseIds?: string[];
|
|
186
|
+
agents?: string[];
|
|
187
|
+
tags?: string[];
|
|
188
|
+
}
|
|
189
|
+
export interface RunEvalSuiteOptions {
|
|
190
|
+
directory: string;
|
|
191
|
+
suite: string;
|
|
192
|
+
filters?: RunnerFilters;
|
|
193
|
+
outputPath?: string;
|
|
194
|
+
mode?: ExecutionContext["mode"];
|
|
195
|
+
}
|
|
196
|
+
export interface EvalLoadErrorContext {
|
|
197
|
+
filePath: string;
|
|
198
|
+
detail: string;
|
|
199
|
+
}
|
|
200
|
+
export interface DeterministicBaselineCase {
|
|
201
|
+
caseId: string;
|
|
202
|
+
status: EvalCaseResult["status"];
|
|
203
|
+
normalizedScore: number;
|
|
204
|
+
assertionPassed: number;
|
|
205
|
+
assertionFailed: number;
|
|
206
|
+
errorCount: number;
|
|
207
|
+
}
|
|
208
|
+
export interface DeterministicBaseline {
|
|
209
|
+
version: 1;
|
|
210
|
+
suiteId: string;
|
|
211
|
+
phase: EvalPhase;
|
|
212
|
+
generatedAt: string;
|
|
213
|
+
normalizedScore: number;
|
|
214
|
+
cases: DeterministicBaselineCase[];
|
|
215
|
+
}
|
|
216
|
+
export interface BaselineComparisonOptions {
|
|
217
|
+
scoreDropTolerance?: number;
|
|
218
|
+
}
|
|
219
|
+
export interface BaselineComparison {
|
|
220
|
+
outcome: "no-regression" | "informational-diff" | "regression";
|
|
221
|
+
regressions: string[];
|
|
222
|
+
informational: string[];
|
|
223
|
+
}
|
package/dist/index.js
CHANGED
|
@@ -22,7 +22,8 @@ var AgentOverrideConfigSchema = z.object({
|
|
|
22
22
|
tools: z.record(z.string(), z.boolean()).optional(),
|
|
23
23
|
disable: z.boolean().optional(),
|
|
24
24
|
mode: z.enum(["subagent", "primary", "all"]).optional(),
|
|
25
|
-
maxTokens: z.number().optional()
|
|
25
|
+
maxTokens: z.number().optional(),
|
|
26
|
+
display_name: z.string().optional()
|
|
26
27
|
});
|
|
27
28
|
var AgentOverridesSchema = z.record(z.string(), AgentOverrideConfigSchema);
|
|
28
29
|
var CategoryConfigSchema = z.object({
|
|
@@ -223,6 +224,7 @@ var AGENT_DISPLAY_NAMES = {
|
|
|
223
224
|
weft: "weft"
|
|
224
225
|
};
|
|
225
226
|
var BUILTIN_CONFIG_KEYS = new Set(Object.keys(AGENT_DISPLAY_NAMES));
|
|
227
|
+
var INITIAL_BUILTIN_DISPLAY_NAMES = new Map(Object.entries(AGENT_DISPLAY_NAMES));
|
|
226
228
|
var reverseDisplayNames = null;
|
|
227
229
|
function getReverseDisplayNames() {
|
|
228
230
|
if (reverseDisplayNames === null) {
|
|
@@ -234,6 +236,11 @@ function registerAgentDisplayName(configKey, displayName) {
|
|
|
234
236
|
if (BUILTIN_CONFIG_KEYS.has(configKey)) {
|
|
235
237
|
throw new Error(`Cannot register display name for "${configKey}": it is a built-in agent name`);
|
|
236
238
|
}
|
|
239
|
+
for (const [builtinKey, initialDisplayName] of INITIAL_BUILTIN_DISPLAY_NAMES) {
|
|
240
|
+
if (initialDisplayName.toLowerCase() === displayName.toLowerCase()) {
|
|
241
|
+
throw new Error(`Display name "${displayName}" is reserved for built-in agent "${builtinKey}"`);
|
|
242
|
+
}
|
|
243
|
+
}
|
|
237
244
|
const reverse = getReverseDisplayNames();
|
|
238
245
|
const existingKey = reverse[displayName.toLowerCase()];
|
|
239
246
|
if (existingKey !== undefined && BUILTIN_CONFIG_KEYS.has(existingKey)) {
|
|
@@ -242,6 +249,13 @@ function registerAgentDisplayName(configKey, displayName) {
|
|
|
242
249
|
AGENT_DISPLAY_NAMES[configKey] = displayName;
|
|
243
250
|
reverseDisplayNames = null;
|
|
244
251
|
}
|
|
252
|
+
function updateBuiltinDisplayName(configKey, displayName) {
|
|
253
|
+
if (!BUILTIN_CONFIG_KEYS.has(configKey)) {
|
|
254
|
+
throw new Error(`Cannot update builtin display name for "${configKey}": not a built-in agent`);
|
|
255
|
+
}
|
|
256
|
+
AGENT_DISPLAY_NAMES[configKey] = displayName;
|
|
257
|
+
reverseDisplayNames = null;
|
|
258
|
+
}
|
|
245
259
|
function getAgentDisplayName(configKey) {
|
|
246
260
|
const exactMatch = AGENT_DISPLAY_NAMES[configKey];
|
|
247
261
|
if (exactMatch !== undefined)
|
|
@@ -688,7 +702,12 @@ FORMAT RULES:
|
|
|
688
702
|
- Max 5 visible todos at any time
|
|
689
703
|
- in_progress = yellow highlight — use for ACTIVE work only
|
|
690
704
|
- Prefix delegations with agent name
|
|
691
|
-
|
|
705
|
+
|
|
706
|
+
BEFORE FINISHING (MANDATORY):
|
|
707
|
+
- ALWAYS issue a final todowrite before your last response
|
|
708
|
+
- Mark ALL in_progress items → "completed" (or "cancelled")
|
|
709
|
+
- Never leave in_progress items when done
|
|
710
|
+
- This is NON-NEGOTIABLE — skipping it breaks the UI
|
|
692
711
|
</SidebarTodos>`;
|
|
693
712
|
}
|
|
694
713
|
function buildDelegationSection(disabled) {
|
|
@@ -979,6 +998,12 @@ FORMAT RULES:
|
|
|
979
998
|
- Summary todo always present during execution
|
|
980
999
|
- Max 5 visible todos (1 summary + 1 in_progress + 2-3 pending)
|
|
981
1000
|
- in_progress = yellow highlight — use for CURRENT task only
|
|
1001
|
+
|
|
1002
|
+
BEFORE FINISHING (MANDATORY):
|
|
1003
|
+
- ALWAYS issue a final todowrite before your last response
|
|
1004
|
+
- Mark ALL in_progress items → "completed" (or "cancelled")
|
|
1005
|
+
- Never leave in_progress items when done
|
|
1006
|
+
- This is NON-NEGOTIABLE — skipping it breaks the UI
|
|
982
1007
|
</SidebarTodos>`;
|
|
983
1008
|
}
|
|
984
1009
|
function buildTapestryPlanExecutionSection(disabled = new Set) {
|
|
@@ -1714,12 +1739,19 @@ var AGENT_NAME_VARIANTS = {
|
|
|
1714
1739
|
loom: ["loom", "Loom"],
|
|
1715
1740
|
tapestry: ["tapestry", "Tapestry"]
|
|
1716
1741
|
};
|
|
1742
|
+
var INITIAL_NAME_VARIANTS = new Map(Object.entries(AGENT_NAME_VARIANTS).map(([k, v]) => [k, [...v]]));
|
|
1717
1743
|
function registerAgentNameVariants(name, variants) {
|
|
1718
1744
|
if (AGENT_NAME_VARIANTS[name])
|
|
1719
1745
|
return;
|
|
1720
1746
|
const titleCase = name.charAt(0).toUpperCase() + name.slice(1);
|
|
1721
1747
|
AGENT_NAME_VARIANTS[name] = variants ?? [name, titleCase];
|
|
1722
1748
|
}
|
|
1749
|
+
function addBuiltinNameVariant(configKey, variant) {
|
|
1750
|
+
const existing = AGENT_NAME_VARIANTS[configKey];
|
|
1751
|
+
if (existing && !existing.includes(variant)) {
|
|
1752
|
+
existing.push(variant);
|
|
1753
|
+
}
|
|
1754
|
+
}
|
|
1723
1755
|
function stripDisabledAgentReferences(prompt, disabled) {
|
|
1724
1756
|
if (disabled.size === 0)
|
|
1725
1757
|
return prompt;
|
|
@@ -1732,7 +1764,7 @@ function stripDisabledAgentReferences(prompt, disabled) {
|
|
|
1732
1764
|
}
|
|
1733
1765
|
if (disabledVariants.length === 0)
|
|
1734
1766
|
return prompt;
|
|
1735
|
-
const pattern = new RegExp(
|
|
1767
|
+
const pattern = new RegExp(`(?<!\\w)(${disabledVariants.map((v) => v.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|")})(?!\\w)`);
|
|
1736
1768
|
const lines = prompt.split(`
|
|
1737
1769
|
`);
|
|
1738
1770
|
const filtered = lines.filter((line) => !pattern.test(line));
|
|
@@ -1977,6 +2009,26 @@ function createManagers(options) {
|
|
|
1977
2009
|
fingerprint,
|
|
1978
2010
|
customAgentMetadata
|
|
1979
2011
|
});
|
|
2012
|
+
if (pluginConfig.agents) {
|
|
2013
|
+
for (const [name, override] of Object.entries(pluginConfig.agents)) {
|
|
2014
|
+
const displayName = override.display_name?.trim();
|
|
2015
|
+
if (displayName) {
|
|
2016
|
+
try {
|
|
2017
|
+
updateBuiltinDisplayName(name, displayName);
|
|
2018
|
+
addBuiltinNameVariant(name, displayName);
|
|
2019
|
+
if (agents[name]) {
|
|
2020
|
+
agents[name] = { ...agents[name], description: displayName };
|
|
2021
|
+
}
|
|
2022
|
+
} catch (err) {
|
|
2023
|
+
if (err instanceof Error && err.message.includes("not a built-in agent")) {
|
|
2024
|
+
log(`Skipping display_name override for non-builtin agent "${name}"`);
|
|
2025
|
+
} else {
|
|
2026
|
+
throw err;
|
|
2027
|
+
}
|
|
2028
|
+
}
|
|
2029
|
+
}
|
|
2030
|
+
}
|
|
2031
|
+
}
|
|
1980
2032
|
if (pluginConfig.custom_agents) {
|
|
1981
2033
|
const disabledSet = new Set(pluginConfig.disabled_agents ?? []);
|
|
1982
2034
|
for (const [name, customConfig] of Object.entries(pluginConfig.custom_agents)) {
|
|
@@ -4705,10 +4757,12 @@ function generateMetricsReport(directory, state) {
|
|
|
4705
4757
|
}
|
|
4706
4758
|
|
|
4707
4759
|
// src/plugin/plugin-interface.ts
|
|
4760
|
+
var FINALIZE_TODOS_MARKER = "<!-- weave:finalize-todos -->";
|
|
4708
4761
|
function createPluginInterface(args) {
|
|
4709
4762
|
const { pluginConfig, hooks, tools, configHandler, agents, client, directory = "", tracker } = args;
|
|
4710
4763
|
const lastAssistantMessageText = new Map;
|
|
4711
4764
|
const lastUserMessageText = new Map;
|
|
4765
|
+
const todoFinalizedSessions = new Set;
|
|
4712
4766
|
return {
|
|
4713
4767
|
tool: tools,
|
|
4714
4768
|
config: async (config) => {
|
|
@@ -4791,6 +4845,9 @@ ${result.contextInjection}`;
|
|
|
4791
4845
|
`).trim() ?? "";
|
|
4792
4846
|
if (userText && sessionID) {
|
|
4793
4847
|
lastUserMessageText.set(sessionID, userText);
|
|
4848
|
+
if (!userText.includes(FINALIZE_TODOS_MARKER)) {
|
|
4849
|
+
todoFinalizedSessions.delete(sessionID);
|
|
4850
|
+
}
|
|
4794
4851
|
}
|
|
4795
4852
|
}
|
|
4796
4853
|
if (hooks.workflowCommand) {
|
|
@@ -4825,11 +4882,12 @@ ${cmdResult.contextInjection}`;
|
|
|
4825
4882
|
const isStartWork = promptText.includes("<session-context>");
|
|
4826
4883
|
const isContinuation = promptText.includes(CONTINUATION_MARKER);
|
|
4827
4884
|
const isWorkflowContinuation = promptText.includes(WORKFLOW_CONTINUATION_MARKER);
|
|
4885
|
+
const isTodoFinalize = promptText.includes(FINALIZE_TODOS_MARKER);
|
|
4828
4886
|
const isActiveWorkflow = (() => {
|
|
4829
4887
|
const wf = getActiveWorkflowInstance(directory);
|
|
4830
4888
|
return wf != null && wf.status === "running";
|
|
4831
4889
|
})();
|
|
4832
|
-
if (!isStartWork && !isContinuation && !isWorkflowContinuation && !isActiveWorkflow) {
|
|
4890
|
+
if (!isStartWork && !isContinuation && !isWorkflowContinuation && !isTodoFinalize && !isActiveWorkflow) {
|
|
4833
4891
|
const state = readWorkState(directory);
|
|
4834
4892
|
if (state && !state.paused) {
|
|
4835
4893
|
pauseWork(directory);
|
|
@@ -4866,6 +4924,7 @@ ${cmdResult.contextInjection}`;
|
|
|
4866
4924
|
if (event.type === "session.deleted") {
|
|
4867
4925
|
const evt = event;
|
|
4868
4926
|
clearSession2(evt.properties.info.id);
|
|
4927
|
+
todoFinalizedSessions.delete(evt.properties.info.id);
|
|
4869
4928
|
if (tracker && hooks.analyticsEnabled) {
|
|
4870
4929
|
try {
|
|
4871
4930
|
tracker.endSession(evt.properties.info.id);
|
|
@@ -4953,6 +5012,7 @@ ${cmdResult.contextInjection}`;
|
|
|
4953
5012
|
lastAssistantMessageText.set(part.sessionID, part.text);
|
|
4954
5013
|
}
|
|
4955
5014
|
}
|
|
5015
|
+
let continuationFired = false;
|
|
4956
5016
|
if (hooks.workflowContinuation && event.type === "session.idle") {
|
|
4957
5017
|
const evt = event;
|
|
4958
5018
|
const sessionId = evt.properties?.sessionID ?? "";
|
|
@@ -4997,6 +5057,7 @@ ${cmdResult.contextInjection}`;
|
|
|
4997
5057
|
}
|
|
4998
5058
|
});
|
|
4999
5059
|
log("[work-continuation] Injected continuation prompt", { sessionId });
|
|
5060
|
+
continuationFired = true;
|
|
5000
5061
|
} catch (err) {
|
|
5001
5062
|
log("[work-continuation] Failed to inject continuation", { sessionId, error: String(err) });
|
|
5002
5063
|
}
|
|
@@ -5005,6 +5066,43 @@ ${cmdResult.contextInjection}`;
|
|
|
5005
5066
|
}
|
|
5006
5067
|
}
|
|
5007
5068
|
}
|
|
5069
|
+
if (event.type === "session.idle" && client && !continuationFired) {
|
|
5070
|
+
const evt = event;
|
|
5071
|
+
const sessionId = evt.properties?.sessionID ?? "";
|
|
5072
|
+
if (sessionId && !todoFinalizedSessions.has(sessionId)) {
|
|
5073
|
+
try {
|
|
5074
|
+
const todosResponse = await client.session.todo({ path: { id: sessionId } });
|
|
5075
|
+
const todos = todosResponse.data ?? [];
|
|
5076
|
+
const hasInProgress = todos.some((t) => t.status === "in_progress");
|
|
5077
|
+
if (hasInProgress) {
|
|
5078
|
+
todoFinalizedSessions.add(sessionId);
|
|
5079
|
+
const inProgressItems = todos.filter((t) => t.status === "in_progress").map((t) => ` - "${t.content}"`).join(`
|
|
5080
|
+
`);
|
|
5081
|
+
await client.session.promptAsync({
|
|
5082
|
+
path: { id: sessionId },
|
|
5083
|
+
body: {
|
|
5084
|
+
parts: [
|
|
5085
|
+
{
|
|
5086
|
+
type: "text",
|
|
5087
|
+
text: `${FINALIZE_TODOS_MARKER}
|
|
5088
|
+
You have finished your work but left these todos as in_progress:
|
|
5089
|
+
${inProgressItems}
|
|
5090
|
+
|
|
5091
|
+
Use todowrite NOW to mark all of them as "completed" (or "cancelled" if abandoned). Do not do any other work — just update the todos and stop.`
|
|
5092
|
+
}
|
|
5093
|
+
]
|
|
5094
|
+
}
|
|
5095
|
+
});
|
|
5096
|
+
log("[todo-finalize] Injected finalize prompt for in_progress todos", {
|
|
5097
|
+
sessionId,
|
|
5098
|
+
count: todos.filter((t) => t.status === "in_progress").length
|
|
5099
|
+
});
|
|
5100
|
+
}
|
|
5101
|
+
} catch (err) {
|
|
5102
|
+
log("[todo-finalize] Failed to check/finalize todos (non-fatal)", { sessionId, error: String(err) });
|
|
5103
|
+
}
|
|
5104
|
+
}
|
|
5105
|
+
}
|
|
5008
5106
|
},
|
|
5009
5107
|
"tool.execute.before": async (input, _output) => {
|
|
5010
5108
|
const toolArgs = _output.args;
|
|
@@ -10,6 +10,11 @@
|
|
|
10
10
|
* registerAgentDisplayName().
|
|
11
11
|
*/
|
|
12
12
|
export declare const AGENT_DISPLAY_NAMES: Record<string, string>;
|
|
13
|
+
/**
|
|
14
|
+
* Reset the mutable display name map to its initial state.
|
|
15
|
+
* Used by tests to prevent cross-test state pollution.
|
|
16
|
+
*/
|
|
17
|
+
export declare function resetDisplayNames(): void;
|
|
13
18
|
/**
|
|
14
19
|
* Register a display name for an agent config key.
|
|
15
20
|
* Custom agents call this so getAgentDisplayName/getAgentConfigKey work for them.
|
|
@@ -18,6 +23,15 @@ export declare const AGENT_DISPLAY_NAMES: Record<string, string>;
|
|
|
18
23
|
* or if the config key is a built-in agent name.
|
|
19
24
|
*/
|
|
20
25
|
export declare function registerAgentDisplayName(configKey: string, displayName: string): void;
|
|
26
|
+
/**
|
|
27
|
+
* Override the display name for a built-in agent.
|
|
28
|
+
* Unlike registerAgentDisplayName (which guards against builtin config keys),
|
|
29
|
+
* this function is specifically for user-configured builtin display names.
|
|
30
|
+
*
|
|
31
|
+
* Only accepts known builtin config keys. Throws for unknown keys.
|
|
32
|
+
* Invalidates the reverse lookup cache so getAgentConfigKey reflects the new name.
|
|
33
|
+
*/
|
|
34
|
+
export declare function updateBuiltinDisplayName(configKey: string, displayName: string): void;
|
|
21
35
|
/**
|
|
22
36
|
* Get display name for an agent config key.
|
|
23
37
|
* Uses case-insensitive lookup for flexibility.
|
package/dist/shared/index.d.ts
CHANGED
|
@@ -2,4 +2,4 @@ export type { DeepPartial, Brand } from "./types";
|
|
|
2
2
|
export { getWeaveVersion } from "./version";
|
|
3
3
|
export { log, getLogFilePath, logDelegation } from "./log";
|
|
4
4
|
export type { DelegationEvent } from "./log";
|
|
5
|
-
export { AGENT_DISPLAY_NAMES, getAgentDisplayName, getAgentConfigKey, registerAgentDisplayName, } from "./agent-display-names";
|
|
5
|
+
export { AGENT_DISPLAY_NAMES, getAgentDisplayName, getAgentConfigKey, registerAgentDisplayName, updateBuiltinDisplayName, resetDisplayNames, } from "./agent-display-names";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@opencode_weave/weave",
|
|
3
|
-
"version": "0.7.0-preview.1",
|
|
3
|
+
"version": "0.7.1",
|
|
4
4
|
"description": "Weave — lean OpenCode plugin with multi-agent orchestration",
|
|
5
5
|
"author": "Weave",
|
|
6
6
|
"license": "MIT",
|
|
@@ -25,7 +25,10 @@
|
|
|
25
25
|
"build": "bun run script/build.ts && tsc --emitDeclarationOnly",
|
|
26
26
|
"clean": "rm -rf dist",
|
|
27
27
|
"typecheck": "tsc --noEmit",
|
|
28
|
-
"test": "bun test"
|
|
28
|
+
"test": "bun test",
|
|
29
|
+
"eval": "bun run script/eval.ts",
|
|
30
|
+
"eval:smoke": "bun run script/eval.ts --suite pr-smoke",
|
|
31
|
+
"eval:coverage": "bun run script/verify-eval-coverage.ts"
|
|
29
32
|
},
|
|
30
33
|
"keywords": [
|
|
31
34
|
"opencode",
|