@mhingston5/lasso 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +707 -0
- package/docs/agent-wrangling.png +0 -0
- package/package.json +26 -0
- package/src/capabilities/matcher.ts +25 -0
- package/src/capabilities/registry.ts +103 -0
- package/src/capabilities/types.ts +15 -0
- package/src/cir/lower.ts +253 -0
- package/src/cir/optimize.ts +251 -0
- package/src/cir/types.ts +131 -0
- package/src/cir/validate.ts +265 -0
- package/src/compiler/compile.ts +601 -0
- package/src/compiler/feedback.ts +471 -0
- package/src/compiler/runtime-helpers.ts +455 -0
- package/src/composition/chain.ts +58 -0
- package/src/composition/conditional.ts +76 -0
- package/src/composition/parallel.ts +75 -0
- package/src/composition/types.ts +105 -0
- package/src/environment/analyzer.ts +56 -0
- package/src/environment/discovery.ts +179 -0
- package/src/environment/types.ts +68 -0
- package/src/failures/classifiers.ts +134 -0
- package/src/failures/generator.ts +421 -0
- package/src/failures/map-reference-failures.ts +23 -0
- package/src/failures/ontology.ts +210 -0
- package/src/failures/recovery.ts +214 -0
- package/src/failures/types.ts +14 -0
- package/src/index.ts +67 -0
- package/src/memory/advisor.ts +132 -0
- package/src/memory/extractor.ts +166 -0
- package/src/memory/store.ts +107 -0
- package/src/memory/types.ts +53 -0
- package/src/metaharness/engine.ts +256 -0
- package/src/metaharness/predictor.ts +168 -0
- package/src/metaharness/types.ts +40 -0
- package/src/mutation/derive.ts +308 -0
- package/src/mutation/diff.ts +52 -0
- package/src/mutation/engine.ts +256 -0
- package/src/mutation/types.ts +84 -0
- package/src/pi/command-input.ts +209 -0
- package/src/pi/commands.ts +351 -0
- package/src/pi/extension.ts +16 -0
- package/src/planner/synthesize.ts +83 -0
- package/src/planner/template-rules.ts +183 -0
- package/src/planner/types.ts +42 -0
- package/src/reference/catalog.ts +128 -0
- package/src/reference/patch-validation-strategies.ts +170 -0
- package/src/reference/patch-validation.ts +174 -0
- package/src/reference/pr-review-merge.ts +155 -0
- package/src/reference/strategies.ts +126 -0
- package/src/reference/types.ts +33 -0
- package/src/replanner/risk-rules.ts +161 -0
- package/src/replanner/runtime.ts +308 -0
- package/src/replanner/synthesize.ts +619 -0
- package/src/replanner/types.ts +73 -0
- package/src/spec/schema.ts +254 -0
- package/src/spec/types.ts +319 -0
- package/src/spec/validate.ts +296 -0
- package/src/state/snapshots.ts +43 -0
- package/src/state/types.ts +12 -0
- package/src/synthesis/graph-builder.ts +267 -0
- package/src/synthesis/harness-builder.ts +113 -0
- package/src/synthesis/intent-ir.ts +63 -0
- package/src/synthesis/policy-builder.ts +320 -0
- package/src/synthesis/risk-analyzer.ts +182 -0
- package/src/synthesis/skill-parser.ts +441 -0
- package/src/verification/engine.ts +230 -0
- package/src/versioning/file-store.ts +103 -0
- package/src/versioning/history.ts +43 -0
- package/src/versioning/store.ts +16 -0
- package/src/versioning/types.ts +31 -0
- package/test/capabilities/matcher.test.ts +67 -0
- package/test/capabilities/registry.test.ts +136 -0
- package/test/capabilities/synthesis.test.ts +264 -0
- package/test/cir/lower.test.ts +417 -0
- package/test/cir/optimize.test.ts +266 -0
- package/test/cir/validate.test.ts +368 -0
- package/test/compiler/adaptive-runtime.test.ts +157 -0
- package/test/compiler/compile.test.ts +1198 -0
- package/test/compiler/feedback.test.ts +784 -0
- package/test/compiler/guardrails.test.ts +191 -0
- package/test/compiler/trace.test.ts +404 -0
- package/test/composition/chain.test.ts +328 -0
- package/test/composition/conditional.test.ts +241 -0
- package/test/composition/parallel.test.ts +215 -0
- package/test/environment/analyzer.test.ts +204 -0
- package/test/environment/discovery.test.ts +149 -0
- package/test/failures/classifiers.test.ts +287 -0
- package/test/failures/generator.test.ts +203 -0
- package/test/failures/ontology.test.ts +439 -0
- package/test/failures/recovery.test.ts +300 -0
- package/test/helpers/createFixtureRepo.ts +84 -0
- package/test/helpers/createPatchValidationFixture.ts +144 -0
- package/test/helpers/runCompiledWorkflow.ts +208 -0
- package/test/memory/advisor.test.ts +332 -0
- package/test/memory/extractor.test.ts +295 -0
- package/test/memory/store.test.ts +244 -0
- package/test/metaharness/engine.test.ts +575 -0
- package/test/metaharness/predictor.test.ts +436 -0
- package/test/mutation/derive-failure.test.ts +209 -0
- package/test/mutation/engine.test.ts +622 -0
- package/test/package-smoke.test.ts +29 -0
- package/test/pi/command-input.test.ts +153 -0
- package/test/pi/commands.test.ts +623 -0
- package/test/planner/classify-template.test.ts +32 -0
- package/test/planner/synthesize.test.ts +901 -0
- package/test/reference/PatchValidation.failures.test.ts +137 -0
- package/test/reference/PatchValidation.test.ts +326 -0
- package/test/reference/PrReviewMerge.failures.test.ts +121 -0
- package/test/reference/PrReviewMerge.test.ts +55 -0
- package/test/reference/catalog-open.test.ts +70 -0
- package/test/replanner/runtime.test.ts +207 -0
- package/test/replanner/synthesize.test.ts +303 -0
- package/test/spec/validate.test.ts +1056 -0
- package/test/state/snapshots.test.ts +264 -0
- package/test/synthesis/custom-workflow.test.ts +264 -0
- package/test/synthesis/graph-builder.test.ts +370 -0
- package/test/synthesis/harness-builder.test.ts +128 -0
- package/test/synthesis/policy-builder.test.ts +149 -0
- package/test/synthesis/risk-analyzer.test.ts +230 -0
- package/test/synthesis/skill-parser.test.ts +796 -0
- package/test/verification/engine.test.ts +509 -0
- package/test/versioning/history.test.ts +144 -0
- package/test/versioning/store.test.ts +254 -0
- package/vitest.config.ts +9 -0
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
import type { CompiledHarnessWorkflow } from "./compile.js";
|
|
2
|
+
import type { HarnessSpec, TaskNode, LlmNode, ToolNode, HumanNode, RetryPolicy, VerificationPolicy } from "../spec/types.js";
|
|
3
|
+
import type { HarnessMutation, MutationTrigger } from "../mutation/types.js";
|
|
4
|
+
|
|
5
|
+
// ============================================================================
|
|
6
|
+
// Types
|
|
7
|
+
// ============================================================================
|
|
8
|
+
|
|
9
|
+
/**
 * Static cost estimate for a workflow graph, derived purely from the kinds of
 * nodes it contains (no runtime data is consulted).
 */
export interface CostEstimate {
  /** Number of graph nodes whose kind is "llm". */
  llmCallCount: number;
  /** Number of graph nodes whose kind is "tool". */
  toolCallCount: number;
  /** Number of graph nodes whose kind is "human". */
  humanInteractionCount: number;
  /** Sum of the per-kind node counts multiplied by a fixed per-kind duration, in milliseconds. */
  estimatedDurationMs: number;
  /** LLM call count multiplied by a flat per-call price, in USD. */
  estimatedCostUsd: number;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Risk report for a workflow graph: one factor per risk dimension plus a
 * single aggregated level.
 */
export interface RiskAssessment {
  /** Risk driven by the number of LLM nodes. */
  costRisk: RiskFactor;
  /** Risk driven by the absence of retry policies. */
  failureRisk: RiskFactor;
  /** Risk driven by the absence of verification policies. */
  qualityRisk: RiskFactor;
  /** Risk driven by the node and edge counts of the graph. */
  complexityRisk: RiskFactor;
  /** Highest level found among the four factors above. */
  overallRisk: RiskFactor["level"];
}
|
|
24
|
+
|
|
25
|
+
/** A single risk dimension: its severity and the reasons behind it. */
export interface RiskFactor {
  /** Severity of this dimension. */
  level: "low" | "medium" | "high";
  /** Human-readable explanations; empty when the level is "low". */
  factors: string[];
}
|
|
29
|
+
|
|
30
|
+
/**
 * Free-text improvement hint produced by the analyzer.
 *
 * @deprecated Use HarnessMutation with trigger/description instead
 */
export interface CompilerSuggestion {
  /** Category of the hint; decides which automatic rewrite (if any) applies. */
  type: "reduce-llm" | "add-retry" | "merge-nodes" | "simplify" | "add-verification";
  /** Human-readable explanation of the hint. */
  description: string;
  /** Expected benefit of acting on the hint. */
  impact: "low" | "medium" | "high";
}
|
|
36
|
+
|
|
37
|
+
/** Full result of analysing a compiled workflow. */
export interface CompilerAnalysis {
  /** Static cost estimate for the graph. */
  cost: CostEstimate;
  /** Per-dimension and aggregated risk levels. */
  risk: RiskAssessment;
  /**
   * Free-text improvement hints.
   *
   * @deprecated Use mutations instead
   */
  suggestions: CompilerSuggestion[];
  /** Structured, machine-applicable changes proposed for the graph. */
  mutations: HarnessMutation[];
}
|
|
44
|
+
|
|
45
|
+
// ============================================================================
|
|
46
|
+
// Pricing constants
|
|
47
|
+
// ============================================================================
|
|
48
|
+
|
|
49
|
+
/** Flat price assumed for each LLM node, in USD. */
const LLM_COST_PER_CALL_USD = 0.01;

/** Assumed duration of one tool node, in milliseconds. */
const TOOL_DURATION_MS = 500;

/** Assumed duration of one LLM node, in milliseconds. */
const LLM_DURATION_MS = 2 * 1000;

/** Assumed duration of one human node, in milliseconds (5 minutes average wait). */
const HUMAN_DURATION_MS = 5 * 60 * 1000;

/** Node count above which a graph is rated high complexity. */
const COMPLEXITY_NODE_THRESHOLD = 5;

/** LLM node count above which cost risk is rated high. */
const HIGH_LLM_THRESHOLD = 5;

/** Estimated cost (USD) above which cheaper-model mutations are proposed. */
const COST_THRESHOLD_USD = 0.05;
|
|
56
|
+
|
|
57
|
+
// ============================================================================
|
|
58
|
+
// Main analysis function
|
|
59
|
+
// ============================================================================
|
|
60
|
+
|
|
61
|
+
/**
 * Analyse the graph of a compiled workflow.
 *
 * @param compiled - Compiled workflow; only `compiled.spec.graph` is read.
 * @returns Cost estimate, risk assessment, deprecated free-text suggestions
 *   and structured mutations for the graph.
 */
export function analyzeCompiledWorkflow(
  compiled: CompiledHarnessWorkflow
): CompilerAnalysis {
  const { nodes, edges } = compiled.spec.graph;

  // The cost estimate is computed first because mutation generation consumes it.
  const cost = estimateCost(nodes);

  return {
    cost,
    risk: assessRisk(nodes, edges),
    suggestions: generateSuggestions(nodes, edges),
    mutations: generateMutations(nodes, edges, cost),
  };
}
|
|
74
|
+
|
|
75
|
+
// ============================================================================
|
|
76
|
+
// Cost estimation
|
|
77
|
+
// ============================================================================
|
|
78
|
+
|
|
79
|
+
/**
 * Estimate call counts, duration and monetary cost from node kinds alone.
 *
 * @param nodes - Graph nodes; only each node's `kind` is inspected.
 * @returns Counts of llm/tool/human nodes, a duration built from fixed
 *   per-kind durations, and a cost built from a flat per-LLM-call price.
 */
function estimateCost(nodes: TaskNode[]): CostEstimate {
  const tally = { llm: 0, tool: 0, human: 0 };

  for (const { kind } of nodes) {
    // Node kinds other than these three contribute nothing to the estimate.
    if (kind === "llm" || kind === "tool" || kind === "human") {
      tally[kind] += 1;
    }
  }

  return {
    llmCallCount: tally.llm,
    toolCallCount: tally.tool,
    humanInteractionCount: tally.human,
    estimatedDurationMs:
      tally.llm * LLM_DURATION_MS +
      tally.tool * TOOL_DURATION_MS +
      tally.human * HUMAN_DURATION_MS,
    estimatedCostUsd: tally.llm * LLM_COST_PER_CALL_USD,
  };
}
|
|
113
|
+
|
|
114
|
+
// ============================================================================
|
|
115
|
+
// Risk assessment
|
|
116
|
+
// ============================================================================
|
|
117
|
+
|
|
118
|
+
/**
 * Evaluate every risk dimension and aggregate them into one overall level.
 *
 * @param nodes - Graph nodes.
 * @param edges - Graph edges; only used for the complexity dimension.
 * @returns The four individual factors plus the aggregated level.
 */
function assessRisk(nodes: TaskNode[], edges: { from: string; to: string }[]): RiskAssessment {
  const dimensions = {
    costRisk: assessCostRisk(nodes),
    failureRisk: assessFailureRisk(nodes),
    qualityRisk: assessQualityRisk(nodes),
    complexityRisk: assessComplexityRisk(nodes, edges),
  };

  const overallRisk = computeOverallRisk(
    Object.values(dimensions).map(dimension => dimension.level)
  );

  return { ...dimensions, overallRisk };
}
|
|
135
|
+
|
|
136
|
+
/**
 * Rate cost risk from the number of LLM nodes.
 *
 * @param nodes - Graph nodes.
 * @returns "high" above HIGH_LLM_THRESHOLD LLM nodes, "medium" above 2,
 *   otherwise "low" with no factors.
 */
function assessCostRisk(nodes: TaskNode[]): RiskFactor {
  let llmTotal = 0;
  for (const node of nodes) {
    if (node.kind === "llm") {
      llmTotal += 1;
    }
  }

  const level: RiskFactor["level"] =
    llmTotal > HIGH_LLM_THRESHOLD ? "high" : llmTotal > 2 ? "medium" : "low";

  switch (level) {
    case "high":
      return {
        level,
        factors: [`High LLM call count (${llmTotal} > ${HIGH_LLM_THRESHOLD}) increases cost risk`],
      };
    case "medium":
      return {
        level,
        factors: [`Moderate LLM call count (${llmTotal}) may increase costs`],
      };
    default:
      return { level, factors: [] };
  }
}
|
|
156
|
+
|
|
157
|
+
/**
 * Rate failure risk from the presence of retry policies.
 *
 * @param nodes - Graph nodes.
 * @returns "low" as soon as any node has a retry policy with
 *   `maxAttempts > 0`; otherwise "high" (this includes an empty graph).
 */
function assessFailureRisk(nodes: TaskNode[]): RiskFactor {
  for (const node of nodes) {
    const attempts = node.retryPolicy?.maxAttempts ?? 0;
    if (attempts > 0) {
      // A single effective retry policy anywhere in the graph is enough.
      return { level: "low", factors: [] };
    }
  }

  return {
    level: "high",
    factors: ["No retry policies defined — transient failures will cause workflow termination"],
  };
}
|
|
170
|
+
|
|
171
|
+
/**
 * Rate quality risk from the presence of verification policies.
 *
 * @param nodes - Graph nodes.
 * @returns "low" as soon as any node has a verification policy with at least
 *   one rule; otherwise "high" (this includes an empty graph).
 */
function assessQualityRisk(nodes: TaskNode[]): RiskFactor {
  for (const node of nodes) {
    const ruleCount = node.verificationPolicy?.rules.length ?? 0;
    if (ruleCount > 0) {
      // One verified node anywhere in the graph is enough.
      return { level: "low", factors: [] };
    }
  }

  return {
    level: "high",
    factors: ["No verification policies defined — output quality is not guaranteed"],
  };
}
|
|
184
|
+
|
|
185
|
+
/**
 * Rate structural complexity from the node and edge counts.
 *
 * @param nodes - Graph nodes.
 * @param edges - Graph edges.
 * @returns "high" when the node count exceeds COMPLEXITY_NODE_THRESHOLD or the
 *   edge count exceeds twice that threshold; "medium" when the node count
 *   exceeds half the threshold; otherwise "low" with no factors.
 */
function assessComplexityRisk(nodes: TaskNode[], edges: { from: string; to: string }[]): RiskFactor {
  const { length: nodeTotal } = nodes;
  const { length: edgeTotal } = edges;

  const tooManyNodes = nodeTotal > COMPLEXITY_NODE_THRESHOLD;
  const tooManyEdges = edgeTotal > COMPLEXITY_NODE_THRESHOLD * 2;

  if (tooManyNodes || tooManyEdges) {
    return {
      level: "high",
      factors: [`Complex graph with ${nodeTotal} nodes and ${edgeTotal} edges increases maintenance and debugging risk`],
    };
  }

  // The medium tier looks at the node count only.
  if (nodeTotal > COMPLEXITY_NODE_THRESHOLD / 2) {
    return {
      level: "medium",
      factors: [`Moderate graph complexity with ${nodeTotal} nodes`],
    };
  }

  return { level: "low", factors: [] };
}
|
|
206
|
+
|
|
207
|
+
/**
 * Collapse several risk levels into the most severe one.
 *
 * @param levels - Individual risk levels.
 * @returns "high" if any level is "high", else "medium" if any is "medium",
 *   else "low" (also for an empty list).
 */
function computeOverallRisk(levels: Array<"low" | "medium" | "high">): "low" | "medium" | "high" {
  // Probe from most to least severe; the first hit wins.
  for (const candidate of ["high", "medium"] as const) {
    if (levels.includes(candidate)) {
      return candidate;
    }
  }
  return "low";
}
|
|
212
|
+
|
|
213
|
+
// ============================================================================
|
|
214
|
+
// Suggestion generation
|
|
215
|
+
// ============================================================================
|
|
216
|
+
|
|
217
|
+
/**
 * Build the (deprecated) free-text suggestion list for a graph.
 *
 * Suggestions are emitted in a fixed order: reduce-llm, add-retry,
 * merge-nodes, add-verification. Each appears at most once.
 *
 * @param nodes - Graph nodes.
 * @param edges - Graph edges, used to detect adjacent tool nodes.
 * @returns The suggestions that apply to this graph; may be empty.
 */
function generateSuggestions(nodes: TaskNode[], edges: { from: string; to: string }[]): CompilerSuggestion[] {
  const collected: CompilerSuggestion[] = [];
  const suggest = (
    type: CompilerSuggestion["type"],
    impact: CompilerSuggestion["impact"],
    description: string
  ): void => {
    collected.push({ type, description, impact });
  };

  const llmTotal = nodes.reduce((sum, node) => (node.kind === "llm" ? sum + 1 : sum), 0);
  if (llmTotal > HIGH_LLM_THRESHOLD) {
    suggest(
      "reduce-llm",
      "high",
      `Workflow has ${llmTotal} LLM calls (threshold: ${HIGH_LLM_THRESHOLD}). Consider consolidating prompts or using cheaper models.`
    );
  }

  const retryConfigured = nodes.some(node => (node.retryPolicy?.maxAttempts ?? 0) > 0);
  if (!retryConfigured) {
    suggest(
      "add-retry",
      "high",
      "No retry policies found. Add retry policies to tool and LLM nodes to handle transient failures."
    );
  }

  const mergeable = findAdjacentSameToolNodes(nodes, edges);
  if (mergeable.length > 0) {
    const rendered = mergeable.map(([source, target]) => `${source}->${target}`).join(", ");
    suggest(
      "merge-nodes",
      "medium",
      `Adjacent same-tool nodes detected: ${rendered}. Consider merging into a single tool call with combined arguments.`
    );
  }

  const verificationConfigured = nodes.some(
    node => (node.verificationPolicy?.rules.length ?? 0) > 0
  );
  if (!verificationConfigured) {
    suggest(
      "add-verification",
      "high",
      "No verification policies found. Add verification to ensure output quality and correctness."
    );
  }

  return collected;
}
|
|
259
|
+
|
|
260
|
+
/**
 * Find edges that connect two tool nodes invoking the same tool.
 *
 * Both endpoints must exist in `nodes`, both must be of kind "tool", and their
 * `tool` fields must be equal. Previously the `tool` field was never compared,
 * so any two adjacent tool nodes were reported as "same-tool".
 *
 * @param nodes - Graph nodes.
 * @param edges - Graph edges; edges referencing unknown node ids are ignored.
 * @returns One `[fromId, toId]` pair per matching edge, in edge order.
 */
function findAdjacentSameToolNodes(nodes: TaskNode[], edges: { from: string; to: string }[]): Array<[string, string]> {
  const nodeMap = new Map<string, TaskNode>();
  for (const node of nodes) {
    nodeMap.set(node.id, node);
  }

  const pairs: Array<[string, string]> = [];

  for (const edge of edges) {
    const fromNode = nodeMap.get(edge.from);
    const toNode = nodeMap.get(edge.to);

    if (
      fromNode &&
      toNode &&
      fromNode.kind === "tool" &&
      toNode.kind === "tool" &&
      // Only calls to the same tool are candidates for merging.
      fromNode.tool === toNode.tool
    ) {
      pairs.push([fromNode.id, toNode.id]);
    }
  }

  return pairs;
}
|
|
284
|
+
|
|
285
|
+
// ============================================================================
|
|
286
|
+
// Mutation generation
|
|
287
|
+
// ============================================================================
|
|
288
|
+
|
|
289
|
+
/**
 * Propose structured mutations for a graph.
 *
 * Mutations are returned grouped in this order:
 *  1. `cost_high` — one "replace-node" per LLM node, only when the estimated
 *     cost exceeds COST_THRESHOLD_USD.
 *  2. `retry_exhausted` — one "modify-node" per tool/llm/subworkflow node
 *     with no retry policy or with `maxAttempts === 0`.
 *  3. `loop_detected` — one "modify-node" per adjacent tool pair, tagging the
 *     downstream node with `_mergeCandidate`.
 *  4. `verification_failed` — one "add-verification" per tool/llm node that
 *     has no verification rules and is not itself referenced as a check node.
 *
 * @param nodes - Graph nodes.
 * @param edges - Graph edges.
 * @param cost - Cost estimate for the same graph.
 * @returns The proposed mutations; may be empty.
 */
function generateMutations(
  nodes: TaskNode[],
  edges: { from: string; to: string }[],
  cost: CostEstimate,
): HarnessMutation[] {
  // Cost-high: replace expensive LLM nodes with cheaper models
  const costMutations: HarnessMutation[] =
    cost.estimatedCostUsd > COST_THRESHOLD_USD
      ? nodes
          .filter(candidate => candidate.kind === "llm")
          .map((llm): HarnessMutation => ({
            type: "replace-node",
            params: { nodeId: llm.id, changes: { model: "gpt-4o-mini" } },
            trigger: "cost_high",
            description: `Replace expensive model in ${llm.id} to reduce cost`,
          }))
      : [];

  // Retry-exhausted: add retry policies to nodes that lack them
  const retryMutations = nodes
    .filter(candidate => {
      const retryable =
        candidate.kind === "tool" || candidate.kind === "llm" || candidate.kind === "subworkflow";
      return retryable && (!candidate.retryPolicy || candidate.retryPolicy.maxAttempts === 0);
    })
    .map((target): HarnessMutation => ({
      type: "modify-node",
      params: {
        nodeId: target.id,
        changes: {
          retryPolicy: { maxAttempts: 3, backoff: "exponential", initialDelay: 1 },
        },
      },
      trigger: "retry_exhausted",
      description: `Add retry policy to ${target.id} to handle transient failures`,
    }));

  // Loop-detected: flag adjacent same-tool nodes for merging
  const mergeMutations = findAdjacentSameToolNodes(nodes, edges).map(
    ([sourceId, targetId]): HarnessMutation => ({
      type: "modify-node",
      params: { nodeId: targetId, changes: { _mergeCandidate: sourceId } },
      trigger: "loop_detected",
      description: `Merge adjacent same-tool nodes ${sourceId} → ${targetId}`,
    })
  );

  // Verification-failed: add verification to nodes that lack it.
  // Nodes acting as check nodes for other nodes are excluded.
  const checkNodeIds = new Set<string>(
    nodes.flatMap(owner =>
      owner.verificationPolicy
        ? owner.verificationPolicy.rules.map(rule => rule.checkNodeId)
        : []
    )
  );

  const verificationMutations = nodes
    .filter(candidate => {
      const verifiable = candidate.kind === "tool" || candidate.kind === "llm";
      const unverified =
        !candidate.verificationPolicy || candidate.verificationPolicy.rules.length === 0;
      return verifiable && !checkNodeIds.has(candidate.id) && unverified;
    })
    .map((target): HarnessMutation => ({
      type: "add-verification",
      params: {
        nodeId: target.id,
        verificationPolicy: {
          rules: [
            {
              kind: "tool",
              checkNodeId: `verify-${target.id}`,
              onFail: "block",
            },
          ],
        },
      },
      trigger: "verification_failed",
      description: `Add verification to ${target.id} to ensure output quality`,
    }));

  return [...costMutations, ...retryMutations, ...mergeMutations, ...verificationMutations];
}
|
|
378
|
+
|
|
379
|
+
// ============================================================================
|
|
380
|
+
// Apply suggestions to spec
|
|
381
|
+
// ============================================================================
|
|
382
|
+
|
|
383
|
+
/**
 * Apply the automatically-applicable suggestions to a copy of a spec.
 *
 * Only "add-retry" and "add-verification" cause changes. "reduce-llm",
 * "merge-nodes" and "simplify" need manual intervention and are ignored.
 *
 * @param spec - Source spec; it is deep-cloned and never mutated.
 * @param suggestions - Suggestions to apply, processed in order.
 * @returns The modified deep copy of `spec`.
 */
export function applyCompilerSuggestions(
  spec: HarnessSpec,
  suggestions: CompilerSuggestion[]
): HarnessSpec {
  const result = structuredClone(spec);

  for (const { type } of suggestions) {
    if (type === "add-retry") {
      applyAddRetry(result);
    } else if (type === "add-verification") {
      applyAddVerification(result);
    }
    // Every other suggestion type has no automatic modification for now.
  }

  return result;
}
|
|
408
|
+
|
|
409
|
+
/**
 * Give every tool, llm and subworkflow node a default retry policy when it
 * has none, or when its existing policy has `maxAttempts === 0`.
 *
 * The zero-attempt case matches the rule the analyzer uses to decide that a
 * node lacks retry, so applying an "add-retry" suggestion resolves it. Each
 * node receives its own policy object, so a later edit to one node's policy
 * cannot leak into the others.
 *
 * @param spec - Spec to modify in place.
 */
function applyAddRetry(spec: HarnessSpec): void {
  for (const node of spec.graph.nodes) {
    if (node.kind !== "tool" && node.kind !== "llm" && node.kind !== "subworkflow") {
      continue;
    }

    // Keep any policy that actually retries.
    if (node.retryPolicy && node.retryPolicy.maxAttempts !== 0) {
      continue;
    }

    // Fresh object per node: policies must not be shared by reference.
    const defaultRetry: RetryPolicy = {
      maxAttempts: 3,
      backoff: "exponential",
      initialDelay: 1,
    };
    node.retryPolicy = defaultRetry;
  }
}
|
|
424
|
+
|
|
425
|
+
/**
 * Attach a placeholder verification to every tool/llm node that has no
 * verification rules.
 *
 * For each such node `X` this adds a tool node `verify-X` (running `echo`), an
 * edge `X -> verify-X`, and a blocking verification rule on `X` that points
 * at `verify-X`.
 *
 * Nodes already referenced as a `checkNodeId` by another node are skipped, so
 * verifiers do not get verifiers of their own and a second call is a no-op.
 * A node or edge that already exists under the generated id is reused rather
 * than duplicated.
 *
 * @param spec - Spec to modify in place.
 */
function applyAddVerification(spec: HarnessSpec): void {
  const { nodes, edges } = spec.graph;

  // Ids of nodes that already act as verification check nodes.
  const verifierNodeIds = new Set<string>();
  for (const node of nodes) {
    for (const rule of node.verificationPolicy?.rules ?? []) {
      verifierNodeIds.add(rule.checkNodeId);
    }
  }

  const knownNodeIds = new Set<string>(nodes.map(node => node.id));

  // Collected separately so the node list is not mutated while it is iterated.
  const verificationNodes: TaskNode[] = [];
  const newEdges: typeof spec.graph.edges = [];

  for (const node of nodes) {
    if (node.kind !== "tool" && node.kind !== "llm") continue;
    if (verifierNodeIds.has(node.id)) continue;
    if (node.verificationPolicy && node.verificationPolicy.rules.length > 0) continue;

    const verifierId = `verify-${node.id}`;

    if (!knownNodeIds.has(verifierId)) {
      const verifierNode: TaskNode = {
        id: verifierId,
        kind: "tool",
        tool: "echo",
        args: [`verify ${node.id}`],
        label: `Verify ${node.label || node.id}`,
      };
      verificationNodes.push(verifierNode);
      knownNodeIds.add(verifierId);
    }
    // Whether created or reused, this id now serves as a verifier.
    verifierNodeIds.add(verifierId);

    const verificationPolicy: VerificationPolicy = {
      rules: [
        {
          kind: "tool",
          checkNodeId: verifierId,
          onFail: "block",
        },
      ],
    };
    node.verificationPolicy = verificationPolicy;

    const edgeExists = edges.some(edge => edge.from === node.id && edge.to === verifierId);
    if (!edgeExists) {
      newEdges.push({ from: node.id, to: verifierId });
    }
  }

  nodes.push(...verificationNodes);
  edges.push(...newEdges);
}
|