@mhingston5/lasso 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.lean-ctx/graph.db +0 -0
- package/.lean-ctx/graph.db-shm +0 -0
- package/.lean-ctx/graph.db-wal +0 -0
- package/README.md +150 -5
- package/package.json +1 -1
- package/src/cir/lower.ts +2 -0
- package/src/cir/types.ts +6 -0
- package/src/compiler/compile.ts +272 -2
- package/src/failures/generator.ts +78 -2
- package/src/failures/types.ts +21 -0
- package/src/index.ts +1 -0
- package/src/metaharness/engine.ts +146 -3
- package/src/metaharness/trace-adapter.ts +34 -0
- package/src/metaharness/types.ts +41 -0
- package/src/replanner/runtime.ts +181 -0
- package/src/spec/schema.ts +46 -6
- package/src/spec/types.ts +39 -0
- package/test/compiler/per-node-harness.test.ts +955 -0
- package/test/failures/risk.test.ts +285 -0
- package/test/metaharness/synthesize-from-trace.test.ts +372 -0
- package/test/replanner/runtime.test.ts +134 -0
|
Binary file
|
|
Binary file
|
|
File without changes
|
package/README.md
CHANGED
|
@@ -23,6 +23,9 @@
|
|
|
23
23
|
- [Harness mutations](#harness-mutations)
|
|
24
24
|
- [Guardrails](#guardrails)
|
|
25
25
|
- [Failure mode generation](#failure-mode-generation)
|
|
26
|
+
- [Risk assessment](#risk-assessment)
|
|
27
|
+
- [Per-node harnesses](#per-node-harnesses)
|
|
28
|
+
- [Trace-based synthesis](#trace-based-synthesis)
|
|
26
29
|
- [Adaptive runtime](#adaptive-runtime)
|
|
27
30
|
- [Lineage persistence](#lineage-persistence)
|
|
28
31
|
- [Harness memory](#harness-memory)
|
|
@@ -44,15 +47,19 @@ Intent
|
|
|
44
47
|
→ Memory query (past patterns, what worked/failed)
|
|
45
48
|
→ Graph synthesis (planner + capabilities)
|
|
46
49
|
→ Failure prediction (auth, tool, network, resource)
|
|
50
|
+
→ Risk assessment (probability × impact, threshold filtering)
|
|
47
51
|
→ Policy synthesis (mutations: add verification, retry, approval)
|
|
48
52
|
→ Compilation (validate → lower → optimize → execute)
|
|
49
|
-
→
|
|
53
|
+
→ Per-node harnesses (guardrails, verification hooks)
|
|
54
|
+
→ Runtime adaptation (trace → synthesize → continueAsNew)
|
|
50
55
|
```
|
|
51
56
|
|
|
52
57
|
## What is Lasso?
|
|
53
58
|
|
|
54
|
-
Lasso is a **
|
|
55
|
-
|
|
59
|
+
Lasso is a **runtime harness synthesizer** built on [pi-duroxide](https://github.com/mhingston/pi-duroxide). It
|
|
60
|
+
synthesizes deterministic scaffolding around non-deterministic parts — predicting
|
|
61
|
+
failures, assessing risks, and generating per-node guardrails before execution.
|
|
62
|
+
It's a TypeScript package that plugs into pi via the `pi` field in `package.json`. When installed, it:
|
|
56
63
|
|
|
57
64
|
1. Boots [pi-duroxide](https://github.com/mhingston/pi-duroxide) (the durable workflow runtime)
|
|
58
65
|
2. Registers 5 slash commands (`/lasso:plan`, `/lasso:run`, etc.)
|
|
@@ -153,9 +160,13 @@ buildTaskGraph() → TaskGraph
|
|
|
153
160
|
↓
|
|
154
161
|
analyzeRisks() → RiskModel
|
|
155
162
|
↓
|
|
163
|
+
generateFailureModes() → FailureMode[] + Risk[]
|
|
164
|
+
↓
|
|
165
|
+
assessRisks() → RiskAssessment (overallScore, threshold filtering)
|
|
166
|
+
↓
|
|
156
167
|
synthesizePolicy() → PolicyBundle
|
|
157
168
|
↓
|
|
158
|
-
synthesizeHarness() → HarnessSpec
|
|
169
|
+
synthesizeHarness() → HarnessSpec (with per-node guardrails & verification hooks)
|
|
159
170
|
↓
|
|
160
171
|
compileHarnessSpec() → CompiledWorkflow → pi-duroxide
|
|
161
172
|
```
|
|
@@ -167,7 +178,9 @@ Workflow executes
|
|
|
167
178
|
↓
|
|
168
179
|
Execution trace captured (timestamps, I/O snapshots, failures)
|
|
169
180
|
↓
|
|
170
|
-
|
|
181
|
+
synthesizeFromTrace(trace, currentSpec, env) → HarnessSynthesisResult
|
|
182
|
+
→ classifies repeated failures, slow nodes, cost spikes
|
|
183
|
+
→ derives mutations
|
|
171
184
|
↓
|
|
172
185
|
mutateHarness(spec, mutations) → new spec
|
|
173
186
|
↓
|
|
@@ -389,6 +402,13 @@ All top-level objects are **strict**. Unknown fields are rejected.
|
|
|
389
402
|
| `merge` | `waitFor`, `strategy` | Fork-join synchronization |
|
|
390
403
|
| `subworkflow` | `specRef`, `inputs` | `ctx.scheduleSubOrchestration()` |
|
|
391
404
|
|
|
405
|
+
**Per-node fields** (available on all node kinds via `BaseNode`):
|
|
406
|
+
|
|
407
|
+
| Field | Type | Notes |
|
|
408
|
+
| --- | --- | --- |
|
|
409
|
+
| `guardrails` | `NodeGuardrails` | Per-node limits (timeout, retries, cost, constraints) |
|
|
410
|
+
| `verificationHooks` | `VerificationHook[]` | Inline checks that run after this node completes |
|
|
411
|
+
|
|
392
412
|
### Validation rules
|
|
393
413
|
|
|
394
414
|
1. Node IDs must be unique
|
|
@@ -499,6 +519,131 @@ Failure modes are cross-referenced with environment constraints: if auth
|
|
|
499
519
|
constraint detected, auth failure probability is boosted. Each mode includes
|
|
500
520
|
triggers, mitigations, and recovery actions.
|
|
501
521
|
|
|
522
|
+
`generateFailureModes()` now returns `risks: Risk[]` alongside `failureModes`,
|
|
523
|
+
converting each failure mode into a quantified risk with probability, impact,
|
|
524
|
+
and score.
|
|
525
|
+
|
|
526
|
+
### Risk assessment
|
|
527
|
+
|
|
528
|
+
First-class `Risk` type with quantitative scoring. Each risk carries probability
|
|
529
|
+
(0-1), impact (0-1), and a composite score. `assessRisks()` filters by threshold
|
|
530
|
+
and returns a structured assessment.
|
|
531
|
+
|
|
532
|
+
```typescript
|
|
533
|
+
import { generateFailureModes, assessRisks } from "lasso";
|
|
534
|
+
|
|
535
|
+
const generation = generateFailureModes("Deploy my app to staging", env);
|
|
536
|
+
// generation.risks — Risk[] converted from failure modes
|
|
537
|
+
|
|
538
|
+
const assessment = assessRisks(generation.risks);
|
|
539
|
+
// assessment.overallScore — average risk score (0-1)
|
|
540
|
+
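// assessment.risksAboveThreshold — risks scoring >= highRiskThreshold (default 0.7; risks from generateFailureModes() score at most 0.8 × 0.7 = 0.56, so pass a lower threshold to flag them)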
|
|
541
|
+
// assessment.highRiskThreshold — the threshold used
|
|
542
|
+
|
|
543
|
+
// Custom threshold
|
|
544
|
+
const strict = assessRisks(generation.risks, { highRiskThreshold: 0.5 });
|
|
545
|
+
```
|
|
546
|
+
|
|
547
|
+
**Risk interface:**
|
|
548
|
+
|
|
549
|
+
| Field | Type | Description |
|
|
550
|
+
| --- | --- | --- |
|
|
551
|
+
| `id` | `string` | Unique risk identifier |
|
|
552
|
+
| `probability` | `number` (0-1) | Likelihood of occurrence |
|
|
553
|
+
| `impact` | `number` (0-1) | Severity if it occurs |
|
|
554
|
+
| `score` | `number` | `probability × impact` |
|
|
555
|
+
| `signals` | `string[]` | Triggers or indicators |
|
|
556
|
+
| `mitigations` | `HarnessMutation[]` | Suggested mitigations as executable mutations |
|
|
557
|
+
| `failureClass` | `FailureClass` | Classification (auth, tool, network, etc.) |
|
|
558
|
+
| `description` | `string` | Human-readable description |
|
|
559
|
+
|
|
560
|
+
### Per-node harnesses
|
|
561
|
+
|
|
562
|
+
Every node in a `HarnessSpec` can carry its own guardrails and verification
|
|
563
|
+
hooks. These override global settings and run only during that node's execution.
|
|
564
|
+
|
|
565
|
+
```json
|
|
566
|
+
{
|
|
567
|
+
"id": "deploy",
|
|
568
|
+
"kind": "tool",
|
|
569
|
+
"tool": "bash",
|
|
570
|
+
"args": ["./deploy.sh"],
|
|
571
|
+
"guardrails": {
|
|
572
|
+
"timeoutSeconds": 120,
|
|
573
|
+
"maxRetries": 2,
|
|
574
|
+
"maxCostUsd": 0.10,
|
|
575
|
+
"constraints": ["exit_code == 0"]
|
|
576
|
+
},
|
|
577
|
+
"verificationHooks": [
|
|
578
|
+
{
|
|
579
|
+
"name": "health-check",
|
|
580
|
+
"kind": "tool",
|
|
581
|
+
"check": "curl -sf http://localhost:3000/health",
|
|
582
|
+
"onFail": "block",
|
|
583
|
+
"maxAttempts": 3
|
|
584
|
+
}
|
|
585
|
+
]
|
|
586
|
+
}
|
|
587
|
+
```
|
|
588
|
+
|
|
589
|
+
**NodeGuardrails:**
|
|
590
|
+
|
|
591
|
+
| Field | Type | Description |
|
|
592
|
+
| --- | --- | --- |
|
|
593
|
+
| `timeoutSeconds` | `number` | Max execution time for this node (elapsed time is checked after the node returns) |
|
|
594
|
+
| `maxRetries` | `number` | Max retries (overrides global retryPolicy) |
|
|
595
|
+
| `maxCostUsd` | `number` | Max LLM cost for this node |
|
|
596
|
+
| `constraints` | `string[]` | Custom expressions that must hold true |
|
|
597
|
+
|
|
598
|
+
**VerificationHook:**
|
|
599
|
+
|
|
600
|
+
| Field | Type | Description |
|
|
601
|
+
| --- | --- | --- |
|
|
602
|
+
| `name` | `string` | Hook identifier |
|
|
603
|
+
| `kind` | `"tool" \| "llm" \| "expression"` | Type of check |
|
|
604
|
+
| `check` | `string` | Shell command (run via `bash`), LLM prompt, or condition expression |
|
|
605
|
+
| `onFail` | `"block" \| "warn" \| "retry"` | Action on failure |
|
|
606
|
+
| `maxAttempts` | `number` | Max verification attempts when `onFail` is `"retry"` (optional, defaults to 2) |
|
|
607
|
+
|
|
608
|
+
Per-node guardrails override global `executionPolicy` settings. Verification
|
|
609
|
+
hooks run inline after the node completes, with retry/block/warn semantics.
|
|
610
|
+
|
|
611
|
+
### Trace-based synthesis
|
|
612
|
+
|
|
613
|
+
`synthesizeFromTrace()` analyzes an execution trace mid-flight, classifies
|
|
614
|
+
failures, and derives mutations — wired into the compiler's adaptation loop.
|
|
615
|
+
|
|
616
|
+
```typescript
|
|
617
|
+
import { DefaultMetaHarness } from "lasso";
|
|
618
|
+
|
|
619
|
+
const meta = new DefaultMetaHarness(config);
|
|
620
|
+
|
|
621
|
+
const trace = {
|
|
622
|
+
completedNodes: [
|
|
623
|
+
{ nodeId: "build", startedAt: 1, completedAt: 2, costUsd: 0.05 },
|
|
624
|
+
],
|
|
625
|
+
failedNodes: [
|
|
626
|
+
{ nodeId: "deploy", startedAt: 2, failedAt: 3, error: "auth expired", failureClass: "auth", retryCount: 3 },
|
|
627
|
+
],
|
|
628
|
+
totalCostUsd: 0.15,
|
|
629
|
+
capturedAt: Date.now(),
|
|
630
|
+
};
|
|
631
|
+
|
|
632
|
+
const result = await meta.synthesizeFromTrace(trace, currentSpec, environment);
|
|
633
|
+
// result.mutations — HarnessMutation[] derived from trace analysis
|
|
634
|
+
// result.spec — mutated HarnessSpec
|
|
635
|
+
// result.rationale — human-readable explanation of changes
|
|
636
|
+
// result.decision — "continue" | "needs_operator_input" | "stop"
|
|
637
|
+
```
|
|
638
|
+
|
|
639
|
+
The synthesis classifies:
|
|
640
|
+
- **Repeated failures** — same node failing across retries → add verification or block
|
|
641
|
+
- **Slow nodes** — duration spikes → tighten timeout guardrails
|
|
642
|
+
- **Cost spikes** — LLM cost above expected → swap to cheaper model
|
|
643
|
+
|
|
644
|
+
This feeds directly into the `continueAsNew` path, producing a new harness
|
|
645
|
+
version with repairs applied.
|
|
646
|
+
|
|
502
647
|
### Verification engine
|
|
503
648
|
|
|
504
649
|
Standalone module with compositional strategies:
|
package/package.json
CHANGED
package/src/cir/lower.ts
CHANGED
|
@@ -104,6 +104,8 @@ function lowerNode(spec: HarnessSpec, node: TaskNode, index: number, transitions
|
|
|
104
104
|
...(node.retryPolicy ? { retry: cloneRetryPolicy(node.retryPolicy) } : {}),
|
|
105
105
|
...(verification ? { verification } : {}),
|
|
106
106
|
...(failureRouting ? { failureRouting } : {}),
|
|
107
|
+
...(node.guardrails ? { guardrails: { ...node.guardrails } } : {}),
|
|
108
|
+
...(node.verificationHooks ? { verificationHooks: node.verificationHooks.map(h => ({ ...h })) } : {}),
|
|
107
109
|
terminal: outgoingCount === 0,
|
|
108
110
|
} as const;
|
|
109
111
|
|
package/src/cir/types.ts
CHANGED
|
@@ -5,11 +5,13 @@ import type {
|
|
|
5
5
|
HumanPolicy,
|
|
6
6
|
LlmNode,
|
|
7
7
|
MergeNode,
|
|
8
|
+
NodeGuardrails,
|
|
8
9
|
ObservabilityPolicy,
|
|
9
10
|
RetryPolicy,
|
|
10
11
|
SubworkflowNode,
|
|
11
12
|
TaskNode,
|
|
12
13
|
ToolNode,
|
|
14
|
+
VerificationHook,
|
|
13
15
|
VerificationRule,
|
|
14
16
|
} from "../spec/types.js";
|
|
15
17
|
|
|
@@ -69,6 +71,10 @@ export interface CirNodeBase<K extends TaskNode["kind"] = TaskNode["kind"]> {
|
|
|
69
71
|
verification?: CirVerificationHook[];
|
|
70
72
|
failureRouting?: CirFailureRoutingHint[];
|
|
71
73
|
terminal?: boolean;
|
|
74
|
+
/** Per-node guardrails from spec */
|
|
75
|
+
guardrails?: NodeGuardrails;
|
|
76
|
+
/** Per-node verification hooks from spec */
|
|
77
|
+
verificationHooks?: VerificationHook[];
|
|
72
78
|
}
|
|
73
79
|
|
|
74
80
|
export interface CirToolNode extends CirNodeBase<"tool"> {
|
package/src/compiler/compile.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { lowerHarnessSpecToCir } from "../cir/lower.js";
|
|
|
5
5
|
import { optimizeCirWorkflow } from "../cir/optimize.js";
|
|
6
6
|
import { validateCirWorkflow } from "../cir/validate.js";
|
|
7
7
|
import type { HarnessSpec } from "../spec/types.js";
|
|
8
|
+
import { buildTraceEntries } from "../metaharness/trace-adapter.js";
|
|
8
9
|
import { validateHarnessSpec } from "../spec/validate.js";
|
|
9
10
|
import { addFailure, createHarnessState, recordNodeResult, updateMetrics } from "../state/snapshots.js";
|
|
10
11
|
import type { HarnessState } from "../state/types.js";
|
|
@@ -13,6 +14,7 @@ import {
|
|
|
13
14
|
checkGuardrails,
|
|
14
15
|
evaluateConditionExpression,
|
|
15
16
|
GuardrailExceededError,
|
|
17
|
+
isVerificationSuccess,
|
|
16
18
|
recordTrace,
|
|
17
19
|
runWithRetry,
|
|
18
20
|
type ExecutionState,
|
|
@@ -22,6 +24,9 @@ import { unwrapAdaptiveInput, prepareRuntimeReplan, type AdaptiveRuntimeMetadata
|
|
|
22
24
|
import type { LineageEntry } from "../versioning/types.js";
|
|
23
25
|
import type { HarnessExecutionTrace } from "../versioning/types.js";
|
|
24
26
|
import { buildReferenceHarnessSpec } from "../reference/catalog.js";
|
|
27
|
+
import type { ExecutionTrace } from "../metaharness/types.js";
|
|
28
|
+
import { deriveMutationsFromTrace } from "../mutation/derive.js";
|
|
29
|
+
import { mutateHarness } from "../mutation/engine.js";
|
|
25
30
|
|
|
26
31
|
export interface CompiledHarnessResult {
|
|
27
32
|
status: "completed";
|
|
@@ -189,13 +194,54 @@ function createWorkflowGenerator(
|
|
|
189
194
|
throw new GuardrailExceededError(guardrailResult.reason!);
|
|
190
195
|
}
|
|
191
196
|
|
|
192
|
-
|
|
197
|
+
// Per-node guardrails: check constraints before execution (per-node cost is checked after the node runs)
|
|
198
|
+
const specNode = getSpecNode(effectiveSpec, node.id);
|
|
199
|
+
if (specNode?.guardrails) {
|
|
200
|
+
checkPerNodeGuardrails(specNode.guardrails, state, node.id);
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
// Per-node timeout: record start time before yield
|
|
204
|
+
const nodeStartTime = specNode?.guardrails?.timeoutSeconds !== undefined ? Date.now() : undefined;
|
|
205
|
+
const nodeStartCost = specNode?.guardrails?.maxCostUsd !== undefined ? state.estimatedCostUsd : undefined;
|
|
206
|
+
|
|
207
|
+
// Override retry with per-node maxRetries if present
|
|
208
|
+
const effectiveNode = specNode?.guardrails?.maxRetries !== undefined
|
|
209
|
+
? { ...node, retry: { maxAttempts: specNode.guardrails.maxRetries + 1, backoff: node.retry?.backoff ?? "constant", initialDelay: node.retry?.initialDelay ?? 0, maxDelay: node.retry?.maxDelay, retryOn: node.retry?.retryOn } }
|
|
210
|
+
: node;
|
|
211
|
+
|
|
212
|
+
const output = yield* executeNodeWithPolicies(ctx, state, effectiveNode, effectiveNodeMap, effectiveCir.name);
|
|
193
213
|
state.outputs[node.id] = output;
|
|
194
214
|
state.stepCount += 1;
|
|
195
215
|
if (node.kind === "llm") {
|
|
196
216
|
state.estimatedCostUsd += 0.01;
|
|
197
217
|
}
|
|
198
218
|
|
|
219
|
+
// Per-node timeout: check after yield returns
|
|
220
|
+
if (nodeStartTime !== undefined && specNode?.guardrails?.timeoutSeconds !== undefined) {
|
|
221
|
+
const elapsedMs = Date.now() - nodeStartTime;
|
|
222
|
+
const timeoutMs = specNode.guardrails.timeoutSeconds * 1000;
|
|
223
|
+
if (elapsedMs > timeoutMs) {
|
|
224
|
+
throw new GuardrailExceededError(
|
|
225
|
+
`Per-node timeout exceeded for node ${node.id} (${elapsedMs}ms > ${timeoutMs}ms)`,
|
|
226
|
+
);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
// Per-node cost: check delta, not cumulative
|
|
231
|
+
if (nodeStartCost !== undefined && specNode?.guardrails?.maxCostUsd !== undefined) {
|
|
232
|
+
const nodeCost = state.estimatedCostUsd - nodeStartCost;
|
|
233
|
+
if (nodeCost > specNode.guardrails.maxCostUsd) {
|
|
234
|
+
throw new GuardrailExceededError(
|
|
235
|
+
`Per-node cost limit exceeded for node ${node.id} ($${nodeCost.toFixed(4)}/$${specNode.guardrails.maxCostUsd.toFixed(2)})`,
|
|
236
|
+
);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
// Per-node verification hooks
|
|
241
|
+
if (specNode?.verificationHooks && specNode.verificationHooks.length > 0) {
|
|
242
|
+
yield* runPerNodeVerificationHooks(ctx, state, node, specNode.verificationHooks, effectiveNodeMap);
|
|
243
|
+
}
|
|
244
|
+
|
|
199
245
|
const parallelMergePlan = effectiveParallelMergePlans.get(node.id);
|
|
200
246
|
if (parallelMergePlan) {
|
|
201
247
|
const branchNodes = parallelMergePlan.branchNodeIds.map(branchNodeId => getNode(effectiveNodeMap, branchNodeId));
|
|
@@ -582,8 +628,31 @@ function* buildCompletedResultWithContinuation(
|
|
|
582
628
|
const replanDecision = prepareRuntimeReplan(adaptiveMetadata, state.input, result);
|
|
583
629
|
|
|
584
630
|
if (replanDecision.decision === "continue_as_new") {
|
|
631
|
+
const traceMutations = synthesizeTraceMutations(state, adaptiveMetadata);
|
|
632
|
+
|
|
633
|
+
let nextInput = replanDecision.nextInput;
|
|
634
|
+
if (traceMutations.length > 0) {
|
|
635
|
+
const baseSpec = buildReferenceHarnessSpec(replanDecision.nextRequest);
|
|
636
|
+
const mutated = mutateHarness(baseSpec, traceMutations);
|
|
637
|
+
nextInput = {
|
|
638
|
+
...nextInput,
|
|
639
|
+
__lassoAdaptiveRuntime: {
|
|
640
|
+
...nextInput.__lassoAdaptiveRuntime,
|
|
641
|
+
currentVersion: {
|
|
642
|
+
...nextInput.__lassoAdaptiveRuntime.currentVersion,
|
|
643
|
+
spec: mutated.spec,
|
|
644
|
+
},
|
|
645
|
+
pendingMutations: [
|
|
646
|
+
...(nextInput.__lassoAdaptiveRuntime.pendingMutations ?? []),
|
|
647
|
+
...traceMutations,
|
|
648
|
+
],
|
|
649
|
+
},
|
|
650
|
+
};
|
|
651
|
+
ctx.traceInfo(`Lasso trace synthesis: applied ${traceMutations.length} mutation(s) from execution trace`);
|
|
652
|
+
}
|
|
653
|
+
|
|
585
654
|
ctx.traceInfo(`Lasso adaptive runtime: continuing as new with version ${replanDecision.nextVersion.version}`);
|
|
586
|
-
yield ctx.continueAsNew(
|
|
655
|
+
yield ctx.continueAsNew(nextInput);
|
|
587
656
|
} else {
|
|
588
657
|
ctx.traceInfo(`Lasso adaptive runtime: ${replanDecision.decision}`);
|
|
589
658
|
}
|
|
@@ -599,3 +668,204 @@ function formatUnknownError(error: unknown): string {
|
|
|
599
668
|
|
|
600
669
|
return String(error);
|
|
601
670
|
}
|
|
671
|
+
|
|
672
|
+
// NOTE: Timestamps here are approximations — the total harness duration is
|
|
673
|
+
// applied uniformly to all nodes. Per-node timing should be sourced from
|
|
674
|
+
// trace entries when available.
|
|
675
|
+
function synthesizeTraceMutations(
|
|
676
|
+
state: ExecutionState,
|
|
677
|
+
adaptiveMetadata: AdaptiveRuntimeMetadata,
|
|
678
|
+
): ReturnType<typeof deriveMutationsFromTrace> {
|
|
679
|
+
const executionTrace: ExecutionTrace = {
|
|
680
|
+
completedNodes: [],
|
|
681
|
+
failedNodes: [],
|
|
682
|
+
currentNodeId: undefined,
|
|
683
|
+
capturedAt: Date.now(),
|
|
684
|
+
};
|
|
685
|
+
|
|
686
|
+
for (const failure of state.harnessState.failures) {
|
|
687
|
+
executionTrace.failedNodes.push({
|
|
688
|
+
nodeId: failure.nodeId ?? "unknown",
|
|
689
|
+
startedAt: Date.now() - (state.harnessState.metrics.durationMs ?? 0),
|
|
690
|
+
failedAt: Date.now(),
|
|
691
|
+
error: failure.message,
|
|
692
|
+
retryCount: 0,
|
|
693
|
+
});
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
for (const [nodeId, output] of Object.entries(state.harnessState.nodeResults ?? {})) {
|
|
697
|
+
if (!executionTrace.failedNodes.some(f => f.nodeId === nodeId)) {
|
|
698
|
+
executionTrace.completedNodes.push({
|
|
699
|
+
nodeId,
|
|
700
|
+
startedAt: Date.now() - (state.harnessState.metrics.durationMs ?? 0),
|
|
701
|
+
completedAt: Date.now(),
|
|
702
|
+
output,
|
|
703
|
+
});
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
const traceEntries = buildTraceEntries(executionTrace);
|
|
708
|
+
|
|
709
|
+
if (traceEntries.length === 0) {
|
|
710
|
+
return [];
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
const harnessTrace: HarnessExecutionTrace = {
|
|
714
|
+
entries: traceEntries,
|
|
715
|
+
totalDurationMs: state.harnessState.metrics.durationMs ?? 0,
|
|
716
|
+
nodeCount: executionTrace.completedNodes.length + executionTrace.failedNodes.length,
|
|
717
|
+
failureCount: executionTrace.failedNodes.length,
|
|
718
|
+
startTimeMs: Date.now() - (state.harnessState.metrics.durationMs ?? 0),
|
|
719
|
+
endTimeMs: Date.now(),
|
|
720
|
+
};
|
|
721
|
+
|
|
722
|
+
return deriveMutationsFromTrace(harnessTrace, adaptiveMetadata.currentVersion.spec);
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
function getSpecNode(spec: HarnessSpec, nodeId: string): import("../spec/types.js").TaskNode | undefined {
|
|
726
|
+
return spec.graph.nodes.find(node => node.id === nodeId);
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
function checkPerNodeGuardrails(
|
|
730
|
+
guardrails: import("../spec/types.js").NodeGuardrails,
|
|
731
|
+
state: ExecutionState,
|
|
732
|
+
nodeId: string,
|
|
733
|
+
): void {
|
|
734
|
+
if (guardrails.constraints) {
|
|
735
|
+
for (const constraint of guardrails.constraints) {
|
|
736
|
+
const result = evaluateConditionExpression(constraint, state);
|
|
737
|
+
if (!result) {
|
|
738
|
+
throw new GuardrailExceededError(
|
|
739
|
+
`Constraint failed for node ${nodeId}: "${constraint}"`,
|
|
740
|
+
);
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
function* runPerNodeVerificationHooks(
|
|
749
|
+
ctx: WorkflowContext,
|
|
750
|
+
state: ExecutionState,
|
|
751
|
+
node: CirNode,
|
|
752
|
+
hooks: import("../spec/types.js").VerificationHook[],
|
|
753
|
+
nodeMap: Map<string, CirNode>,
|
|
754
|
+
): Generator<YieldItem, void, unknown> {
|
|
755
|
+
for (const hook of hooks) {
|
|
756
|
+
let hookAttempts = 0;
|
|
757
|
+
const maxAttempts = hook.maxAttempts ?? 2;
|
|
758
|
+
|
|
759
|
+
while (true) {
|
|
760
|
+
if (hook.kind === "expression") {
|
|
761
|
+
const result = evaluateConditionExpression(hook.check, state);
|
|
762
|
+
if (result) {
|
|
763
|
+
break; // Hook passed, move to next hook
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
// Expression failed
|
|
767
|
+
if (hook.onFail === "block") {
|
|
768
|
+
throw new Error(
|
|
769
|
+
`Verification hook "${hook.name}" blocked: expression "${hook.check}" evaluated to false for node ${node.id}`,
|
|
770
|
+
);
|
|
771
|
+
}
|
|
772
|
+
if (hook.onFail === "warn") {
|
|
773
|
+
ctx.traceWarn(
|
|
774
|
+
`[lasso] Verification hook "${hook.name}" warning: expression "${hook.check}" evaluated to false for node ${node.id}`,
|
|
775
|
+
);
|
|
776
|
+
break; // Warn but continue to next hook
|
|
777
|
+
}
|
|
778
|
+
if (hook.onFail === "retry") {
|
|
779
|
+
hookAttempts++;
|
|
780
|
+
if (hookAttempts < maxAttempts) {
|
|
781
|
+
recordTrace(ctx, state, node, "retry", {
|
|
782
|
+
reason: "verification-hook",
|
|
783
|
+
hook: hook.name,
|
|
784
|
+
attemptNumber: hookAttempts + 1,
|
|
785
|
+
});
|
|
786
|
+
// Re-execute the node
|
|
787
|
+
yield* executeNodeWithPolicies(ctx, state, node as Exclude<CirNode, { kind: "condition" | "merge" }>, nodeMap, "current");
|
|
788
|
+
continue; // Re-check the same hook
|
|
789
|
+
}
|
|
790
|
+
throw new Error(
|
|
791
|
+
`Verification hook "${hook.name}" retry exhausted for node ${node.id}`,
|
|
792
|
+
);
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
// For tool/llm hooks, create an inline verifier node
|
|
797
|
+
const verifierNodeId = `__verify_hook_${hook.name}`;
|
|
798
|
+
let verifierNode: CirNode;
|
|
799
|
+
|
|
800
|
+
if (hook.kind === "llm") {
|
|
801
|
+
verifierNode = {
|
|
802
|
+
id: verifierNodeId,
|
|
803
|
+
kind: "llm",
|
|
804
|
+
source: {
|
|
805
|
+
specNodeId: node.id,
|
|
806
|
+
specNodeKind: node.kind,
|
|
807
|
+
specPath: `verificationHook:${hook.name}`,
|
|
808
|
+
},
|
|
809
|
+
action: {
|
|
810
|
+
provider: "anthropic",
|
|
811
|
+
model: "claude-sonnet",
|
|
812
|
+
prompt: hook.check,
|
|
813
|
+
},
|
|
814
|
+
};
|
|
815
|
+
} else {
|
|
816
|
+
verifierNode = {
|
|
817
|
+
id: verifierNodeId,
|
|
818
|
+
kind: "tool",
|
|
819
|
+
source: {
|
|
820
|
+
specNodeId: node.id,
|
|
821
|
+
specNodeKind: node.kind,
|
|
822
|
+
specPath: `verificationHook:${hook.name}`,
|
|
823
|
+
},
|
|
824
|
+
action: {
|
|
825
|
+
tool: "bash",
|
|
826
|
+
args: [hook.check],
|
|
827
|
+
},
|
|
828
|
+
};
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
const verifierOutput = yield createActionYieldItem(ctx, verifierNode as Exclude<CirNode, { kind: "condition" | "merge" }>, "current");
|
|
832
|
+
state.outputs[verifierNodeId] = verifierOutput;
|
|
833
|
+
|
|
834
|
+
const passed = isVerificationSuccess(verifierOutput);
|
|
835
|
+
if (passed) {
|
|
836
|
+
break; // Hook passed, move to next hook
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
// Verification failed
|
|
840
|
+
if (hook.onFail === "block") {
|
|
841
|
+
throw new Error(
|
|
842
|
+
`Verification hook "${hook.name}" blocked: verifier returned false for node ${node.id}`,
|
|
843
|
+
);
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
if (hook.onFail === "warn") {
|
|
847
|
+
ctx.traceWarn(
|
|
848
|
+
`[lasso] Verification hook "${hook.name}" warning: verifier returned false for node ${node.id}`,
|
|
849
|
+
);
|
|
850
|
+
break; // Warn but continue to next hook
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
if (hook.onFail === "retry") {
|
|
854
|
+
hookAttempts++;
|
|
855
|
+
if (hookAttempts < maxAttempts) {
|
|
856
|
+
recordTrace(ctx, state, node, "retry", {
|
|
857
|
+
reason: "verification-hook",
|
|
858
|
+
hook: hook.name,
|
|
859
|
+
attemptNumber: hookAttempts + 1,
|
|
860
|
+
});
|
|
861
|
+
// Re-execute the node
|
|
862
|
+
yield* executeNodeWithPolicies(ctx, state, node as Exclude<CirNode, { kind: "condition" | "merge" }>, nodeMap, "current");
|
|
863
|
+
continue; // Re-check the same hook
|
|
864
|
+
}
|
|
865
|
+
throw new Error(
|
|
866
|
+
`Verification hook "${hook.name}" retry exhausted for node ${node.id}`,
|
|
867
|
+
);
|
|
868
|
+
}
|
|
869
|
+
}
|
|
870
|
+
}
|
|
871
|
+
}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import type { FailureClass } from "./ontology.js";
|
|
2
2
|
import type { EnvironmentModel } from "../environment/types.js";
|
|
3
3
|
import type { HarnessSpec } from "../spec/types.js";
|
|
4
|
+
import type { Risk, RiskAssessment } from "./types.js";
|
|
5
|
+
import type { HarnessMutation } from "../mutation/types.js";
|
|
4
6
|
|
|
5
7
|
export interface FailureMode {
|
|
6
8
|
id: string;
|
|
@@ -15,6 +17,7 @@ export interface FailureMode {
|
|
|
15
17
|
export interface FailureModeGeneration {
|
|
16
18
|
taskDescription: string;
|
|
17
19
|
failureModes: FailureMode[];
|
|
20
|
+
risks: Risk[];
|
|
18
21
|
generatedAt: number;
|
|
19
22
|
riskSummary: string;
|
|
20
23
|
}
|
|
@@ -212,8 +215,6 @@ const PATTERN_RULES: PatternRule[] = [
|
|
|
212
215
|
},
|
|
213
216
|
];
|
|
214
217
|
|
|
215
|
-
let idCounter = 0;
|
|
216
|
-
|
|
217
218
|
function generateId(cls: FailureClass, index: number): string {
|
|
218
219
|
return `gen-${cls}-${index}-${Date.now().toString(36)}`;
|
|
219
220
|
}
|
|
@@ -411,11 +412,86 @@ export function generateFailureModes(
|
|
|
411
412
|
}
|
|
412
413
|
|
|
413
414
|
const riskSummary = buildRiskSummary(failureModes);
|
|
415
|
+
const risks = failureModes.map(failureModeToRisk);
|
|
414
416
|
|
|
415
417
|
return {
|
|
416
418
|
taskDescription,
|
|
417
419
|
failureModes,
|
|
420
|
+
risks,
|
|
418
421
|
generatedAt: Date.now(),
|
|
419
422
|
riskSummary,
|
|
420
423
|
};
|
|
421
424
|
}
|
|
425
|
+
|
|
426
|
+
const PROBABILITY_MAP: Record<"low" | "medium" | "high", number> = {
|
|
427
|
+
low: 0.2,
|
|
428
|
+
medium: 0.5,
|
|
429
|
+
high: 0.8,
|
|
430
|
+
};
|
|
431
|
+
|
|
432
|
+
const IMPACT_MAP: Record<FailureClass, number> = {
|
|
433
|
+
auth: 0.7,
|
|
434
|
+
network: 0.6,
|
|
435
|
+
resource: 0.5,
|
|
436
|
+
semantic: 0.4,
|
|
437
|
+
tool: 0.6,
|
|
438
|
+
"environment-drift": 0.3,
|
|
439
|
+
unknown: 0.3,
|
|
440
|
+
human: 0.5,
|
|
441
|
+
};
|
|
442
|
+
|
|
443
|
+
export function probabilityToNumber(probability: "low" | "medium" | "high"): number {
|
|
444
|
+
return PROBABILITY_MAP[probability];
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
export function failureClassToImpact(failureClass: FailureClass): number {
|
|
448
|
+
return IMPACT_MAP[failureClass];
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
export function failureModeToRisk(mode: FailureMode): Risk {
|
|
452
|
+
const probability = probabilityToNumber(mode.probability);
|
|
453
|
+
const impact = failureClassToImpact(mode.failureClass);
|
|
454
|
+
|
|
455
|
+
const mitigations: HarnessMutation[] = mode.mitigations.map((description) => ({
|
|
456
|
+
type: "add-verification" as const,
|
|
457
|
+
params: {},
|
|
458
|
+
description,
|
|
459
|
+
}));
|
|
460
|
+
|
|
461
|
+
return {
|
|
462
|
+
id: mode.id,
|
|
463
|
+
probability,
|
|
464
|
+
impact,
|
|
465
|
+
score: probability * impact,
|
|
466
|
+
signals: [...mode.triggers],
|
|
467
|
+
mitigations,
|
|
468
|
+
failureClass: mode.failureClass,
|
|
469
|
+
description: mode.description,
|
|
470
|
+
};
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
export function assessRisks(
|
|
474
|
+
risks: Risk[],
|
|
475
|
+
options?: { highRiskThreshold?: number },
|
|
476
|
+
): RiskAssessment {
|
|
477
|
+
const highRiskThreshold = options?.highRiskThreshold ?? 0.7;
|
|
478
|
+
|
|
479
|
+
if (risks.length === 0) {
|
|
480
|
+
return {
|
|
481
|
+
risks: [],
|
|
482
|
+
overallScore: 0,
|
|
483
|
+
highRiskThreshold,
|
|
484
|
+
risksAboveThreshold: [],
|
|
485
|
+
};
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
const overallScore = risks.reduce((sum, r) => sum + r.score, 0) / risks.length;
|
|
489
|
+
const risksAboveThreshold = risks.filter((r) => r.score >= highRiskThreshold);
|
|
490
|
+
|
|
491
|
+
return {
|
|
492
|
+
risks,
|
|
493
|
+
overallScore,
|
|
494
|
+
highRiskThreshold,
|
|
495
|
+
risksAboveThreshold,
|
|
496
|
+
};
|
|
497
|
+
}
|