@mhingston5/lasso 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,955 @@
1
+ import { beforeEach, describe, expect, it, vi } from "vitest";
2
+
3
// Replace the "pi-duroxide" module with a stub whose only export is a mocked
// `registerWorkflow`.
// NOTE(review): presumably this keeps the compiler modules imported below from
// reaching the real workflow runtime during tests — confirm.
vi.mock("pi-duroxide", () => {
  return { registerWorkflow: vi.fn() };
});
6
+
7
+ import { compileHarnessSpec } from "../../src/compiler/compile.js";
8
+ import { GuardrailExceededError } from "../../src/compiler/runtime-helpers.js";
9
+ import type { HarnessSpec } from "../../src/spec/types.js";
10
+
11
/**
 * Builds a stand-in orchestration context used to drive compiled workflow
 * generators by hand.
 *
 * Most members are bare `vi.fn()` mocks. The exceptions are:
 * - `utcNow` / `newGuid`, which return the fixed values `0` and `"guid-1"`;
 * - `pi.tool` / `pi.llm`, which return plain descriptor objects tagged
 *   `kind: "tool-call"` / `kind: "llm-call"` so tests can assert on what a
 *   generator yields.
 *
 * @returns A fresh context object; every call creates new mocks.
 */
function createMockContext() {
  // Key/value store surface: all three operations are untyped mocks.
  const kv = { get: vi.fn(), set: vi.fn(), clear: vi.fn() };

  // `tool` and `llm` echo their arguments back inside a tagged descriptor.
  const pi = {
    tool: (name: string, args: unknown) => {
      return { kind: "tool-call", name, args };
    },
    llm: (messages: unknown[], options?: unknown) => {
      return { kind: "llm-call", messages, options };
    },
    skill: vi.fn(),
    sendMessage: vi.fn(),
    prompt: vi.fn(),
  };

  return {
    scheduleActivity: vi.fn(),
    scheduleActivityWithRetry: vi.fn(),
    scheduleTimer: vi.fn(),
    waitForEvent: vi.fn(),
    scheduleSubOrchestration: vi.fn(),
    all: vi.fn(),
    race: vi.fn(),
    utcNow: () => 0,
    newGuid: () => "guid-1",
    continueAsNew: vi.fn(),
    setCustomStatus: vi.fn(),
    traceInfo: vi.fn(),
    traceWarn: vi.fn(),
    traceError: vi.fn(),
    traceDebug: vi.fn(),
    kv,
    pi,
  };
}
38
+
39
describe("per-node guardrails", () => {
  /**
   * Compiles `spec` and instantiates the generator of its first workflow.
   *
   * @param spec - Harness spec under test.
   * @param ctx - Context handed to the generator; a fresh mock context is
   *   created when omitted.
   * @returns The workflow iterator, not yet advanced.
   */
  function startWorkflow(spec: HarnessSpec, ctx?: ReturnType<typeof createMockContext>) {
    const compiled = compileHarnessSpec(spec);
    return compiled.workflows[0].generator((ctx ?? createMockContext()) as any, {});
  }

  /**
   * Runs `action` once and returns whatever it throws.
   *
   * The iterator can only be advanced once per step, so assertions on the
   * error type and message have to share a single captured value.
   *
   * @returns The thrown value, or `undefined` when `action` returns normally.
   */
  function captureThrown(action: () => unknown): unknown {
    try {
      action();
    } catch (error) {
      return error;
    }
    return undefined;
  }

  it("enforces per-node maxRetries overriding global retryPolicy", () => {
    const spec: HarnessSpec = {
      name: "per-node-retry-override",
      executionPolicy: {
        failureClassification: [
          { pattern: "transient", category: "transient", retry: true },
        ],
      },
      graph: {
        entryNodeId: "action",
        nodes: [
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo fail"],
            retryPolicy: {
              maxAttempts: 5,
              backoff: "constant",
              initialDelay: 2,
              retryOn: ["transient"],
            },
            guardrails: {
              maxRetries: 1,
            },
          },
        ],
        edges: [],
      },
    };

    const ctx = createMockContext();
    const timerDelays: number[] = [];
    // Configure the existing mock instead of overwriting it with a plain
    // function: the property stays a real vi.fn() and keeps its mock type.
    ctx.scheduleTimer.mockImplementation((delayMs: number) => {
      timerDelays.push(delayMs);
      return { kind: "timer", delayMs };
    });
    const iterator = startWorkflow(spec, ctx);

    // First attempt.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

    // A transient failure triggers a retry; with initialDelay: 2 the test
    // expects a single 2000 ms timer to be scheduled and yielded.
    const retryYield = iterator.throw(new Error("transient failure"));
    expect(retryYield.value).toEqual({ kind: "timer", delayMs: 2000 });
    expect(timerDelays).toEqual([2000]);

    // Second attempt: the one retry that maxRetries: 1 allows.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

    // A second failure must propagate out of the generator, even though
    // retryPolicy.maxAttempts is 5.
    expect(() => iterator.throw(new Error("transient failure"))).toThrow();
  });

  it("enforces per-node maxCostUsd on LLM nodes (per-node delta, not cumulative)", () => {
    const spec: HarnessSpec = {
      name: "per-node-cost",
      graph: {
        entryNodeId: "llm-first",
        nodes: [
          {
            id: "llm-first",
            kind: "llm",
            provider: "anthropic",
            model: "claude-sonnet",
            prompt: "Do something first",
          },
          {
            id: "llm-second",
            kind: "llm",
            provider: "anthropic",
            model: "claude-sonnet",
            prompt: "Do something second",
            guardrails: {
              maxCostUsd: 0.005,
            },
          },
        ],
        edges: [
          { from: "llm-first", to: "llm-second" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // First LLM node yields its call.
    expect(iterator.next().value).toMatchObject({ kind: "llm-call" });

    // NOTE(review): the figures below assume each LLM step adds 0.01 to the
    // tracked cost; that accounting lives in the compiler — confirm there.
    // After llm-first returns, llm-second starts with 0.01 already spent and
    // still yields, because only its own delta counts against maxCostUsd.
    expect(iterator.next("output1").value).toMatchObject({ kind: "llm-call" });

    // llm-second returns: its own delta (0.02 - 0.01 = 0.01) exceeds 0.005.
    const thrownError = captureThrown(() => iterator.next("output2"));
    expect(thrownError).toBeInstanceOf(GuardrailExceededError);
    expect((thrownError as GuardrailExceededError).message).toContain("Per-node cost limit exceeded");
    expect((thrownError as GuardrailExceededError).message).toContain("llm-second");
  });

  it("evaluates per-node constraints before executing the node", () => {
    const spec: HarnessSpec = {
      name: "per-node-constraints",
      graph: {
        entryNodeId: "start",
        nodes: [
          {
            id: "start",
            kind: "tool",
            tool: "echo",
            args: ["hello"],
          },
          {
            id: "guarded",
            kind: "tool",
            tool: "echo",
            args: ["should not run"],
            guardrails: {
              constraints: ["outputs.start.ok"],
            },
          },
          {
            id: "fallback",
            kind: "tool",
            tool: "echo",
            args: ["fallback"],
          },
        ],
        edges: [
          { from: "start", to: "guarded" },
          { from: "guarded", to: "fallback" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // First node executes.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

    // Feeding { ok: false } makes "outputs.start.ok" falsy, so the guarded
    // node must be rejected instead of yielding its tool call.
    const thrownError = captureThrown(() => iterator.next({ ok: false }));
    expect(thrownError).toBeInstanceOf(GuardrailExceededError);
    expect((thrownError as GuardrailExceededError).message).toContain("Constraint failed");
    expect((thrownError as GuardrailExceededError).message).toContain("outputs.start.ok");
  });

  it("allows execution when per-node constraints pass", () => {
    const spec: HarnessSpec = {
      name: "per-node-constraints-pass",
      graph: {
        entryNodeId: "start",
        nodes: [
          {
            id: "start",
            kind: "tool",
            tool: "echo",
            args: ["hello"],
          },
          {
            id: "guarded",
            kind: "tool",
            tool: "echo",
            args: ["should run"],
            guardrails: {
              constraints: ["outputs.start.ok"],
            },
          },
        ],
        edges: [
          { from: "start", to: "guarded" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });
    // { ok: true } satisfies the constraint, so the guarded node yields.
    expect(iterator.next({ ok: true }).value).toMatchObject({ kind: "tool-call" });
    const completed = iterator.next("done");
    expect(completed.done).toBe(true);
    expect(completed.value.status).toBe("completed");
  });

  it("enforces per-node timeoutSeconds by checking elapsed time after yield", () => {
    const spec: HarnessSpec = {
      name: "per-node-timeout",
      graph: {
        entryNodeId: "start",
        nodes: [
          {
            id: "start",
            kind: "tool",
            tool: "echo",
            args: ["first"],
          },
          {
            id: "slow-node",
            kind: "tool",
            tool: "echo",
            args: ["too slow"],
            guardrails: {
              timeoutSeconds: 1,
            },
          },
        ],
        edges: [
          { from: "start", to: "slow-node" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // Install the fake clock before the iterator is first advanced so every
    // Date.now() read made while stepping the workflow sees the fake time.
    // A spy is used (rather than reassigning Date.now) so the original is
    // restored through the mock API.
    let fakeTime = 1000;
    const nowSpy = vi.spyOn(Date, "now").mockImplementation(() => fakeTime);

    try {
      // The first node has no timeout guardrail and runs at fakeTime = 1000.
      expect(iterator.next().value).toMatchObject({ kind: "tool-call" });
      expect(iterator.next("ok").value).toMatchObject({ kind: "tool-call" });

      // slow-node has yielded. Move the clock from 1000 to 3500, i.e. 2.5 s
      // of elapsed fake time, which is past the node's 1 s limit.
      fakeTime = 3500;

      const thrownError = captureThrown(() => iterator.next("done"));
      expect(thrownError).toBeInstanceOf(GuardrailExceededError);
      expect((thrownError as GuardrailExceededError).message).toContain("timeout exceeded");
      expect((thrownError as GuardrailExceededError).message).toContain("slow-node");
    } finally {
      nowSpy.mockRestore();
    }
  });

  it("global guardrails still work alongside per-node guardrails", () => {
    const spec: HarnessSpec = {
      name: "mixed-guardrails",
      executionPolicy: {
        maxSteps: 3,
      },
      graph: {
        entryNodeId: "step-0",
        nodes: [
          {
            id: "step-0",
            kind: "tool",
            tool: "echo",
            args: ["0"],
            guardrails: {
              constraints: ["outputs.step-0.ok"],
            },
          },
          {
            id: "step-1",
            kind: "tool",
            tool: "echo",
            args: ["1"],
          },
          {
            id: "step-2",
            kind: "tool",
            tool: "echo",
            args: ["2"],
          },
          {
            id: "step-3",
            kind: "tool",
            tool: "echo",
            args: ["3"],
          },
        ],
        edges: [
          { from: "step-0", to: "step-1" },
          { from: "step-1", to: "step-2" },
          { from: "step-2", to: "step-3" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // step-0 is constrained on its own output, which does not exist before
    // the node has run, so the very first step is expected to be rejected.
    // NOTE(review): because the workflow stops here, the global maxSteps: 3
    // limit is never reached; this only shows that a per-node constraint is
    // enforced when a global policy is also present.
    const thrownError = captureThrown(() => iterator.next());
    expect(thrownError).toBeInstanceOf(GuardrailExceededError);
    expect((thrownError as GuardrailExceededError).message).toContain("Constraint failed");
  });
});
374
+
375
describe("per-node verification hooks", () => {
  /**
   * Compiles `spec` and instantiates the generator of its first workflow
   * against a fresh mock context.
   */
  function startWorkflow(spec: HarnessSpec) {
    const compiled = compileHarnessSpec(spec);
    const ctx = createMockContext();
    return compiled.workflows[0].generator(ctx as any, {});
  }

  /**
   * Runs `action` once and hands back whatever it throws, or `undefined`
   * when it returns normally.
   */
  function captureThrown(action: () => unknown): unknown {
    let caught: unknown;
    try {
      action();
    } catch (error) {
      caught = error;
    }
    return caught;
  }

  it("runs verification hooks after node execution and blocks on failure", () => {
    const spec: HarnessSpec = {
      name: "verify-block",
      graph: {
        entryNodeId: "action",
        nodes: [
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo test"],
            verificationHooks: [
              {
                name: "check-output",
                kind: "llm",
                check: "Did the test pass?",
                onFail: "block",
              },
            ],
          },
          {
            id: "after",
            kind: "tool",
            tool: "echo",
            args: ["done"],
          },
        ],
        edges: [
          { from: "action", to: "after" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // The primary node yields its tool call first.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

    // Once the tool result is fed back, the LLM verifier is yielded.
    expect(iterator.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

    // A failing verdict on an onFail: "block" hook must raise.
    const thrownError = captureThrown(() => iterator.next({ passed: false }));
    expect(thrownError).toBeDefined();
    expect((thrownError as Error).message).toContain("Verification hook");
    expect((thrownError as Error).message).toContain("check-output");
    expect((thrownError as Error).message).toContain("blocked");
  });

  it("runs verification hooks and warns on failure without halting", () => {
    const spec: HarnessSpec = {
      name: "verify-warn",
      graph: {
        entryNodeId: "action",
        nodes: [
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo test"],
            verificationHooks: [
              {
                name: "soft-check",
                kind: "llm",
                check: "Is this OK?",
                onFail: "warn",
              },
            ],
          },
          {
            id: "after",
            kind: "tool",
            tool: "echo",
            args: ["done"],
          },
        ],
        edges: [
          { from: "action", to: "after" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // Primary node, then its verifier.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });
    expect(iterator.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

    // A failing verdict on an onFail: "warn" hook lets the next node run.
    expect(iterator.next({ passed: false }).value).toMatchObject({ kind: "tool-call" });

    const completed = iterator.next("done");
    expect(completed.done).toBe(true);
    expect(completed.value.status).toBe("completed");
  });

  it("runs verification hooks and retries the node on failure", () => {
    const spec: HarnessSpec = {
      name: "verify-retry",
      graph: {
        entryNodeId: "action",
        nodes: [
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo test"],
            verificationHooks: [
              {
                name: "retry-check",
                kind: "llm",
                check: "Did it work?",
                onFail: "retry",
                maxAttempts: 2,
              },
            ],
          },
          {
            id: "after",
            kind: "tool",
            tool: "echo",
            args: ["done"],
          },
        ],
        edges: [
          { from: "action", to: "after" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // Attempt 1: primary node, then verifier.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });
    expect(iterator.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

    // The failed verdict re-runs the primary node (attempt 2 of maxAttempts: 2).
    expect(iterator.next({ passed: false }).value).toMatchObject({ kind: "tool-call" });

    // Attempt 2 is verified again.
    expect(iterator.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

    // A second failed verdict leaves no attempts, so the hook raises.
    const thrownError = captureThrown(() => iterator.next({ passed: false }));
    expect(thrownError).toBeDefined();
    expect((thrownError as Error).message).toContain("Verification hook");
    expect((thrownError as Error).message).toContain("retry exhausted");
  });

  it("defaults maxAttempts to 2 for retry verification hooks", () => {
    const spec: HarnessSpec = {
      name: "verify-retry-default",
      graph: {
        entryNodeId: "action",
        nodes: [
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo test"],
            verificationHooks: [
              {
                name: "default-retry",
                kind: "llm",
                check: "Did it work?",
                onFail: "retry",
              },
            ],
          },
        ],
        edges: [],
      },
    };

    const iterator = startWorkflow(spec);

    // Attempt 1.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });
    expect(iterator.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

    // With no maxAttempts given, exactly one retry is expected.
    expect(iterator.next({ passed: false }).value).toMatchObject({ kind: "tool-call" });

    // Attempt 2.
    expect(iterator.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

    // A second failure exhausts the default budget.
    const thrownError = captureThrown(() => iterator.next({ passed: false }));
    expect(thrownError).toBeDefined();
    expect((thrownError as Error).message).toContain("retry exhausted");
  });

  it("runs multiple verification hooks in order", () => {
    const spec: HarnessSpec = {
      name: "verify-multiple",
      graph: {
        entryNodeId: "action",
        nodes: [
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo test"],
            verificationHooks: [
              {
                name: "first-check",
                kind: "llm",
                check: "First check?",
                onFail: "block",
              },
              {
                name: "second-check",
                kind: "llm",
                check: "Second check?",
                onFail: "block",
              },
            ],
          },
          {
            id: "after",
            kind: "tool",
            tool: "echo",
            args: ["done"],
          },
        ],
        edges: [
          { from: "action", to: "after" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // Primary node.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

    // First hook is yielded after the tool result.
    expect(iterator.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

    // First hook passes, so the second hook is yielded.
    expect(iterator.next({ passed: true }).value).toMatchObject({ kind: "llm-call" });

    // Second hook passes, so the workflow moves on to the next node.
    expect(iterator.next({ approved: true }).value).toMatchObject({ kind: "tool-call" });

    const completed = iterator.next("done");
    expect(completed.done).toBe(true);
    expect(completed.value.status).toBe("completed");
  });

  it("stops early when first verification hook fails with block", () => {
    const spec: HarnessSpec = {
      name: "verify-early-stop",
      graph: {
        entryNodeId: "action",
        nodes: [
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo test"],
            verificationHooks: [
              {
                name: "first-check",
                kind: "llm",
                check: "First check?",
                onFail: "block",
              },
              {
                name: "second-check",
                kind: "llm",
                check: "Second check?",
                onFail: "block",
              },
            ],
          },
        ],
        edges: [],
      },
    };

    const iterator = startWorkflow(spec);

    // Primary node, then the first hook.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });
    expect(iterator.next({ ok: true }).value).toMatchObject({ kind: "llm-call" });

    // The first hook fails and blocks: the error names it, and the second
    // hook never gets a chance to be yielded.
    const thrownError = captureThrown(() => iterator.next({ passed: false }));
    expect(thrownError).toBeDefined();
    expect((thrownError as Error).message).toContain("first-check");
  });

  it("runs expression-based verification hooks without yielding", () => {
    const spec: HarnessSpec = {
      name: "verify-expression",
      graph: {
        entryNodeId: "action",
        nodes: [
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo test"],
            verificationHooks: [
              {
                name: "expr-check",
                kind: "expression",
                check: "outputs.action.ok",
                onFail: "block",
              },
            ],
          },
          {
            id: "after",
            kind: "tool",
            tool: "echo",
            args: ["done"],
          },
        ],
        edges: [
          { from: "action", to: "after" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // Primary node executes.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

    // The expression hook produces no yield of its own: the very next yield
    // is already the following node's tool call.
    expect(iterator.next({ ok: true }).value).toMatchObject({ kind: "tool-call" });

    const completed = iterator.next("done");
    expect(completed.done).toBe(true);
    expect(completed.value.status).toBe("completed");
  });

  it("expression verification hook fails when expression is falsy", () => {
    const spec: HarnessSpec = {
      name: "verify-expression-fail",
      graph: {
        entryNodeId: "action",
        nodes: [
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo test"],
            verificationHooks: [
              {
                name: "expr-check",
                kind: "expression",
                check: "outputs.action.ok",
                onFail: "block",
              },
            ],
          },
        ],
        edges: [],
      },
    };

    const iterator = startWorkflow(spec);

    // Primary node executes.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

    // { ok: false } makes "outputs.action.ok" falsy, so the hook blocks.
    const thrownError = captureThrown(() => iterator.next({ ok: false }));
    expect(thrownError).toBeDefined();
    expect((thrownError as Error).message).toContain("expr-check");
    expect((thrownError as Error).message).toContain("blocked");
  });

  it("nodes without verificationHooks work normally", () => {
    const spec: HarnessSpec = {
      name: "no-hooks",
      graph: {
        entryNodeId: "action",
        nodes: [
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo test"],
          },
          {
            id: "after",
            kind: "tool",
            tool: "echo",
            args: ["done"],
          },
        ],
        edges: [
          { from: "action", to: "after" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // Two plain tool nodes in sequence, then completion.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });
    expect(iterator.next("ok").value).toMatchObject({ kind: "tool-call" });
    const completed = iterator.next("done");
    expect(completed.done).toBe(true);
    expect(completed.value.status).toBe("completed");
  });
});
836
+
837
describe("per-node guardrails and verification hooks combined", () => {
  /**
   * Compiles `spec` and instantiates the generator of its first workflow
   * against a fresh mock context.
   */
  function startWorkflow(spec: HarnessSpec) {
    const compiled = compileHarnessSpec(spec);
    const ctx = createMockContext();
    return compiled.workflows[0].generator(ctx as any, {});
  }

  /**
   * Runs `action` once and hands back whatever it throws, or `undefined`
   * when it returns normally.
   */
  function captureThrown(action: () => unknown): unknown {
    let caught: unknown;
    try {
      action();
    } catch (error) {
      caught = error;
    }
    return caught;
  }

  it("enforces guardrails before verification hooks", () => {
    const spec: HarnessSpec = {
      name: "guardrails-before-verify",
      graph: {
        entryNodeId: "start",
        nodes: [
          {
            id: "start",
            kind: "tool",
            tool: "echo",
            args: ["go"],
          },
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo action"],
            guardrails: {
              constraints: ["outputs.start.proceed"],
            },
            verificationHooks: [
              {
                name: "post-check",
                kind: "llm",
                check: "Was it correct?",
                onFail: "block",
              },
            ],
          },
        ],
        edges: [
          { from: "start", to: "action" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // "start" yields its tool call.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

    // { proceed: false } fails the constraint on "action", so a guardrail
    // error is raised rather than anything being yielded for that node.
    const thrownError = captureThrown(() => iterator.next({ proceed: false }));
    expect(thrownError).toBeInstanceOf(GuardrailExceededError);
    expect((thrownError as GuardrailExceededError).message).toContain("Constraint failed");
  });

  it("verification hooks run after guardrails pass", () => {
    const spec: HarnessSpec = {
      name: "guardrails-pass-verify-runs",
      graph: {
        entryNodeId: "start",
        nodes: [
          {
            id: "start",
            kind: "tool",
            tool: "echo",
            args: ["go"],
          },
          {
            id: "action",
            kind: "tool",
            tool: "bash",
            args: ["echo action"],
            guardrails: {
              constraints: ["outputs.start.proceed"],
            },
            verificationHooks: [
              {
                name: "post-check",
                kind: "llm",
                check: "Was it correct?",
                onFail: "block",
              },
            ],
          },
          {
            id: "after",
            kind: "tool",
            tool: "echo",
            args: ["done"],
          },
        ],
        edges: [
          { from: "start", to: "action" },
          { from: "action", to: "after" },
        ],
      },
    };

    const iterator = startWorkflow(spec);

    // "start" yields its tool call.
    expect(iterator.next().value).toMatchObject({ kind: "tool-call" });

    // { proceed: true } satisfies the constraint, so "action" yields.
    expect(iterator.next({ proceed: true }).value).toMatchObject({ kind: "tool-call" });

    // With the action result fed back, the LLM verifier is yielded.
    expect(iterator.next({ result: "ok" }).value).toMatchObject({ kind: "llm-call" });

    // The verifier response is accepted and the "after" node yields.
    expect(iterator.next({ approved: true }).value).toMatchObject({ kind: "tool-call" });

    const completed = iterator.next("done");
    expect(completed.done).toBe(true);
    expect(completed.value.status).toBe("completed");
  });
});