@mhingston5/lasso 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/README.md +707 -0
  2. package/docs/agent-wrangling.png +0 -0
  3. package/package.json +26 -0
  4. package/src/capabilities/matcher.ts +25 -0
  5. package/src/capabilities/registry.ts +103 -0
  6. package/src/capabilities/types.ts +15 -0
  7. package/src/cir/lower.ts +253 -0
  8. package/src/cir/optimize.ts +251 -0
  9. package/src/cir/types.ts +131 -0
  10. package/src/cir/validate.ts +265 -0
  11. package/src/compiler/compile.ts +601 -0
  12. package/src/compiler/feedback.ts +471 -0
  13. package/src/compiler/runtime-helpers.ts +455 -0
  14. package/src/composition/chain.ts +58 -0
  15. package/src/composition/conditional.ts +76 -0
  16. package/src/composition/parallel.ts +75 -0
  17. package/src/composition/types.ts +105 -0
  18. package/src/environment/analyzer.ts +56 -0
  19. package/src/environment/discovery.ts +179 -0
  20. package/src/environment/types.ts +68 -0
  21. package/src/failures/classifiers.ts +134 -0
  22. package/src/failures/generator.ts +421 -0
  23. package/src/failures/map-reference-failures.ts +23 -0
  24. package/src/failures/ontology.ts +210 -0
  25. package/src/failures/recovery.ts +214 -0
  26. package/src/failures/types.ts +14 -0
  27. package/src/index.ts +67 -0
  28. package/src/memory/advisor.ts +132 -0
  29. package/src/memory/extractor.ts +166 -0
  30. package/src/memory/store.ts +107 -0
  31. package/src/memory/types.ts +53 -0
  32. package/src/metaharness/engine.ts +256 -0
  33. package/src/metaharness/predictor.ts +168 -0
  34. package/src/metaharness/types.ts +40 -0
  35. package/src/mutation/derive.ts +308 -0
  36. package/src/mutation/diff.ts +52 -0
  37. package/src/mutation/engine.ts +256 -0
  38. package/src/mutation/types.ts +84 -0
  39. package/src/pi/command-input.ts +209 -0
  40. package/src/pi/commands.ts +351 -0
  41. package/src/pi/extension.ts +16 -0
  42. package/src/planner/synthesize.ts +83 -0
  43. package/src/planner/template-rules.ts +183 -0
  44. package/src/planner/types.ts +42 -0
  45. package/src/reference/catalog.ts +128 -0
  46. package/src/reference/patch-validation-strategies.ts +170 -0
  47. package/src/reference/patch-validation.ts +174 -0
  48. package/src/reference/pr-review-merge.ts +155 -0
  49. package/src/reference/strategies.ts +126 -0
  50. package/src/reference/types.ts +33 -0
  51. package/src/replanner/risk-rules.ts +161 -0
  52. package/src/replanner/runtime.ts +308 -0
  53. package/src/replanner/synthesize.ts +619 -0
  54. package/src/replanner/types.ts +73 -0
  55. package/src/spec/schema.ts +254 -0
  56. package/src/spec/types.ts +319 -0
  57. package/src/spec/validate.ts +296 -0
  58. package/src/state/snapshots.ts +43 -0
  59. package/src/state/types.ts +12 -0
  60. package/src/synthesis/graph-builder.ts +267 -0
  61. package/src/synthesis/harness-builder.ts +113 -0
  62. package/src/synthesis/intent-ir.ts +63 -0
  63. package/src/synthesis/policy-builder.ts +320 -0
  64. package/src/synthesis/risk-analyzer.ts +182 -0
  65. package/src/synthesis/skill-parser.ts +441 -0
  66. package/src/verification/engine.ts +230 -0
  67. package/src/versioning/file-store.ts +103 -0
  68. package/src/versioning/history.ts +43 -0
  69. package/src/versioning/store.ts +16 -0
  70. package/src/versioning/types.ts +31 -0
  71. package/test/capabilities/matcher.test.ts +67 -0
  72. package/test/capabilities/registry.test.ts +136 -0
  73. package/test/capabilities/synthesis.test.ts +264 -0
  74. package/test/cir/lower.test.ts +417 -0
  75. package/test/cir/optimize.test.ts +266 -0
  76. package/test/cir/validate.test.ts +368 -0
  77. package/test/compiler/adaptive-runtime.test.ts +157 -0
  78. package/test/compiler/compile.test.ts +1198 -0
  79. package/test/compiler/feedback.test.ts +784 -0
  80. package/test/compiler/guardrails.test.ts +191 -0
  81. package/test/compiler/trace.test.ts +404 -0
  82. package/test/composition/chain.test.ts +328 -0
  83. package/test/composition/conditional.test.ts +241 -0
  84. package/test/composition/parallel.test.ts +215 -0
  85. package/test/environment/analyzer.test.ts +204 -0
  86. package/test/environment/discovery.test.ts +149 -0
  87. package/test/failures/classifiers.test.ts +287 -0
  88. package/test/failures/generator.test.ts +203 -0
  89. package/test/failures/ontology.test.ts +439 -0
  90. package/test/failures/recovery.test.ts +300 -0
  91. package/test/helpers/createFixtureRepo.ts +84 -0
  92. package/test/helpers/createPatchValidationFixture.ts +144 -0
  93. package/test/helpers/runCompiledWorkflow.ts +208 -0
  94. package/test/memory/advisor.test.ts +332 -0
  95. package/test/memory/extractor.test.ts +295 -0
  96. package/test/memory/store.test.ts +244 -0
  97. package/test/metaharness/engine.test.ts +575 -0
  98. package/test/metaharness/predictor.test.ts +436 -0
  99. package/test/mutation/derive-failure.test.ts +209 -0
  100. package/test/mutation/engine.test.ts +622 -0
  101. package/test/package-smoke.test.ts +29 -0
  102. package/test/pi/command-input.test.ts +153 -0
  103. package/test/pi/commands.test.ts +623 -0
  104. package/test/planner/classify-template.test.ts +32 -0
  105. package/test/planner/synthesize.test.ts +901 -0
  106. package/test/reference/PatchValidation.failures.test.ts +137 -0
  107. package/test/reference/PatchValidation.test.ts +326 -0
  108. package/test/reference/PrReviewMerge.failures.test.ts +121 -0
  109. package/test/reference/PrReviewMerge.test.ts +55 -0
  110. package/test/reference/catalog-open.test.ts +70 -0
  111. package/test/replanner/runtime.test.ts +207 -0
  112. package/test/replanner/synthesize.test.ts +303 -0
  113. package/test/spec/validate.test.ts +1056 -0
  114. package/test/state/snapshots.test.ts +264 -0
  115. package/test/synthesis/custom-workflow.test.ts +264 -0
  116. package/test/synthesis/graph-builder.test.ts +370 -0
  117. package/test/synthesis/harness-builder.test.ts +128 -0
  118. package/test/synthesis/policy-builder.test.ts +149 -0
  119. package/test/synthesis/risk-analyzer.test.ts +230 -0
  120. package/test/synthesis/skill-parser.test.ts +796 -0
  121. package/test/verification/engine.test.ts +509 -0
  122. package/test/versioning/history.test.ts +144 -0
  123. package/test/versioning/store.test.ts +254 -0
  124. package/vitest.config.ts +9 -0
@@ -0,0 +1,137 @@
1
+ import { afterEach, describe, expect, it } from "vitest";
2
+ import { buildPatchValidationHarnessSpec } from "../../src/reference/patch-validation.js";
3
+ import { compileHarnessSpec } from "../../src/compiler/compile.js";
4
+ import { createPatchValidationFixture } from "../helpers/createPatchValidationFixture.js";
5
+ import { runCompiledWorkflow } from "../helpers/runCompiledWorkflow.js";
6
+ import type { ToolNode } from "../../src/spec/types.js";
7
+ // Failure-path suite: each test compiles the patch-validation spec, runs it against a fixture, and checks the terminal node reached (or the error raised).
8
+ describe("buildPatchValidationHarnessSpec failures", () => {
9
+ const cleanups: Array<() => void> = [];
10
+ // Drain the registered fixture cleanup callbacks after every test, most recently pushed first.
11
+ afterEach(() => {
12
+ while (cleanups.length > 0) {
13
+ cleanups.pop()?.();
14
+ }
15
+ });
16
+ // Fixture option baselineAlwaysPasses: run-baseline should report reproduced=false and the run should end at "not-reproduced".
17
+ it("routes to not-reproduced when the baseline reproduce command passes on baseline", async () => {
18
+ const fixture = createPatchValidationFixture({ baselineAlwaysPasses: true });
19
+ cleanups.push(fixture.cleanup);
20
+
21
+ const compiled = compileHarnessSpec(buildPatchValidationHarnessSpec(fixture.bundle));
22
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {});
23
+
24
+ expect(result.status).toBe("completed");
25
+ expect(result.terminalNodeId).toBe("not-reproduced");
26
+
27
+ const baselineOutput = result.outputs["run-baseline"] as { reproduced: boolean };
28
+ expect(baselineOutput.reproduced).toBe(false);
29
+ });
30
+ // applyFailure with a "branch" candidate: apply-candidate should report applied=false with a "branch checkout failed" reason.
31
+ it("routes to apply-failed when the branch candidate does not exist", async () => {
32
+ const fixture = createPatchValidationFixture({ applyFailure: true, candidateKind: "branch" });
33
+ cleanups.push(fixture.cleanup);
34
+
35
+ const compiled = compileHarnessSpec(buildPatchValidationHarnessSpec(fixture.bundle));
36
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {});
37
+
38
+ expect(result.status).toBe("completed");
39
+ expect(result.terminalNodeId).toBe("apply-failed");
40
+
41
+ const applyOutput = result.outputs["apply-candidate"] as { applied: boolean; reason?: string };
42
+ expect(applyOutput.applied).toBe(false);
43
+ expect(applyOutput.reason).toMatch(/branch checkout failed/);
44
+ });
45
+ // applyFailure with a "patchFile" candidate: apply-candidate should report applied=false with a "patch apply failed" reason.
46
+ it("routes to apply-failed when the patch-file candidate is malformed", async () => {
47
+ const fixture = createPatchValidationFixture({ applyFailure: true, candidateKind: "patchFile" });
48
+ cleanups.push(fixture.cleanup);
49
+
50
+ const compiled = compileHarnessSpec(buildPatchValidationHarnessSpec(fixture.bundle));
51
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {});
52
+
53
+ expect(result.status).toBe("completed");
54
+ expect(result.terminalNodeId).toBe("apply-failed");
55
+
56
+ const applyOutput = result.outputs["apply-candidate"] as { applied: boolean; reason?: string };
57
+ expect(applyOutput.applied).toBe(false);
58
+ expect(applyOutput.reason).toMatch(/patch apply failed/);
59
+ });
60
+ // Fixture option fixDoesNotFixBug: run-candidate-reproduce should still report reproduced=true and the run should end at "candidate-failed".
61
+ it("routes to candidate-failed when the bug still reproduces after candidate is applied", async () => {
62
+ const fixture = createPatchValidationFixture({ fixDoesNotFixBug: true });
63
+ cleanups.push(fixture.cleanup);
64
+
65
+ const compiled = compileHarnessSpec(buildPatchValidationHarnessSpec(fixture.bundle));
66
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {});
67
+
68
+ expect(result.status).toBe("completed");
69
+ expect(result.terminalNodeId).toBe("candidate-failed");
70
+
71
+ const candidateOutput = result.outputs["run-candidate-reproduce"] as { reproduced: boolean };
72
+ expect(candidateOutput.reproduced).toBe(true);
73
+ });
74
+ // Fixture option verificationFailure: reproduction is gone (reproduced=false) but run-verification should report passed=false.
75
+ it("routes to candidate-failed when the candidate passes reproduction but fails broader verification", async () => {
76
+ const fixture = createPatchValidationFixture({ verificationFailure: true });
77
+ cleanups.push(fixture.cleanup);
78
+
79
+ const compiled = compileHarnessSpec(buildPatchValidationHarnessSpec(fixture.bundle));
80
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {});
81
+
82
+ expect(result.status).toBe("completed");
83
+ expect(result.terminalNodeId).toBe("candidate-failed");
84
+
85
+ const candidateOutput = result.outputs["run-candidate-reproduce"] as { reproduced: boolean };
86
+ expect(candidateOutput.reproduced).toBe(false);
87
+
88
+ const verificationOutput = result.outputs["run-verification"] as { passed: boolean };
89
+ expect(verificationOutput.passed).toBe(false);
90
+ });
91
+ // approvalRequired fixture run with humanResponse.approved=false: the run should end at "rejected".
92
+ it("routes to rejected when the human reviewer rejects the candidate", async () => {
93
+ const fixture = createPatchValidationFixture({ approvalRequired: true });
94
+ cleanups.push(fixture.cleanup);
95
+
96
+ const compiled = compileHarnessSpec(buildPatchValidationHarnessSpec(fixture.bundle));
97
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {
98
+ llmResult: { summary: "Candidate looks good but requires human sign-off." },
99
+ humanResponse: { approved: false },
100
+ });
101
+
102
+ expect(result.status).toBe("completed");
103
+ expect(result.terminalNodeId).toBe("rejected");
104
+ });
105
+ // Swaps run-verification for a command that always exits 1 with maxAttempts 2; the run is expected to reject with the stderr marker in the message.
106
+ it("exhausts retries and throws when run-verification emits a transient error", async () => {
107
+ const fixture = createPatchValidationFixture();
108
+ cleanups.push(fixture.cleanup);
109
+
110
+ const spec = buildPatchValidationHarnessSpec(fixture.bundle);
111
+
112
+ // Patch run-verification to throw a transient error instead of returning JSON.
113
+ const verifyIdx = spec.graph.nodes.findIndex(n => n.id === "run-verification");
114
+ const originalNode = spec.graph.nodes[verifyIdx] as ToolNode;
115
+ spec.graph.nodes[verifyIdx] = {
116
+ ...originalNode,
117
+ args: ["-lc", `echo 'transient-verify-fail' >&2 && exit 1`],
118
+ retryPolicy: {
119
+ maxAttempts: 2,
120
+ backoff: "constant",
121
+ initialDelay: 0,
122
+ retryOn: ["transient"],
123
+ },
124
+ executionPolicy: {
125
+ failureClassification: [
126
+ { pattern: "transient-verify-fail", category: "transient", retry: true },
127
+ ],
128
+ },
129
+ } as ToolNode;
130
+
131
+ const compiled = compileHarnessSpec(spec);
132
+
133
+ await expect(
134
+ runCompiledWorkflow(compiled, fixture.bundle, {}),
135
+ ).rejects.toThrow(/transient-verify-fail/i);
136
+ });
137
+ });
@@ -0,0 +1,326 @@
1
+ import { afterEach, describe, expect, it } from "vitest";
2
+ import { buildPatchValidationHarnessSpec } from "../../src/reference/patch-validation.js";
3
+ import { compileHarnessSpec } from "../../src/compiler/compile.js";
4
+ import { validateHarnessSpec } from "../../src/spec/validate.js";
5
+ import { createPatchValidationFixture } from "../helpers/createPatchValidationFixture.js";
6
+ import { runCompiledWorkflow } from "../helpers/runCompiledWorkflow.js";
7
+ import type { LocalPatchValidationBundle } from "../../src/reference/types.js";
8
+ // Terminal node ids the spec tests expect in every build, and the one they expect only when approvalRequired is true.
9
+ const ALWAYS_PRESENT_TERMINAL_IDS = ["validated-fix", "not-reproduced", "apply-failed", "candidate-failed"];
10
+ const APPROVAL_ONLY_TERMINAL_IDS = ["rejected"];
11
+ // Structural suite: inspects the spec returned by buildPatchValidationHarnessSpec (nodes, edges, prompts, validation) without compiling or running it.
12
+ describe("buildPatchValidationHarnessSpec", () => {
13
+ it("builds a valid serial spec for a branch candidate source", () => {
14
+ const bundle: LocalPatchValidationBundle = {
15
+ repoPath: "/tmp/repo",
16
+ baselineRef: "HEAD",
17
+ candidateSource: { kind: "branch", value: "fix/bug-123" },
18
+ reproduceCommands: ["npm test -- failing.spec.ts"],
19
+ verificationCommands: ["npm test", "npm run build"],
20
+ reviewInstructions: "Approve only if baseline fails and candidate passes.",
21
+ approvalRequired: true,
22
+ };
23
+
24
+ const spec = buildPatchValidationHarnessSpec(bundle);
25
+
26
+ expect(spec.name).toBe("patch-validation");
27
+ expect(spec.graph.entryNodeId).toBeDefined();
28
+
29
+ const nodeKinds = spec.graph.nodes.map(n => n.kind);
30
+ expect(nodeKinds).not.toContain("merge");
31
+
32
+ const nodeIds = spec.graph.nodes.map(n => n.id);
33
+ for (const terminal of ALWAYS_PRESENT_TERMINAL_IDS) {
34
+ expect(nodeIds).toContain(terminal);
35
+ }
36
+ for (const terminal of APPROVAL_ONLY_TERMINAL_IDS) {
37
+ expect(nodeIds).toContain(terminal);
38
+ }
39
+
40
+ const allTerminalIds = [...ALWAYS_PRESENT_TERMINAL_IDS, ...APPROVAL_ONLY_TERMINAL_IDS];
41
+ const terminalNodes = spec.graph.nodes.filter(n => allTerminalIds.includes(n.id));
42
+ for (const node of terminalNodes) {
43
+ expect(node.kind).toBe("subworkflow");
44
+ }
45
+ });
46
+ // approvalRequired=false: the "rejected" terminal must be absent while the always-present terminals remain.
47
+ it("builds a valid serial spec for a patch-file candidate source", () => {
48
+ const bundle: LocalPatchValidationBundle = {
49
+ repoPath: "/tmp/repo",
50
+ baselineRef: "HEAD",
51
+ candidateSource: { kind: "patchFile", value: "/patches/fix.patch" },
52
+ reproduceCommands: ["cargo test buggy_test"],
53
+ verificationCommands: ["cargo test", "cargo clippy"],
54
+ reviewInstructions: "Validate the patch fixes the regression.",
55
+ approvalRequired: false,
56
+ };
57
+
58
+ const spec = buildPatchValidationHarnessSpec(bundle);
59
+
60
+ expect(spec.name).toBe("patch-validation");
61
+
62
+ const nodeKinds = spec.graph.nodes.map(n => n.kind);
63
+ expect(nodeKinds).not.toContain("merge");
64
+
65
+ const nodeIds = spec.graph.nodes.map(n => n.id);
66
+ for (const terminal of ALWAYS_PRESENT_TERMINAL_IDS) {
67
+ expect(nodeIds).toContain(terminal);
68
+ }
69
+ for (const terminal of APPROVAL_ONLY_TERMINAL_IDS) {
70
+ expect(nodeIds).not.toContain(terminal);
71
+ }
72
+ });
73
+ // approvalRequired=true: at least one node of kind "human" is expected.
74
+ it("includes a human gate node when approvalRequired is true", () => {
75
+ const bundle: LocalPatchValidationBundle = {
76
+ repoPath: "/tmp/repo",
77
+ baselineRef: "HEAD",
78
+ candidateSource: { kind: "branch", value: "fix/bug-123" },
79
+ reproduceCommands: ["npm test"],
80
+ verificationCommands: ["npm test"],
81
+ reviewInstructions: "Review required.",
82
+ approvalRequired: true,
83
+ };
84
+
85
+ const spec = buildPatchValidationHarnessSpec(bundle);
86
+ const humanNodes = spec.graph.nodes.filter(n => n.kind === "human");
87
+ expect(humanNodes.length).toBeGreaterThanOrEqual(1);
88
+ });
89
+ // approvalRequired=false: no node of kind "human" is expected.
90
+ it("omits the human gate node when approvalRequired is false", () => {
91
+ const bundle: LocalPatchValidationBundle = {
92
+ repoPath: "/tmp/repo",
93
+ baselineRef: "HEAD",
94
+ candidateSource: { kind: "patchFile", value: "/patches/fix.patch" },
95
+ reproduceCommands: ["npm test"],
96
+ verificationCommands: ["npm test"],
97
+ reviewInstructions: "No human review.",
98
+ approvalRequired: false,
99
+ };
100
+
101
+ const spec = buildPatchValidationHarnessSpec(bundle);
102
+ const humanNodes = spec.graph.nodes.filter(n => n.kind === "human");
103
+ expect(humanNodes).toHaveLength(0);
104
+ });
105
+ // Serial-shape check: no "merge" nodes, and every node that is an edge target has exactly one incoming edge.
106
+ it("has a linear edge chain with no merge node (serial ordering)", () => {
107
+ const bundle: LocalPatchValidationBundle = {
108
+ repoPath: "/tmp/repo",
109
+ baselineRef: "HEAD",
110
+ candidateSource: { kind: "branch", value: "fix/bug-123" },
111
+ reproduceCommands: ["npm test"],
112
+ verificationCommands: ["npm test"],
113
+ reviewInstructions: "Approve.",
114
+ approvalRequired: false,
115
+ };
116
+
117
+ const spec = buildPatchValidationHarnessSpec(bundle);
118
+
119
+ // No merge nodes — the workflow must be serial
120
+ const mergeNodes = spec.graph.nodes.filter(n => n.kind === "merge");
121
+ expect(mergeNodes).toHaveLength(0);
122
+
123
+ // No fan-in: no node should appear as an edge target more than once
124
+ const edgeTargets = spec.graph.edges.map(e => e.to);
125
+ const targetCounts = new Map<string, number>();
126
+ for (const t of edgeTargets) {
127
+ targetCounts.set(t, (targetCounts.get(t) ?? 0) + 1);
128
+ }
129
+ for (const [nodeId, count] of targetCounts) {
130
+ expect(count, `node ${nodeId} has ${count} incoming edges (fan-in detected)`).toBe(1);
131
+ }
132
+ });
133
+ // validateHarnessSpec must report valid for the approval-required variant; any reported errors are joined into the assertion message.
134
+ it("produces a spec that passes schema validation when approvalRequired is true", () => {
135
+ const bundle: LocalPatchValidationBundle = {
136
+ repoPath: "/tmp/repo",
137
+ baselineRef: "HEAD",
138
+ candidateSource: { kind: "branch", value: "fix/bug-123" },
139
+ reproduceCommands: ["npm test"],
140
+ verificationCommands: ["npm test"],
141
+ reviewInstructions: "Approve.",
142
+ approvalRequired: true,
143
+ };
144
+
145
+ const result = validateHarnessSpec(buildPatchValidationHarnessSpec(bundle));
146
+ expect(result.valid, (result as any).errors?.join(", ")).toBe(true);
147
+ });
148
+ // validateHarnessSpec must report valid for the no-approval, patch-file variant as well.
149
+ it("produces a spec that passes schema validation when approvalRequired is false", () => {
150
+ const bundle: LocalPatchValidationBundle = {
151
+ repoPath: "/tmp/repo",
152
+ baselineRef: "HEAD",
153
+ candidateSource: { kind: "patchFile", value: "/patches/fix.patch" },
154
+ reproduceCommands: ["npm test"],
155
+ verificationCommands: ["npm test"],
156
+ reviewInstructions: "No approval.",
157
+ approvalRequired: false,
158
+ };
159
+
160
+ const result = validateHarnessSpec(buildPatchValidationHarnessSpec(bundle));
161
+ expect(result.valid, (result as any).errors?.join(", ")).toBe(true);
162
+ });
163
+ // The run-baseline tool script (args[1]) must pair "git checkout" with "|| exit 1".
164
+ it("baseline reproduce tool guards the checkout so a bad ref fails fast", () => {
165
+ const bundle: LocalPatchValidationBundle = {
166
+ repoPath: "/tmp/repo",
167
+ baselineRef: "HEAD",
168
+ candidateSource: { kind: "branch", value: "fix/bug-123" },
169
+ reproduceCommands: ["npm test"],
170
+ verificationCommands: ["npm test"],
171
+ reviewInstructions: "Approve.",
172
+ approvalRequired: false,
173
+ };
174
+
175
+ const spec = buildPatchValidationHarnessSpec(bundle);
176
+ const baselineNode = spec.graph.nodes.find(n => n.id === "run-baseline");
177
+ expect(baselineNode).toBeDefined();
178
+ expect(baselineNode!.kind).toBe("tool");
179
+ const script = (baselineNode as any).args[1] as string;
180
+ expect(script).toMatch(/git checkout.*\|\|\s*exit 1/);
181
+ });
182
+ // The apply-candidate tool script (args[1]) must mention "baseline checkout failed" and contain an "applied":false JSON fragment.
183
+ it("apply candidate tool emits JSON failure payload when baseline checkout fails", () => {
184
+ const bundle: LocalPatchValidationBundle = {
185
+ repoPath: "/tmp/repo",
186
+ baselineRef: "HEAD",
187
+ candidateSource: { kind: "patchFile", value: "/patches/fix.patch" },
188
+ reproduceCommands: ["npm test"],
189
+ verificationCommands: ["npm test"],
190
+ reviewInstructions: "No approval.",
191
+ approvalRequired: false,
192
+ };
193
+
194
+ const spec = buildPatchValidationHarnessSpec(bundle);
195
+ const applyNode = spec.graph.nodes.find(n => n.id === "apply-candidate");
196
+ expect(applyNode).toBeDefined();
197
+ expect(applyNode!.kind).toBe("tool");
198
+ const script = (applyNode as any).args[1] as string;
199
+ expect(script).toMatch(/baseline checkout failed/);
200
+ expect(script).toMatch(/"applied":false/);
201
+ });
202
+ // The summarise LLM prompt must mention "summary" (any case) and must not contain the standalone word "approved".
203
+ it("summarise node prompt requests only a summary, not an approved field", () => {
204
+ const bundle: LocalPatchValidationBundle = {
205
+ repoPath: "/tmp/repo",
206
+ baselineRef: "HEAD",
207
+ candidateSource: { kind: "branch", value: "fix/bug-123" },
208
+ reproduceCommands: ["npm test"],
209
+ verificationCommands: ["npm test"],
210
+ reviewInstructions: "Review carefully.",
211
+ approvalRequired: true,
212
+ };
213
+
214
+ const spec = buildPatchValidationHarnessSpec(bundle);
215
+ const summariseNode = spec.graph.nodes.find(n => n.id === "summarise");
216
+ expect(summariseNode).toBeDefined();
217
+ expect(summariseNode!.kind).toBe("llm");
218
+ const prompt = (summariseNode as any).prompt as string;
219
+ expect(prompt).toMatch(/summary/i);
220
+ expect(prompt).not.toMatch(/\bapproved\b/);
221
+ });
222
+ // An empty reproduceCommands list must make the builder throw the quoted message.
223
+ it("throws when reproduceCommands is empty", () => {
224
+ const bundle: LocalPatchValidationBundle = {
225
+ repoPath: "/tmp/repo",
226
+ baselineRef: "HEAD",
227
+ candidateSource: { kind: "branch", value: "fix/bug-123" },
228
+ reproduceCommands: [],
229
+ verificationCommands: ["npm test"],
230
+ reviewInstructions: "Approve.",
231
+ approvalRequired: false,
232
+ };
233
+
234
+ expect(() => buildPatchValidationHarnessSpec(bundle)).toThrow(
235
+ "Patch validation requires at least one reproduce command",
236
+ );
237
+ });
238
+ // The human-approve node's prompt must contain the text "summarise.summary".
239
+ it("human approval prompt explicitly references the prior summarise step output", () => {
240
+ const bundle: LocalPatchValidationBundle = {
241
+ repoPath: "/tmp/repo",
242
+ baselineRef: "HEAD",
243
+ candidateSource: { kind: "branch", value: "fix/bug-123" },
244
+ reproduceCommands: ["npm test"],
245
+ verificationCommands: ["npm test"],
246
+ reviewInstructions: "Approve only if regression-free.",
247
+ approvalRequired: true,
248
+ };
249
+
250
+ const spec = buildPatchValidationHarnessSpec(bundle);
251
+ const humanNode = spec.graph.nodes.find(n => n.id === "human-approve");
252
+ expect(humanNode).toBeDefined();
253
+ expect(humanNode!.kind).toBe("human");
254
+ const prompt = (humanNode as any).prompt as string;
255
+ expect(prompt).toMatch(/summarise/);
256
+ expect(prompt).toMatch(/summarise\.summary/);
257
+ });
258
+ // An empty verificationCommands list must make the builder throw the quoted message.
259
+ it("throws when verificationCommands is empty", () => {
260
+ const bundle: LocalPatchValidationBundle = {
261
+ repoPath: "/tmp/repo",
262
+ baselineRef: "HEAD",
263
+ candidateSource: { kind: "branch", value: "fix/bug-123" },
264
+ reproduceCommands: ["npm test"],
265
+ verificationCommands: [],
266
+ reviewInstructions: "Approve.",
267
+ approvalRequired: false,
268
+ };
269
+
270
+ expect(() => buildPatchValidationHarnessSpec(bundle)).toThrow(
271
+ "Patch validation requires at least one verification command",
272
+ );
273
+ });
274
+ });
275
+ // End-to-end suite: compiles the patch-validation spec and runs it against fixtures, expecting the "validated-fix" terminal.
276
+ describe("buildPatchValidationHarnessSpec compiled workflow", () => {
277
+ const cleanups: Array<() => void> = [];
278
+ // Drain the registered fixture cleanup callbacks after every test, most recently pushed first.
279
+ afterEach(() => {
280
+ while (cleanups.length > 0) {
281
+ cleanups.pop()?.();
282
+ }
283
+ });
284
+ // Default fixture: baseline reproduces, the candidate no longer reproduces, and verification passes.
285
+ it("runs to validated-fix when a branch candidate fixes the bug", async () => {
286
+ const fixture = createPatchValidationFixture();
287
+ cleanups.push(fixture.cleanup);
288
+
289
+ const spec = buildPatchValidationHarnessSpec(fixture.bundle);
290
+ const compiled = compileHarnessSpec(spec);
291
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {});
292
+
293
+ expect(result.status).toBe("completed");
294
+ expect(result.terminalNodeId).toBe("validated-fix");
295
+
296
+ const baselineOutput = result.outputs["run-baseline"] as { reproduced: boolean };
297
+ expect(baselineOutput.reproduced).toBe(true);
298
+
299
+ const candidateOutput = result.outputs["run-candidate-reproduce"] as { reproduced: boolean };
300
+ expect(candidateOutput.reproduced).toBe(false);
301
+
302
+ const verificationOutput = result.outputs["run-verification"] as { passed: boolean };
303
+ expect(verificationOutput.passed).toBe(true);
304
+ });
305
+ // Same expectations with the fixture built using candidateKind "patchFile".
306
+ it("runs to validated-fix when a patch-file candidate fixes the bug", async () => {
307
+ const fixture = createPatchValidationFixture({ candidateKind: "patchFile" });
308
+ cleanups.push(fixture.cleanup);
309
+
310
+ const spec = buildPatchValidationHarnessSpec(fixture.bundle);
311
+ const compiled = compileHarnessSpec(spec);
312
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {});
313
+
314
+ expect(result.status).toBe("completed");
315
+ expect(result.terminalNodeId).toBe("validated-fix");
316
+
317
+ const baselineOutput = result.outputs["run-baseline"] as { reproduced: boolean };
318
+ expect(baselineOutput.reproduced).toBe(true);
319
+
320
+ const candidateOutput = result.outputs["run-candidate-reproduce"] as { reproduced: boolean };
321
+ expect(candidateOutput.reproduced).toBe(false);
322
+
323
+ const verificationOutput = result.outputs["run-verification"] as { passed: boolean };
324
+ expect(verificationOutput.passed).toBe(true);
325
+ });
326
+ });
@@ -0,0 +1,121 @@
1
+ import { execFileSync } from "node:child_process";
2
+ import { afterEach, describe, expect, it } from "vitest";
3
+ import { buildPrReviewMergeHarnessSpec } from "../../src/reference/pr-review-merge.js";
4
+ import { compileHarnessSpec } from "../../src/compiler/compile.js";
5
+ import { createFixtureRepo } from "../helpers/createFixtureRepo.js";
6
+ import { runCompiledWorkflow } from "../helpers/runCompiledWorkflow.js";
7
+ // Failure-path suite for the PR review + merge spec: each test checks the terminal node reached or the error raised.
8
+ describe("buildPrReviewMergeHarnessSpec failures", () => {
9
+ const cleanups: Array<() => void> = [];
10
+ // Drain the registered fixture cleanup callbacks after every test, most recently pushed first.
11
+ afterEach(() => {
12
+ while (cleanups.length > 0) {
13
+ cleanups.pop()?.();
14
+ }
15
+ });
16
+ // A verification command that exits 1 should end the run at "reject-verification" even with approving LLM and human responses.
17
+ it("routes verification failures to the rejection terminal", async () => {
18
+ const fixture = createFixtureRepo({
19
+ verificationCommands: ['node -e "process.exit(1)"'],
20
+ });
21
+ cleanups.push(fixture.cleanup);
22
+
23
+ const compiled = compileHarnessSpec(buildPrReviewMergeHarnessSpec(fixture.bundle));
24
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {
25
+ llmResult: {
26
+ approved: true,
27
+ },
28
+ humanResponse: {
29
+ approved: true,
30
+ },
31
+ });
32
+
33
+ expect(result.status).toBe("completed");
34
+ expect(result.terminalNodeId).toBe("reject-verification");
35
+ });
36
+ // createMergeConflict: the run should end at "merge-conflict" and the repo's HEAD should still be the target branch afterwards.
37
+ it("routes merge conflicts to the conflict terminal", async () => {
38
+ const fixture = createFixtureRepo({
39
+ createMergeConflict: true,
40
+ });
41
+ cleanups.push(fixture.cleanup);
42
+
43
+ const compiled = compileHarnessSpec(buildPrReviewMergeHarnessSpec(fixture.bundle));
44
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {
45
+ llmResult: {
46
+ approved: true,
47
+ },
48
+ humanResponse: {
49
+ approved: true,
50
+ },
51
+ });
52
+
53
+ expect(result.status).toBe("completed");
54
+ expect(result.terminalNodeId).toBe("merge-conflict");
55
+
56
+ const currentBranch = execFileSync("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
57
+ cwd: fixture.bundle.repoPath,
58
+ encoding: "utf8",
59
+ }).trim();
60
+ expect(currentBranch).toBe(fixture.bundle.targetBranch);
61
+ });
62
+ // Same conflict routing with the fixture built using mergeConflictMode "both_added".
63
+ it("routes both-added merge conflicts to the conflict terminal", async () => {
64
+ const fixture = createFixtureRepo({
65
+ createMergeConflict: true,
66
+ mergeConflictMode: "both_added",
67
+ });
68
+ cleanups.push(fixture.cleanup);
69
+
70
+ const compiled = compileHarnessSpec(buildPrReviewMergeHarnessSpec(fixture.bundle));
71
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {
72
+ llmResult: {
73
+ approved: true,
74
+ },
75
+ humanResponse: {
76
+ approved: true,
77
+ },
78
+ });
79
+
80
+ expect(result.status).toBe("completed");
81
+ expect(result.terminalNodeId).toBe("merge-conflict");
82
+ });
83
+ // createPostMergeFailureMarker: the run is expected to reject with a post-merge / retry-exhaustion message instead of completing.
84
+ it("surfaces retry exhaustion when post-merge verification keeps failing", async () => {
85
+ const fixture = createFixtureRepo({
86
+ createPostMergeFailureMarker: true,
87
+ });
88
+ cleanups.push(fixture.cleanup);
89
+
90
+ const compiled = compileHarnessSpec(buildPrReviewMergeHarnessSpec(fixture.bundle));
91
+
92
+ await expect(
93
+ runCompiledWorkflow(compiled, fixture.bundle, {
94
+ llmResult: {
95
+ approved: true,
96
+ },
97
+ humanResponse: {
98
+ approved: true,
99
+ },
100
+ }),
101
+ ).rejects.toThrow(/retryable post-merge failure|Verification retry exhausted|post-merge/i);
102
+ });
103
+ // humanResponse.approved=false (with an approving LLM result) should end the run at "reject-human".
104
+ it("routes human rejection to the rejection terminal", async () => {
105
+ const fixture = createFixtureRepo();
106
+ cleanups.push(fixture.cleanup);
107
+
108
+ const compiled = compileHarnessSpec(buildPrReviewMergeHarnessSpec(fixture.bundle));
109
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {
110
+ llmResult: {
111
+ approved: true,
112
+ },
113
+ humanResponse: {
114
+ approved: false,
115
+ },
116
+ });
117
+
118
+ expect(result.status).toBe("completed");
119
+ expect(result.terminalNodeId).toBe("reject-human");
120
+ });
121
+ });
@@ -0,0 +1,55 @@
1
+ import { execFileSync } from "node:child_process";
2
+ import { afterEach, describe, expect, it } from "vitest";
3
+ import { buildPrReviewMergeHarnessSpec } from "../../src/reference/pr-review-merge.js";
4
+ import { compileHarnessSpec } from "../../src/compiler/compile.js";
5
+ import { createFixtureRepo } from "../helpers/createFixtureRepo.js";
6
+ import { runCompiledWorkflow } from "../helpers/runCompiledWorkflow.js";
7
+ // Happy-path suite for the PR review + merge spec: builds, compiles and runs the workflow against a fixture repo.
8
+ describe("buildPrReviewMergeHarnessSpec", () => {
9
+ const cleanups: Array<() => void> = [];
10
+ // Drain the registered fixture cleanup callbacks after every test, most recently pushed first.
11
+ afterEach(() => {
12
+ while (cleanups.length > 0) {
13
+ cleanups.pop()?.();
14
+ }
15
+ });
16
+ // Approving LLM and human responses: expects "complete-success", HEAD on the target branch, and app.txt there containing "feature change".
17
+ it("builds and runs the happy-path simulated PR review + merge workflow", async () => {
18
+ const fixture = createFixtureRepo();
19
+ cleanups.push(fixture.cleanup);
20
+
21
+ const spec = buildPrReviewMergeHarnessSpec(fixture.bundle);
22
+ const compiled = compileHarnessSpec(spec);
23
+ const result = await runCompiledWorkflow(compiled, fixture.bundle, {
24
+ llmResult: {
25
+ approved: true,
26
+ summary: "Looks good to merge.",
27
+ },
28
+ humanResponse: {
29
+ approved: true,
30
+ },
31
+ });
32
+
33
+ expect(spec.graph.nodes.map(node => node.kind)).toEqual(
34
+ expect.arrayContaining(["tool", "llm", "human", "condition", "merge", "subworkflow"]),
35
+ );
36
+ expect(result.status).toBe("completed");
37
+ expect(result.terminalNodeId).toBe("complete-success");
38
+ expect(result.result).toEqual({
39
+ name: "complete-success",
40
+ input: {},
41
+ });
42
+
43
+ const currentBranch = execFileSync("git", ["rev-parse", "--abbrev-ref", "HEAD"], {
44
+ cwd: fixture.bundle.repoPath,
45
+ encoding: "utf8",
46
+ }).trim();
47
+ expect(currentBranch).toBe(fixture.bundle.targetBranch);
48
+
49
+ const mergedFile = execFileSync("git", ["show", `${fixture.bundle.targetBranch}:app.txt`], {
50
+ cwd: fixture.bundle.repoPath,
51
+ encoding: "utf8",
52
+ });
53
+ expect(mergedFile).toContain("feature change");
54
+ });
55
+ });