@quinteroac/agents-coding-toolkit 0.1.0-preview → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +29 -15
  2. package/package.json +14 -4
  3. package/scaffold/.agents/flow/tmpl_it_000001_progress.example.json +20 -0
  4. package/scaffold/.agents/skills/execute-refactor-item/tmpl_SKILL.md +59 -0
  5. package/scaffold/.agents/skills/plan-refactor/tmpl_SKILL.md +89 -9
  6. package/scaffold/.agents/skills/refine-refactor-plan/tmpl_SKILL.md +30 -0
  7. package/scaffold/.agents/tmpl_state_rules.md +0 -1
  8. package/scaffold/schemas/tmpl_prototype-progress.ts +22 -0
  9. package/scaffold/schemas/tmpl_refactor-execution-progress.ts +16 -0
  10. package/scaffold/schemas/tmpl_refactor-prd.ts +14 -0
  11. package/scaffold/schemas/tmpl_state.ts +1 -0
  12. package/scaffold/schemas/tmpl_test-execution-progress.ts +17 -0
  13. package/schemas/issues.ts +19 -0
  14. package/schemas/prototype-progress.ts +22 -0
  15. package/schemas/refactor-execution-progress.ts +16 -0
  16. package/schemas/refactor-prd.ts +14 -0
  17. package/schemas/state.test.ts +58 -0
  18. package/schemas/state.ts +1 -0
  19. package/schemas/test-execution-progress.ts +17 -0
  20. package/schemas/test-plan.test.ts +1 -1
  21. package/schemas/validate-progress.ts +1 -1
  22. package/schemas/validate-state.ts +1 -1
  23. package/src/cli.test.ts +57 -0
  24. package/src/cli.ts +227 -58
  25. package/src/commands/approve-project-context.ts +13 -6
  26. package/src/commands/approve-prototype.test.ts +427 -0
  27. package/src/commands/approve-prototype.ts +185 -0
  28. package/src/commands/approve-refactor-plan.test.ts +254 -0
  29. package/src/commands/approve-refactor-plan.ts +200 -0
  30. package/src/commands/approve-requirement.test.ts +224 -0
  31. package/src/commands/approve-requirement.ts +75 -16
  32. package/src/commands/approve-test-plan.test.ts +2 -2
  33. package/src/commands/approve-test-plan.ts +21 -7
  34. package/src/commands/create-issue.test.ts +2 -2
  35. package/src/commands/create-project-context.ts +31 -25
  36. package/src/commands/create-prototype.test.ts +488 -18
  37. package/src/commands/create-prototype.ts +185 -63
  38. package/src/commands/create-test-plan.ts +8 -6
  39. package/src/commands/define-refactor-plan.test.ts +208 -0
  40. package/src/commands/define-refactor-plan.ts +96 -0
  41. package/src/commands/define-requirement.ts +15 -9
  42. package/src/commands/execute-automated-fix.test.ts +78 -33
  43. package/src/commands/execute-automated-fix.ts +34 -101
  44. package/src/commands/execute-refactor.test.ts +954 -0
  45. package/src/commands/execute-refactor.ts +332 -0
  46. package/src/commands/execute-test-plan.test.ts +24 -16
  47. package/src/commands/execute-test-plan.ts +29 -55
  48. package/src/commands/flow-config.ts +79 -0
  49. package/src/commands/flow.test.ts +755 -0
  50. package/src/commands/flow.ts +405 -0
  51. package/src/commands/refine-project-context.ts +9 -7
  52. package/src/commands/refine-refactor-plan.test.ts +210 -0
  53. package/src/commands/refine-refactor-plan.ts +95 -0
  54. package/src/commands/refine-requirement.ts +9 -6
  55. package/src/commands/refine-test-plan.test.ts +2 -2
  56. package/src/commands/refine-test-plan.ts +9 -6
  57. package/src/commands/start-iteration.test.ts +52 -0
  58. package/src/commands/start-iteration.ts +5 -0
  59. package/src/commands/write-json.ts +102 -97
  60. package/src/flow-cli.test.ts +18 -0
  61. package/src/force-flag.test.ts +144 -0
  62. package/src/guardrail.test.ts +411 -0
  63. package/src/guardrail.ts +82 -0
  64. package/src/install.test.ts +7 -5
  65. package/src/pack.test.ts +2 -1
  66. package/src/progress-utils.ts +34 -0
  67. package/src/readline.ts +23 -0
  68. package/src/write-json-artifact.ts +33 -0
  69. package/scaffold/.agents/flow/tmpl_README.md +0 -7
  70. package/scaffold/.agents/flow/tmpl_iteration_close_checklist.example.md +0 -11
  71. package/schemas/test-plan.ts +0 -20
@@ -0,0 +1,332 @@
1
+ import { $ } from "bun";
2
+ import { readFile, writeFile } from "node:fs/promises";
3
+ import { join } from "node:path";
4
+
5
+ import { RefactorPrdSchema } from "../../scaffold/schemas/tmpl_refactor-prd";
6
+ import {
7
+ RefactorExecutionProgressSchema,
8
+ type RefactorExecutionProgress,
9
+ } from "../../scaffold/schemas/tmpl_refactor-execution-progress";
10
+ import {
11
+ buildPrompt,
12
+ invokeAgent,
13
+ loadSkill,
14
+ type AgentInvokeOptions,
15
+ type AgentProvider,
16
+ type AgentResult,
17
+ } from "../agent";
18
+ import { CLI_PATH } from "../cli-path";
19
+ import { assertGuardrail } from "../guardrail";
20
+ import { applyStatusUpdate, idsMatchExactly, sortedValues } from "../progress-utils";
21
+ import { exists, FLOW_REL_DIR, readState, writeState } from "../state";
22
+
23
/**
 * Caller-supplied options for {@link runExecuteRefactor}.
 */
export interface ExecuteRefactorOptions {
  /** Agent provider handed to every per-item agent invocation. */
  provider: AgentProvider;
  /** Passed to each guardrail check as `{ force }`; treated as `false` when omitted. */
  force?: boolean;
}
27
+
28
+ export { RefactorExecutionProgressSchema };
29
+ export type { RefactorExecutionProgress };
30
+
31
/**
 * Outcome of one `write-json` invocation as consumed by this module.
 */
interface WriteJsonResult {
  /** Process exit code; any non-zero value is treated as a failed write. */
  exitCode: number;
  /** Captured stderr text, included in the thrown error when the write fails. */
  stderr: string;
}
35
+
36
/**
 * Injectable collaborators of {@link runExecuteRefactor}. Every member has a
 * production default in `defaultDeps`; callers may override any subset.
 */
interface ExecuteRefactorDeps {
  /** Checks whether the refactor PRD and the progress file exist. */
  existsFn: (path: string) => Promise<boolean>;
  /** Invokes the agent once per refactor item. */
  invokeAgentFn: (options: AgentInvokeOptions) => Promise<AgentResult>;
  /** Persists the progress document; receives the serialized JSON as `data`. */
  invokeWriteJsonFn: (
    projectRoot: string,
    schemaName: string,
    outPath: string,
    data: string,
  ) => Promise<WriteJsonResult>;
  /** Loads the `execute-refactor-item` skill template. */
  loadSkillFn: (projectRoot: string, skillName: string) => Promise<string>;
  /** Receives the per-item outcome lines and the final summary line. */
  logFn: (message: string) => void;
  /** Clock used for every timestamp written to state and progress. */
  nowFn: () => Date;
  /** Reads the refactor PRD and an existing progress file. */
  readFileFn: typeof readFile;
  /** Writes the markdown execution report. */
  writeFileFn: typeof writeFile;
}
51
+
52
/**
 * Default `invokeWriteJsonFn`: shells out to this CLI's `write-json`
 * subcommand via Bun's shell, with `projectRoot` as the working directory.
 *
 * The call never throws on a non-zero exit (`.nothrow()`) and suppresses the
 * child's output (`.quiet()`); the caller decides what to do with the result.
 *
 * @param projectRoot - Working directory for the spawned command.
 * @param schemaName - Value passed to `--schema`.
 * @param outPath - Value passed to `--out`.
 * @param data - Serialized JSON passed to `--data`.
 * @returns The exit code and the trimmed stderr text of the command.
 */
async function runWriteJsonCommand(
  projectRoot: string,
  schemaName: string,
  outPath: string,
  data: string,
): Promise<WriteJsonResult> {
  const shellOutput =
    await $`bun ${CLI_PATH} write-json --schema ${schemaName} --out ${outPath} --data ${data}`
      .cwd(projectRoot)
      .nothrow()
      .quiet();

  const stderrText = shellOutput.stderr.toString().trim();
  return { exitCode: shellOutput.exitCode, stderr: stderrText };
}
68
+
69
/**
 * Production implementations for every {@link ExecuteRefactorDeps} member.
 * `runExecuteRefactor` spreads caller overrides on top of this object.
 */
const defaultDeps: ExecuteRefactorDeps = {
  // Filesystem access
  existsFn: exists,
  readFileFn: readFile,
  writeFileFn: writeFile,

  // Agent and skill plumbing
  invokeAgentFn: invokeAgent,
  loadSkillFn: loadSkill,

  // Progress persistence goes through the CLI's write-json subcommand
  invokeWriteJsonFn: runWriteJsonCommand,

  // Ambient services
  logFn: console.log,
  nowFn: () => new Date(),
};
79
+
80
/**
 * Persists the refactor execution progress document through the injected
 * write-json command and fails loudly when that command reports an error.
 *
 * @throws Error when the write-json invocation exits with a non-zero code.
 */
async function persistRefactorProgress(
  deps: ExecuteRefactorDeps,
  projectRoot: string,
  progressFileName: string,
  progressData: RefactorExecutionProgress,
): Promise<void> {
  const writeResult = await deps.invokeWriteJsonFn(
    projectRoot,
    "refactor-execution-progress",
    join(FLOW_REL_DIR, progressFileName),
    JSON.stringify(progressData),
  );
  if (writeResult.exitCode !== 0) {
    throw new Error(
      `Failed to write refactor execution progress: ${writeResult.stderr || "write-json exited non-zero"}.`,
    );
  }
}

/**
 * Executes every not-yet-completed item of the current iteration's refactor
 * PRD by invoking the agent once per item, recording the outcome after each
 * invocation, and finally writing a markdown report and updating state.
 *
 * Sequence:
 * 1. Guardrail checks on phase, plan approval and execution status
 *    (each receives `{ force }`; how `force` is honoured is up to
 *    `assertGuardrail`).
 * 2. Load and validate `it_<iteration>_refactor-prd.json` and the
 *    `execute-refactor-item` skill.
 * 3. Mark `refactor_execution` as `in_progress` in state.
 * 4. Load the existing progress file (its entry ids must match the PRD item
 *    ids exactly) or create a fresh one with every item `pending`.
 * 5. For each item that is not `completed`: mark it `in_progress`, invoke the
 *    agent non-interactively, then record `completed` (exit code 0) or
 *    `failed`. A failed item does not stop the loop.
 * 6. Write `it_<iteration>_refactor-execution-report.md`, and set
 *    `refactor_execution.status` to `completed` only if every entry completed.
 *
 * @param opts - Provider and optional `force` flag.
 * @param deps - Optional overrides for the injectable collaborators.
 * @throws Error when the PRD or progress file is missing, unparsable, fails
 *   schema validation or is out of sync, when the skill cannot be loaded, or
 *   when persisting progress fails. Guardrail violations are raised by
 *   `assertGuardrail`.
 */
export async function runExecuteRefactor(
  opts: ExecuteRefactorOptions,
  deps: Partial<ExecuteRefactorDeps> = {},
): Promise<void> {
  const mergedDeps: ExecuteRefactorDeps = { ...defaultDeps, ...deps };
  const force = opts.force ?? false;
  const projectRoot = process.cwd();
  const state = await readState(projectRoot);

  // AC02: Reject if current_phase !== "refactor"
  await assertGuardrail(
    state,
    state.current_phase !== "refactor",
    `Cannot execute refactor: current_phase must be 'refactor'. Current phase: '${state.current_phase}'.`,
    { force },
  );

  // AC03: Reject if refactor_plan.status !== "approved"
  await assertGuardrail(
    state,
    state.phases.refactor.refactor_plan.status !== "approved",
    `Cannot execute refactor: refactor_plan.status must be 'approved'. Current status: '${state.phases.refactor.refactor_plan.status}'. Run \`bun nvst approve refactor-plan\` first.`,
    { force },
  );

  // AC04: Reject if refactor_execution.status is already "completed"
  await assertGuardrail(
    state,
    state.phases.refactor.refactor_execution.status === "completed",
    "Cannot execute refactor: refactor_execution.status is already 'completed'.",
    { force },
  );

  // AC05: Read and validate refactor-prd.json
  const iteration = state.current_iteration;
  const refactorPrdFileName = `it_${iteration}_refactor-prd.json`;
  const refactorPrdPath = join(projectRoot, FLOW_REL_DIR, refactorPrdFileName);

  if (!(await mergedDeps.existsFn(refactorPrdPath))) {
    throw new Error(
      `Refactor PRD file missing: expected ${join(FLOW_REL_DIR, refactorPrdFileName)}. Run \`bun nvst approve refactor-plan\` first.`,
    );
  }

  let parsedPrd: unknown;
  try {
    parsedPrd = JSON.parse(await mergedDeps.readFileFn(refactorPrdPath, "utf8"));
  } catch {
    throw new Error(
      `Invalid refactor PRD JSON in ${join(FLOW_REL_DIR, refactorPrdFileName)}.`,
    );
  }

  const prdValidation = RefactorPrdSchema.safeParse(parsedPrd);
  if (!prdValidation.success) {
    throw new Error(
      `Refactor PRD schema mismatch in ${join(FLOW_REL_DIR, refactorPrdFileName)}.`,
    );
  }

  const refactorItems = prdValidation.data.refactorItems;

  // Load skill
  let skillTemplate: string;
  try {
    skillTemplate = await mergedDeps.loadSkillFn(projectRoot, "execute-refactor-item");
  } catch {
    throw new Error(
      "Required skill missing: expected .agents/skills/execute-refactor-item/SKILL.md.",
    );
  }

  // AC13: Progress file name
  const progressFileName = `it_${iteration}_refactor-execution-progress.json`;
  const progressPath = join(projectRoot, FLOW_REL_DIR, progressFileName);

  // AC06: Set refactor_execution.status = "in_progress" before processing
  // AC13: Set refactor_execution.file
  state.phases.refactor.refactor_execution.status = "in_progress";
  state.phases.refactor.refactor_execution.file = progressFileName;
  state.last_updated = mergedDeps.nowFn().toISOString();
  state.updated_by = "nvst:execute-refactor";
  await writeState(projectRoot, state);

  // Initialize or load progress file
  let progressData: RefactorExecutionProgress;

  if (await mergedDeps.existsFn(progressPath)) {
    let parsedProgress: unknown;
    try {
      parsedProgress = JSON.parse(await mergedDeps.readFileFn(progressPath, "utf8"));
    } catch {
      throw new Error(
        `Invalid progress JSON in ${join(FLOW_REL_DIR, progressFileName)}.`,
      );
    }

    const progressValidation = RefactorExecutionProgressSchema.safeParse(parsedProgress);
    if (!progressValidation.success) {
      throw new Error(
        `Progress schema mismatch in ${join(FLOW_REL_DIR, progressFileName)}.`,
      );
    }

    // AC05: Verify progress item IDs match refactor PRD item IDs
    const expectedIds = sortedValues(refactorItems.map((item) => item.id));
    const existingIds = sortedValues(progressValidation.data.entries.map((entry) => entry.id));
    if (!idsMatchExactly(existingIds, expectedIds)) {
      throw new Error(
        "Refactor execution progress file out of sync: entry ids do not match refactor PRD item ids.",
      );
    }

    progressData = progressValidation.data;
  } else {
    // No progress file yet: seed one entry per PRD item and persist it immediately.
    const now = mergedDeps.nowFn().toISOString();
    progressData = {
      entries: refactorItems.map((item) => ({
        id: item.id,
        title: item.title,
        status: "pending" as const,
        attempt_count: 0,
        last_agent_exit_code: null,
        updated_at: now,
      })),
    };
    await persistRefactorProgress(mergedDeps, projectRoot, progressFileName, progressData);
  }

  // AC07, AC08, AC09, AC10: Process each item in order
  for (const item of refactorItems) {
    const entry = progressData.entries.find((e) => e.id === item.id);
    if (!entry || entry.status === "completed") {
      continue;
    }

    // Set current item to in_progress before invoking agent (FR-4; observability on interrupt)
    applyStatusUpdate(entry, "in_progress", mergedDeps.nowFn().toISOString());
    await persistRefactorProgress(mergedDeps, projectRoot, progressFileName, progressData);

    // AC07: Build prompt with skill and item context (FR-6 variable names)
    const prompt = buildPrompt(skillTemplate, {
      current_iteration: iteration,
      item_id: item.id,
      item_title: item.title,
      item_description: item.description,
      item_rationale: item.rationale,
    });

    // US-002-AC01: Invoke agent in non-interactive mode (autonomous execution)
    const agentResult = await mergedDeps.invokeAgentFn({
      provider: opts.provider,
      prompt,
      cwd: projectRoot,
      interactive: false,
    });

    // AC09 & AC10: Record result after each invocation, continue on failure
    const succeeded = agentResult.exitCode === 0;
    entry.attempt_count = entry.attempt_count + 1;
    entry.last_agent_exit_code = agentResult.exitCode;
    applyStatusUpdate(entry, succeeded ? "completed" : "failed", mergedDeps.nowFn().toISOString());

    await persistRefactorProgress(mergedDeps, projectRoot, progressFileName, progressData);

    mergedDeps.logFn(
      `iteration=it_${iteration} item=${item.id} outcome=${entry.status}`,
    );
  }

  // US-003: Generate markdown execution report (written regardless of failures)
  const reportFileName = `it_${iteration}_refactor-execution-report.md`;
  const reportPath = join(projectRoot, FLOW_REL_DIR, reportFileName);
  const reportContent = buildRefactorExecutionReport(iteration, progressData);
  await mergedDeps.writeFileFn(reportPath, reportContent, "utf8");

  // AC11 & AC12: Update state based on overall result
  const allCompleted = progressData.entries.every((entry) => entry.status === "completed");

  if (allCompleted) {
    // AC11: All completed → set status to "completed"
    state.phases.refactor.refactor_execution.status = "completed";
  }
  // AC12: Any failure → stays "in_progress" (already set above)

  state.last_updated = mergedDeps.nowFn().toISOString();
  state.updated_by = "nvst:execute-refactor";
  await writeState(projectRoot, state);

  if (allCompleted) {
    mergedDeps.logFn("Refactor execution completed for all items.");
  } else {
    mergedDeps.logFn("Refactor execution paused with remaining pending or failed items.");
  }
}
305
+
306
/**
 * Makes free text safe to place inside a single Markdown table cell:
 * line breaks are collapsed to a space and `|` is backslash-escaped so the
 * value cannot terminate the cell or the row early.
 */
function escapeReportTableCell(value: string): string {
  return value.replace(/\r?\n|\r/g, " ").replace(/\|/g, "\\|");
}

/**
 * Renders the markdown execution report for one iteration.
 *
 * The report contains a summary (iteration, total, completed and failed
 * counts) followed by a table with one row per progress entry. An entry whose
 * `last_agent_exit_code` is `null` is shown as `N/A`.
 *
 * Item ids and titles are escaped for table use, so a `|` or a line break in
 * a title no longer corrupts the table; values without those characters are
 * rendered exactly as before.
 *
 * @param iteration - Iteration identifier, rendered as `it_<iteration>`.
 * @param progress - Progress document whose entries populate the table.
 * @returns The complete markdown document, terminated by a newline.
 */
export function buildRefactorExecutionReport(
  iteration: string,
  progress: RefactorExecutionProgress,
): string {
  const total = progress.entries.length;
  const completed = progress.entries.filter((e) => e.status === "completed").length;
  const failed = progress.entries.filter((e) => e.status === "failed").length;

  const tableRows = progress.entries
    .map((e) => {
      const exitCode = e.last_agent_exit_code === null ? "N/A" : String(e.last_agent_exit_code);
      const id = escapeReportTableCell(e.id);
      const title = escapeReportTableCell(e.title);
      return `| ${id} | ${title} | ${e.status} | ${exitCode} |`;
    })
    .join("\n");

  // Joined with "\n"; the trailing empty element yields the final newline.
  return [
    "# Refactor Execution Report",
    "",
    `**Iteration:** it_${iteration}`,
    `**Total:** ${total}`,
    `**Completed:** ${completed}`,
    `**Failed:** ${failed}`,
    "",
    "| RI ID | Title | Status | Agent Exit Code |",
    "|-------|-------|--------|-----------------|",
    tableRows,
    "",
  ].join("\n");
}
@@ -114,7 +114,7 @@ describe("execute test-plan command", () => {
114
114
  expect(source).toContain("if (command === \"execute\") {");
115
115
  expect(source).toContain('if (subcommand === "test-plan") {');
116
116
  expect(source).toContain("const { provider, remainingArgs: postAgentArgs } = parseAgentArg(args.slice(1));");
117
- expect(source).toContain("await runExecuteTestPlan({ provider });");
117
+ expect(source).toContain("await runExecuteTestPlan({ provider, force });");
118
118
  expect(source).toContain("execute test-plan --agent <provider>");
119
119
  });
120
120
 
@@ -317,13 +317,15 @@ describe("execute test-plan command", () => {
317
317
  join(projectRoot, ".agents", "flow", "it_000005_test-execution-report.md"),
318
318
  "utf8",
319
319
  );
320
- expect(markdownReportRaw).toContain("# Test Execution Report (Iteration 000005)");
321
- expect(markdownReportRaw).toContain("- Total Tests: 3");
322
- expect(markdownReportRaw).toContain("- Passed: 3");
323
- expect(markdownReportRaw).toContain("- Failed: 0");
320
+ expect(markdownReportRaw).toContain("# Test Execution Report");
321
+ expect(markdownReportRaw).toContain("**Iteration:** it_000005");
322
+ expect(markdownReportRaw).toContain("**Total:** 3");
323
+ expect(markdownReportRaw).toContain("**Passed:** 3");
324
+ expect(markdownReportRaw).toContain("**Failed:** 0");
324
325
 
325
326
  const state = await readState(projectRoot);
326
327
  expect(state.phases.prototype.test_execution.status).toBe("completed");
328
+ expect(state.phases.prototype.prototype_approved).toBe(false);
327
329
  expect(state.updated_by).toBe("nvst:execute-test-plan");
328
330
  });
329
331
 
@@ -537,6 +539,10 @@ describe("execute test-plan command", () => {
537
539
  expect(rerunBatchPrompt).not.toContain("TC-US001-01");
538
540
  });
539
541
 
542
+ // After retry, all pass -> test execution completed but prototype_approved requires explicit approve
543
+ const stateAfterRetry = await readState(projectRoot);
544
+ expect(stateAfterRetry.phases.prototype.prototype_approved).toBe(false);
545
+
540
546
  const progressRaw = await readFile(
541
547
  join(projectRoot, ".agents", "flow", "it_000005_test-execution-progress.json"),
542
548
  "utf8",
@@ -671,13 +677,12 @@ describe("execute test-plan command", () => {
671
677
  promptManualTestFn: async () => {
672
678
  return { status: "passed", evidence: "ok", notes: "ok" };
673
679
  },
674
- writeFileFn: async (path, data) => {
680
+ writeJsonArtifactFn: async (path, _schema, data) => {
675
681
  const pathAsString = path.toString();
676
682
  if (pathAsString.endsWith("it_000005_test-execution-progress.json")) {
677
- progressSnapshots.push(data.toString());
683
+ progressSnapshots.push(JSON.stringify(data, null, 2));
678
684
  }
679
- await writeFile(pathAsString, data.toString(), "utf8");
680
- return 0;
685
+ await writeFile(pathAsString, `${JSON.stringify(data, null, 2)}\n`, "utf8");
681
686
  },
682
687
  },
683
688
  );
@@ -1673,13 +1678,14 @@ describe("US-004: preserve report and state tracking compatibility", () => {
1673
1678
  "utf8",
1674
1679
  );
1675
1680
 
1676
- expect(markdownRaw).toContain("# Test Execution Report (Iteration 000005)");
1677
- expect(markdownRaw).toContain("- Test Plan: `it_000005_TP.json`");
1678
- expect(markdownRaw).toContain("- Total Tests: 3");
1679
- expect(markdownRaw).toContain("- Passed: 1");
1680
- expect(markdownRaw).toContain("- Failed: 2");
1681
+ expect(markdownRaw).toContain("# Test Execution Report");
1682
+ expect(markdownRaw).toContain("**Iteration:** it_000005");
1683
+ expect(markdownRaw).toContain("**Test Plan:** `it_000005_TP.json`");
1684
+ expect(markdownRaw).toContain("**Total:** 3");
1685
+ expect(markdownRaw).toContain("**Passed:** 1");
1686
+ expect(markdownRaw).toContain("**Failed:** 2");
1681
1687
  expect(markdownRaw).toContain("| Test ID | Description | Status | Correlated Requirements | Artifacts |");
1682
- expect(markdownRaw).toContain("| --- | --- | --- | --- | --- |");
1688
+ expect(markdownRaw).toContain("|---------|-------------|--------|------------------------|-----------|");
1683
1689
  // All three test cases appear in table
1684
1690
  expect(markdownRaw).toContain("TC-US001-01");
1685
1691
  expect(markdownRaw).toContain("TC-US001-02");
@@ -1736,10 +1742,11 @@ describe("US-004: preserve report and state tracking compatibility", () => {
1736
1742
  expect(stateSnapshots[0]!.status).toBe("in_progress");
1737
1743
  expect(stateSnapshots[0]!.file).toBe("it_000005_test-execution-progress.json");
1738
1744
 
1739
- // After execution (all passed): completed
1745
+ // After execution (all passed): completed; prototype_approved requires explicit approve command
1740
1746
  const finalState = await readState(projectRoot);
1741
1747
  expect(finalState.phases.prototype.test_execution.status).toBe("completed");
1742
1748
  expect(finalState.phases.prototype.test_execution.file).toBe("it_000005_test-execution-progress.json");
1749
+ expect(finalState.phases.prototype.prototype_approved).toBe(false);
1743
1750
  expect(finalState.updated_by).toBe("nvst:execute-test-plan");
1744
1751
  });
1745
1752
 
@@ -1779,6 +1786,7 @@ describe("US-004: preserve report and state tracking compatibility", () => {
1779
1786
 
1780
1787
  const finalState = await readState(projectRoot);
1781
1788
  expect(finalState.phases.prototype.test_execution.status).toBe("failed");
1789
+ expect(finalState.phases.prototype.prototype_approved).toBe(false);
1782
1790
  expect(finalState.phases.prototype.test_execution.file).toBe("it_000005_test-execution-progress.json");
1783
1791
  expect(finalState.updated_by).toBe("nvst:execute-test-plan");
1784
1792
  });
@@ -11,12 +11,20 @@ import {
11
11
  type AgentProvider,
12
12
  type AgentResult,
13
13
  } from "../agent";
14
+ import { assertGuardrail } from "../guardrail";
15
+ import { applyStatusUpdate, idsMatchExactly, sortedValues } from "../progress-utils";
14
16
  import { exists, FLOW_REL_DIR, readState, writeState } from "../state";
15
- import { TestPlanSchema, type TestPlan } from "../../schemas/test-plan";
17
+ import { writeJsonArtifact, type WriteJsonArtifactFn } from "../write-json-artifact";
18
+ import { TestPlanSchema, type TestPlan } from "../../scaffold/schemas/tmpl_test-plan";
19
+ import {
20
+ TestExecutionProgressSchema,
21
+ type TestExecutionProgress,
22
+ } from "../../scaffold/schemas/tmpl_test-execution-progress";
16
23
  import { extractJson } from "./create-issue";
17
24
 
18
25
  export interface ExecuteTestPlanOptions {
19
26
  provider: AgentProvider;
27
+ force?: boolean;
20
28
  }
21
29
 
22
30
  const ExecutionPayloadSchema = z.object({
@@ -38,24 +46,6 @@ const BatchResultSchema = z.array(BatchResultItemSchema);
38
46
 
39
47
  type BatchResultItem = z.infer<typeof BatchResultItemSchema>;
40
48
 
41
- const TestExecutionProgressStatusSchema = z.enum(["pending", "in_progress", "passed", "failed"]);
42
-
43
- const TestExecutionProgressEntrySchema = z.object({
44
- id: z.string(),
45
- type: z.enum(["automated", "exploratory_manual"]),
46
- status: TestExecutionProgressStatusSchema,
47
- attempt_count: z.number().int().nonnegative(),
48
- last_agent_exit_code: z.number().int().nullable(),
49
- last_error_summary: z.string(),
50
- updated_at: z.string(),
51
- });
52
-
53
- const TestExecutionProgressSchema = z.object({
54
- entries: z.array(TestExecutionProgressEntrySchema),
55
- });
56
-
57
- type TestExecutionProgress = z.infer<typeof TestExecutionProgressSchema>;
58
-
59
49
  interface FlatTestCase {
60
50
  id: string;
61
51
  description: string;
@@ -131,6 +121,7 @@ interface ExecuteTestPlanDeps {
131
121
  promptManualTestFn: (testCase: FlatTestCase) => Promise<ManualTestUserInput>;
132
122
  readFileFn: typeof readFile;
133
123
  writeFileFn: typeof Bun.write;
124
+ writeJsonArtifactFn: WriteJsonArtifactFn;
134
125
  }
135
126
 
136
127
  const defaultDeps: ExecuteTestPlanDeps = {
@@ -142,6 +133,7 @@ const defaultDeps: ExecuteTestPlanDeps = {
142
133
  promptManualTestFn: promptManualTest,
143
134
  readFileFn: readFile,
144
135
  writeFileFn: Bun.write,
136
+ writeJsonArtifactFn: writeJsonArtifact,
145
137
  };
146
138
 
147
139
  function flattenTests(testPlan: TestPlan): FlatTestCase[] {
@@ -195,24 +187,6 @@ function derivePassFail(status: ExecutionPayload["status"]): "pass" | "fail" | n
195
187
  return null;
196
188
  }
197
189
 
198
- function sortedValues(values: string[]): string[] {
199
- return [...values].sort((a, b) => a.localeCompare(b));
200
- }
201
-
202
- function idsMatchExactly(left: string[], right: string[]): boolean {
203
- if (left.length !== right.length) {
204
- return false;
205
- }
206
-
207
- for (let i = 0; i < left.length; i += 1) {
208
- if (left[i] !== right[i]) {
209
- return false;
210
- }
211
- }
212
-
213
- return true;
214
- }
215
-
216
190
  function toArtifactSafeSegment(value: string): string {
217
191
  return value.replace(/[^a-zA-Z0-9_-]/g, "_");
218
192
  }
@@ -229,15 +203,16 @@ function buildMarkdownReport(report: TestExecutionReport): string {
229
203
  const failedCount = totalTests - passedCount;
230
204
 
231
205
  const lines = [
232
- `# Test Execution Report (Iteration ${report.iteration})`,
206
+ "# Test Execution Report",
233
207
  "",
234
- `- Test Plan: \`${report.testPlanFile}\``,
235
- `- Total Tests: ${totalTests}`,
236
- `- Passed: ${passedCount}`,
237
- `- Failed: ${failedCount}`,
208
+ `**Iteration:** it_${report.iteration}`,
209
+ `**Test Plan:** \`${report.testPlanFile}\``,
210
+ `**Total:** ${totalTests}`,
211
+ `**Passed:** ${passedCount}`,
212
+ `**Failed:** ${failedCount}`,
238
213
  "",
239
214
  "| Test ID | Description | Status | Correlated Requirements | Artifacts |",
240
- "| --- | --- | --- | --- | --- |",
215
+ "|---------|-------------|--------|------------------------|-----------|",
241
216
  ];
242
217
 
243
218
  for (const result of report.results) {
@@ -321,13 +296,15 @@ export async function runExecuteTestPlan(
321
296
  const projectRoot = process.cwd();
322
297
  const mergedDeps: ExecuteTestPlanDeps = { ...defaultDeps, ...deps };
323
298
  const state = await readState(projectRoot);
299
+ const force = opts.force ?? false;
324
300
 
325
301
  const tpGeneration = state.phases.prototype.tp_generation;
326
- if (tpGeneration.status !== "created") {
327
- throw new Error(
328
- `Cannot execute test plan: prototype.tp_generation.status must be created. Current status: '${tpGeneration.status}'. Run \`bun nvst approve test-plan\` first.`,
329
- );
330
- }
302
+ await assertGuardrail(
303
+ state,
304
+ tpGeneration.status !== "created",
305
+ `Cannot execute test plan: prototype.tp_generation.status must be created. Current status: '${tpGeneration.status}'. Run \`bun nvst approve test-plan\` first.`,
306
+ { force },
307
+ );
331
308
 
332
309
  if (!tpGeneration.file) {
333
310
  throw new Error("Cannot execute test plan: prototype.tp_generation.file is missing.");
@@ -437,7 +414,7 @@ export async function runExecuteTestPlan(
437
414
  const executedTestIds: string[] = [];
438
415
 
439
416
  const writeProgress = async () => {
440
- await mergedDeps.writeFileFn(progressPath, `${JSON.stringify(progress, null, 2)}\n`);
417
+ await mergedDeps.writeJsonArtifactFn(progressPath, TestExecutionProgressSchema, progress);
441
418
  };
442
419
 
443
420
  await mergedDeps.mkdirFn(join(projectRoot, FLOW_REL_DIR), { recursive: true });
@@ -456,8 +433,7 @@ export async function runExecuteTestPlan(
456
433
  for (const tc of pendingAutomatedTests) {
457
434
  const entry = progress.entries.find((e) => e.id === tc.id);
458
435
  if (entry) {
459
- entry.status = "in_progress";
460
- entry.updated_at = new Date().toISOString();
436
+ applyStatusUpdate(entry, "in_progress", new Date().toISOString());
461
437
  }
462
438
  }
463
439
  await writeProgress();
@@ -606,8 +582,7 @@ export async function runExecuteTestPlan(
606
582
  continue;
607
583
  }
608
584
 
609
- progressEntry.status = "in_progress";
610
- progressEntry.updated_at = new Date().toISOString();
585
+ applyStatusUpdate(progressEntry, "in_progress", new Date().toISOString());
611
586
  await writeProgress();
612
587
 
613
588
  const userInput = await mergedDeps.promptManualTestFn(testCase);
@@ -626,8 +601,7 @@ export async function runExecuteTestPlan(
626
601
  progressEntry.attempt_count += 1;
627
602
  progressEntry.last_agent_exit_code = null;
628
603
  progressEntry.last_error_summary = payload.status === "passed" ? "" : payload.notes;
629
- progressEntry.status = payload.status === "passed" ? "passed" : "failed";
630
- progressEntry.updated_at = new Date().toISOString();
604
+ applyStatusUpdate(progressEntry, payload.status === "passed" ? "passed" : "failed", new Date().toISOString());
631
605
  await writeProgress();
632
606
 
633
607
  await mergedDeps.writeFileFn(
@@ -0,0 +1,79 @@
1
/**
 * Names a flow step may reference through its `handlerKey`.
 */
export type FlowHandlerKey =
  | "runCreateProjectContextFn"
  | "runCreatePrototypeFn"
  | "runCreateTestPlanFn"
  | "runDefineRefactorPlanFn"
  | "runDefineRequirementFn"
  | "runExecuteRefactorFn"
  | "runExecuteTestPlanFn";
9
+
10
/**
 * Shape every entry of `FLOW_STEPS` is checked against.
 */
type FlowStepDefinition = {
  /** Step identifier; in `FLOW_STEPS` it equals the entry's key. */
  id: string;
  /** Human-readable command label, e.g. "create test-plan". */
  label: string;
  /** Whether the step requires an agent. */
  requiresAgent: boolean;
  /** Name of the handler associated with this step. */
  handlerKey: FlowHandlerKey;
};
16
+
17
/**
 * Table of flow steps keyed by step id.
 *
 * `as const` keeps every value as a literal type, while `satisfies` checks
 * each entry against `FlowStepDefinition` without widening it.
 */
export const FLOW_STEPS = {
  "define-requirement": {
    id: "define-requirement",
    label: "define requirement",
    requiresAgent: true,
    handlerKey: "runDefineRequirementFn",
  },
  "create-project-context": {
    id: "create-project-context",
    label: "create project-context",
    requiresAgent: true,
    handlerKey: "runCreateProjectContextFn",
  },
  "create-prototype": {
    id: "create-prototype",
    label: "create prototype",
    requiresAgent: true,
    handlerKey: "runCreatePrototypeFn",
  },
  "create-test-plan": {
    id: "create-test-plan",
    label: "create test-plan",
    requiresAgent: true,
    handlerKey: "runCreateTestPlanFn",
  },
  "execute-test-plan": {
    id: "execute-test-plan",
    label: "execute test-plan",
    requiresAgent: true,
    handlerKey: "runExecuteTestPlanFn",
  },
  "define-refactor-plan": {
    id: "define-refactor-plan",
    label: "define refactor-plan",
    requiresAgent: true,
    handlerKey: "runDefineRefactorPlanFn",
  },
  "execute-refactor": {
    id: "execute-refactor",
    label: "execute refactor",
    requiresAgent: true,
    handlerKey: "runExecuteRefactorFn",
  },
} as const satisfies Record<string, FlowStepDefinition>;
61
+
62
/** Union of the keys of `FLOW_STEPS`. */
export type FlowStepId = keyof typeof FLOW_STEPS;

/** Union of the literal step objects stored in `FLOW_STEPS`. */
export type FlowStep = (typeof FLOW_STEPS)[FlowStepId];
64
+
65
/**
 * Approval targets, keyed by a camelCase name and valued by the argument
 * that follows `nvst approve` in the gate message.
 */
export const FLOW_APPROVAL_TARGETS = {
  requirement: "requirement",
  projectContext: "project-context",
  testPlan: "test-plan",
  prototype: "prototype",
  refactorPlan: "refactor-plan",
} as const;

/** Union of the values of `FLOW_APPROVAL_TARGETS`. */
export type FlowApprovalTarget = (typeof FLOW_APPROVAL_TARGETS)[keyof typeof FLOW_APPROVAL_TARGETS];

/** Fixed leading part of every approval-gate message. */
export const FLOW_APPROVAL_GATE_PREFIX = "Waiting for approval. Run: nvst approve";

/**
 * Builds the message telling the user which approval to run before the flow
 * can continue.
 *
 * @param target - Approval target inserted after the fixed prefix.
 * @returns The prefix, the target, and the "re-run nvst flow" instruction.
 */
export function buildApprovalGateMessage(target: FlowApprovalTarget): string {
  const message = `${FLOW_APPROVAL_GATE_PREFIX} ${target} to continue, then re-run nvst flow.`;
  return message;
}