npm - @quinteroac/agents-coding-toolkit - Versions diffs - 0.1.0-preview → 0.2.0 - Mend

@quinteroac/agents-coding-toolkit 0.1.0-preview → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/README.md +29 -15
package/package.json +14 -4
package/scaffold/.agents/flow/tmpl_it_000001_progress.example.json +20 -0
package/scaffold/.agents/skills/execute-refactor-item/tmpl_SKILL.md +59 -0
package/scaffold/.agents/skills/plan-refactor/tmpl_SKILL.md +89 -9
package/scaffold/.agents/skills/refine-refactor-plan/tmpl_SKILL.md +30 -0
package/scaffold/.agents/tmpl_state_rules.md +0 -1
package/scaffold/schemas/tmpl_prototype-progress.ts +22 -0
package/scaffold/schemas/tmpl_refactor-execution-progress.ts +16 -0
package/scaffold/schemas/tmpl_refactor-prd.ts +14 -0
package/scaffold/schemas/tmpl_state.ts +1 -0
package/scaffold/schemas/tmpl_test-execution-progress.ts +17 -0
package/schemas/issues.ts +19 -0
package/schemas/prototype-progress.ts +22 -0
package/schemas/refactor-execution-progress.ts +16 -0
package/schemas/refactor-prd.ts +14 -0
package/schemas/state.test.ts +58 -0
package/schemas/state.ts +1 -0
package/schemas/test-execution-progress.ts +17 -0
package/schemas/test-plan.test.ts +1 -1
package/schemas/validate-progress.ts +1 -1
package/schemas/validate-state.ts +1 -1
package/src/cli.test.ts +57 -0
package/src/cli.ts +227 -58
package/src/commands/approve-project-context.ts +13 -6
package/src/commands/approve-prototype.test.ts +427 -0
package/src/commands/approve-prototype.ts +185 -0
package/src/commands/approve-refactor-plan.test.ts +254 -0
package/src/commands/approve-refactor-plan.ts +200 -0
package/src/commands/approve-requirement.test.ts +224 -0
package/src/commands/approve-requirement.ts +75 -16
package/src/commands/approve-test-plan.test.ts +2 -2
package/src/commands/approve-test-plan.ts +21 -7
package/src/commands/create-issue.test.ts +2 -2
package/src/commands/create-project-context.ts +31 -25
package/src/commands/create-prototype.test.ts +488 -18
package/src/commands/create-prototype.ts +185 -63
package/src/commands/create-test-plan.ts +8 -6
package/src/commands/define-refactor-plan.test.ts +208 -0
package/src/commands/define-refactor-plan.ts +96 -0
package/src/commands/define-requirement.ts +15 -9
package/src/commands/execute-automated-fix.test.ts +78 -33
package/src/commands/execute-automated-fix.ts +34 -101
package/src/commands/execute-refactor.test.ts +954 -0
package/src/commands/execute-refactor.ts +332 -0
package/src/commands/execute-test-plan.test.ts +24 -16
package/src/commands/execute-test-plan.ts +29 -55
package/src/commands/flow-config.ts +79 -0
package/src/commands/flow.test.ts +755 -0
package/src/commands/flow.ts +405 -0
package/src/commands/refine-project-context.ts +9 -7
package/src/commands/refine-refactor-plan.test.ts +210 -0
package/src/commands/refine-refactor-plan.ts +95 -0
package/src/commands/refine-requirement.ts +9 -6
package/src/commands/refine-test-plan.test.ts +2 -2
package/src/commands/refine-test-plan.ts +9 -6
package/src/commands/start-iteration.test.ts +52 -0
package/src/commands/start-iteration.ts +5 -0
package/src/commands/write-json.ts +102 -97
package/src/flow-cli.test.ts +18 -0
package/src/force-flag.test.ts +144 -0
package/src/guardrail.test.ts +411 -0
package/src/guardrail.ts +82 -0
package/src/install.test.ts +7 -5
package/src/pack.test.ts +2 -1
package/src/progress-utils.ts +34 -0
package/src/readline.ts +23 -0
package/src/write-json-artifact.ts +33 -0
package/scaffold/.agents/flow/tmpl_README.md +0 -7
package/scaffold/.agents/flow/tmpl_iteration_close_checklist.example.md +0 -11
package/schemas/test-plan.ts +0 -20

package/src/commands/execute-refactor.ts ADDED Viewed

@@ -0,0 +1,332 @@
+import { $ } from "bun";
+import { readFile, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+import { RefactorPrdSchema } from "../../scaffold/schemas/tmpl_refactor-prd";
+import {
+  RefactorExecutionProgressSchema,
+  type RefactorExecutionProgress,
+} from "../../scaffold/schemas/tmpl_refactor-execution-progress";
+import {
+  buildPrompt,
+  invokeAgent,
+  loadSkill,
+  type AgentInvokeOptions,
+  type AgentProvider,
+  type AgentResult,
+} from "../agent";
+import { CLI_PATH } from "../cli-path";
+import { assertGuardrail } from "../guardrail";
+import { applyStatusUpdate, idsMatchExactly, sortedValues } from "../progress-utils";
+import { exists, FLOW_REL_DIR, readState, writeState } from "../state";
+/** Options accepted by `runExecuteRefactor`. */
+export interface ExecuteRefactorOptions {
+  /** Agent provider forwarded unchanged to every agent invocation. */
+  provider: AgentProvider;
+  /** Handed to `assertGuardrail` as `{ force }`; treated as `false` when omitted. */
+  force?: boolean;
+}
+// Re-export the progress schema and its type from this command module.
+export { RefactorExecutionProgressSchema };
+export type { RefactorExecutionProgress };
+/** Outcome of a `write-json` invocation as consumed by this command. */
+interface WriteJsonResult {
+  /** Exit code of the invocation; any non-zero value is treated as a failed write. */
+  exitCode: number;
+  /** Captured stderr text; included in the thrown error message when the write fails. */
+  stderr: string;
+}
+/**
+ * Injectable collaborators of `runExecuteRefactor`. Callers may override any subset;
+ * the remaining entries fall back to `defaultDeps`.
+ */
+interface ExecuteRefactorDeps {
+  /** Checks whether the refactor PRD file and the progress file exist. */
+  existsFn: (path: string) => Promise<boolean>;
+  /** Invokes the agent once per refactor item that still needs to run. */
+  invokeAgentFn: (options: AgentInvokeOptions) => Promise<AgentResult>;
+  /** Writes the progress JSON; a non-zero `exitCode` in the result aborts the command. */
+  invokeWriteJsonFn: (
+    projectRoot: string,
+    schemaName: string,
+    outPath: string,
+    data: string,
+  ) => Promise<WriteJsonResult>;
+  /** Loads a skill template by name (used for `execute-refactor-item`). */
+  loadSkillFn: (projectRoot: string, skillName: string) => Promise<string>;
+  /** Receives one line per processed item plus the final summary line. */
+  logFn: (message: string) => void;
+  /** Clock used for every timestamp written to state and to progress entries. */
+  nowFn: () => Date;
+  /** Reads the refactor PRD and an existing progress file as UTF-8 text. */
+  readFileFn: typeof readFile;
+  /** Writes the Markdown execution report. */
+  writeFileFn: typeof writeFile;
+}
+/**
+ * Default `invokeWriteJsonFn`: runs the CLI's `write-json` subcommand through the Bun
+ * shell with `projectRoot` as the working directory.
+ *
+ * The command is configured with `.nothrow()` and `.quiet()`, and the caller inspects
+ * the returned exit code instead of relying on an exception.
+ *
+ * @param projectRoot - Working directory for the spawned command.
+ * @param schemaName - Value passed to `--schema`.
+ * @param outPath - Value passed to `--out`.
+ * @param data - Serialized JSON passed to `--data`.
+ * @returns The command's exit code and its trimmed stderr text.
+ */
+async function runWriteJsonCommand(
+  projectRoot: string,
+  schemaName: string,
+  outPath: string,
+  data: string,
+): Promise<WriteJsonResult> {
+  const command = $`bun ${CLI_PATH} write-json --schema ${schemaName} --out ${outPath} --data ${data}`;
+  const output = await command.cwd(projectRoot).nothrow().quiet();
+  const stderrText = output.stderr.toString().trim();
+  return { exitCode: output.exitCode, stderr: stderrText };
+}
+/**
+ * Production wiring for `ExecuteRefactorDeps`. `runExecuteRefactor` spreads the
+ * caller-supplied overrides on top of this object.
+ */
+const defaultDeps: ExecuteRefactorDeps = {
+  existsFn: exists,
+  invokeAgentFn: invokeAgent,
+  invokeWriteJsonFn: runWriteJsonCommand,
+  loadSkillFn: loadSkill,
+  logFn: console.log,
+  nowFn: () => new Date(),
+  readFileFn: readFile,
+  writeFileFn: writeFile,
+};
+/**
+ * Executes the approved refactor plan of the current iteration, one item at a time.
+ *
+ * Steps, in order: check the phase/status guardrails, load and validate the refactor
+ * PRD, load the `execute-refactor-item` skill, record `refactor_execution` as
+ * `in_progress` in state, create or resume the progress file, invoke the agent
+ * non-interactively for every item whose entry is not `completed`, write the Markdown
+ * report, and finally set `refactor_execution.status` to `completed` only when every
+ * entry completed. A failed item does not stop the loop, and progress is persisted
+ * both before and after each agent invocation.
+ *
+ * @param opts - Agent provider plus the optional `force` flag handed to `assertGuardrail`.
+ * @param deps - Optional overrides for the injectable collaborators (defaults: `defaultDeps`).
+ * @throws Error when the PRD file is missing, unparsable or fails schema validation;
+ *   when the skill cannot be loaded; when an existing progress file is invalid or its
+ *   ids differ from the PRD item ids; or when a progress write exits non-zero.
+ *   Guardrail handling itself is delegated to `assertGuardrail`.
+ */
+export async function runExecuteRefactor(
+  opts: ExecuteRefactorOptions,
+  deps: Partial<ExecuteRefactorDeps> = {},
+): Promise<void> {
+  const resolved: ExecuteRefactorDeps = { ...defaultDeps, ...deps };
+  const force = opts.force ?? false;
+  const projectRoot = process.cwd();
+  const state = await readState(projectRoot);
+  // AC02: the workflow must currently be in the "refactor" phase.
+  const currentPhase = state.current_phase;
+  await assertGuardrail(
+    state,
+    currentPhase !== "refactor",
+    `Cannot execute refactor: current_phase must be 'refactor'. Current phase: '${currentPhase}'.`,
+    { force },
+  );
+  // AC03: the refactor plan must have been approved.
+  const planStatus = state.phases.refactor.refactor_plan.status;
+  await assertGuardrail(
+    state,
+    planStatus !== "approved",
+    `Cannot execute refactor: refactor_plan.status must be 'approved'. Current status: '${planStatus}'. Run \`bun nvst approve refactor-plan\` first.`,
+    { force },
+  );
+  // AC04: an execution that already completed must not be started again.
+  const executionStatus = state.phases.refactor.refactor_execution.status;
+  await assertGuardrail(
+    state,
+    executionStatus === "completed",
+    "Cannot execute refactor: refactor_execution.status is already 'completed'.",
+    { force },
+  );
+  // AC05: read and validate the refactor PRD of the current iteration.
+  const iteration = state.current_iteration;
+  const refactorPrdFileName = `it_${iteration}_refactor-prd.json`;
+  const prdAbsolutePath = join(projectRoot, FLOW_REL_DIR, refactorPrdFileName);
+  const prdRelativePath = join(FLOW_REL_DIR, refactorPrdFileName);
+  const prdPresent = await resolved.existsFn(prdAbsolutePath);
+  if (!prdPresent) {
+    throw new Error(
+      `Refactor PRD file missing: expected ${prdRelativePath}. Run \`bun nvst approve refactor-plan\` first.`,
+    );
+  }
+  let prdJson: unknown;
+  try {
+    const prdText = await resolved.readFileFn(prdAbsolutePath, "utf8");
+    prdJson = JSON.parse(prdText);
+  } catch {
+    throw new Error(`Invalid refactor PRD JSON in ${prdRelativePath}.`);
+  }
+  const prdResult = RefactorPrdSchema.safeParse(prdJson);
+  if (!prdResult.success) {
+    throw new Error(`Refactor PRD schema mismatch in ${prdRelativePath}.`);
+  }
+  const refactorItems = prdResult.data.refactorItems;
+  // Load the skill template that is rendered once per refactor item.
+  let skillTemplate: string;
+  try {
+    skillTemplate = await resolved.loadSkillFn(projectRoot, "execute-refactor-item");
+  } catch {
+    throw new Error(
+      "Required skill missing: expected .agents/skills/execute-refactor-item/SKILL.md.",
+    );
+  }
+  // AC13: name and locations of the progress file.
+  const progressFileName = `it_${iteration}_refactor-execution-progress.json`;
+  const progressAbsolutePath = join(projectRoot, FLOW_REL_DIR, progressFileName);
+  const progressRelativePath = join(FLOW_REL_DIR, progressFileName);
+  // AC06 / AC13: record "in_progress" and the progress file name before any item runs.
+  state.phases.refactor.refactor_execution.status = "in_progress";
+  state.phases.refactor.refactor_execution.file = progressFileName;
+  state.last_updated = resolved.nowFn().toISOString();
+  state.updated_by = "nvst:execute-refactor";
+  await writeState(projectRoot, state);
+  // Single place that persists a progress snapshot and turns a non-zero exit into an error.
+  const persistProgress = async (snapshot: RefactorExecutionProgress): Promise<void> => {
+    const outcome = await resolved.invokeWriteJsonFn(
+      projectRoot,
+      "refactor-execution-progress",
+      progressRelativePath,
+      JSON.stringify(snapshot),
+    );
+    if (outcome.exitCode !== 0) {
+      throw new Error(
+        `Failed to write refactor execution progress: ${outcome.stderr || "write-json exited non-zero"}.`,
+      );
+    }
+  };
+  // Start a fresh progress file, or resume from the one already on disk.
+  let progressData: RefactorExecutionProgress;
+  const progressPresent = await resolved.existsFn(progressAbsolutePath);
+  if (!progressPresent) {
+    const createdAt = resolved.nowFn().toISOString();
+    progressData = {
+      entries: refactorItems.map((item) => ({
+        id: item.id,
+        title: item.title,
+        status: "pending" as const,
+        attempt_count: 0,
+        last_agent_exit_code: null,
+        updated_at: createdAt,
+      })),
+    };
+    await persistProgress(progressData);
+  } else {
+    let progressJson: unknown;
+    try {
+      const progressText = await resolved.readFileFn(progressAbsolutePath, "utf8");
+      progressJson = JSON.parse(progressText);
+    } catch {
+      throw new Error(`Invalid progress JSON in ${progressRelativePath}.`);
+    }
+    const progressResult = RefactorExecutionProgressSchema.safeParse(progressJson);
+    if (!progressResult.success) {
+      throw new Error(`Progress schema mismatch in ${progressRelativePath}.`);
+    }
+    // AC05: a resumed progress file must list exactly the ids of the PRD items.
+    const prdIds = sortedValues(refactorItems.map((item) => item.id));
+    const recordedIds = sortedValues(progressResult.data.entries.map((entry) => entry.id));
+    if (!idsMatchExactly(recordedIds, prdIds)) {
+      throw new Error(
+        "Refactor execution progress file out of sync: entry ids do not match refactor PRD item ids.",
+      );
+    }
+    progressData = progressResult.data;
+  }
+  // AC07-AC10: walk the items in PRD order, skipping those already completed.
+  for (const item of refactorItems) {
+    const entry = progressData.entries.find((candidate) => candidate.id === item.id);
+    if (!entry) {
+      continue;
+    }
+    if (entry.status === "completed") {
+      continue;
+    }
+    // Persist "in_progress" before the agent runs so an interrupted run stays observable (FR-4).
+    const startedAt = resolved.nowFn().toISOString();
+    applyStatusUpdate(entry, "in_progress", startedAt);
+    await persistProgress(progressData);
+    // AC07: render the skill with the item context (FR-6 variable names).
+    const promptVariables = {
+      current_iteration: iteration,
+      item_id: item.id,
+      item_title: item.title,
+      item_description: item.description,
+      item_rationale: item.rationale,
+    };
+    const prompt = buildPrompt(skillTemplate, promptVariables);
+    // US-002-AC01: the agent runs autonomously, i.e. in non-interactive mode.
+    const agentResult = await resolved.invokeAgentFn({
+      provider: opts.provider,
+      prompt,
+      cwd: projectRoot,
+      interactive: false,
+    });
+    // AC09 / AC10: record the attempt; a failure is stored and the loop moves on.
+    const agentExitCode = agentResult.exitCode;
+    entry.attempt_count += 1;
+    entry.last_agent_exit_code = agentExitCode;
+    const finishedAt = resolved.nowFn().toISOString();
+    if (agentExitCode === 0) {
+      applyStatusUpdate(entry, "completed", finishedAt);
+    } else {
+      applyStatusUpdate(entry, "failed", finishedAt);
+    }
+    await persistProgress(progressData);
+    resolved.logFn(`iteration=it_${iteration} item=${item.id} outcome=${entry.status}`);
+  }
+  // US-003: the Markdown report is written whether or not items failed.
+  const reportFileName = `it_${iteration}_refactor-execution-report.md`;
+  const reportPath = join(projectRoot, FLOW_REL_DIR, reportFileName);
+  const reportContent = buildRefactorExecutionReport(iteration, progressData);
+  await resolved.writeFileFn(reportPath, reportContent, "utf8");
+  // AC11 / AC12: only a fully completed run leaves "in_progress".
+  const hasUnfinishedEntries = progressData.entries.some((entry) => entry.status !== "completed");
+  if (!hasUnfinishedEntries) {
+    state.phases.refactor.refactor_execution.status = "completed";
+  }
+  state.last_updated = resolved.nowFn().toISOString();
+  state.updated_by = "nvst:execute-refactor";
+  await writeState(projectRoot, state);
+  resolved.logFn(
+    hasUnfinishedEntries
+      ? "Refactor execution paused with remaining pending or failed items."
+      : "Refactor execution completed for all items.",
+  );
+}
+/**
+ * Renders the refactor execution progress as a Markdown report: a heading, summary
+ * counters and one table row per progress entry.
+ *
+ * Item ids and titles are free-form text, so they are sanitized before being placed
+ * in a table cell: line breaks become spaces and `|` is escaped as `\|`. Otherwise a
+ * single title containing a pipe or newline would shift or break the table columns.
+ * Heading, summary and table are separated by blank lines, matching the layout of the
+ * test execution report, so the table does not depend on a renderer that lets a table
+ * interrupt a paragraph.
+ *
+ * @param iteration - Iteration identifier; rendered as `it_<iteration>`.
+ * @param progress - Progress data whose entries become the table rows.
+ * @returns The Markdown document, terminated by a newline.
+ */
+export function buildRefactorExecutionReport(
+  iteration: string,
+  progress: RefactorExecutionProgress,
+): string {
+  // Keep arbitrary text inside exactly one table cell.
+  const toCell = (value: string): string =>
+    value.replace(/\r\n|\r|\n/g, " ").replace(/\|/g, "\\|");
+  const total = progress.entries.length;
+  const completed = progress.entries.filter((e) => e.status === "completed").length;
+  const failed = progress.entries.filter((e) => e.status === "failed").length;
+  const tableRows = progress.entries
+    .map((e) => {
+      // A null exit code means the agent has not been invoked for this entry yet.
+      const exitCode = e.last_agent_exit_code === null ? "N/A" : String(e.last_agent_exit_code);
+      return `| ${toCell(e.id)} | ${toCell(e.title)} | ${e.status} | ${exitCode} |`;
+    })
+    .join("\n");
+  return [
+    "# Refactor Execution Report",
+    "",
+    `**Iteration:** it_${iteration}`,
+    `**Total:** ${total}`,
+    `**Completed:** ${completed}`,
+    `**Failed:** ${failed}`,
+    "",
+    "| RI ID | Title | Status | Agent Exit Code |",
+    "|-------|-------|--------|-----------------|",
+    tableRows,
+    "",
+  ].join("\n");
+}

package/src/commands/execute-test-plan.test.ts CHANGED Viewed

@@ -114,7 +114,7 @@ describe("execute test-plan command", () => {
     expect(source).toContain("if (command === \"execute\") {");
     expect(source).toContain('if (subcommand === "test-plan") {');
     expect(source).toContain("const { provider, remainingArgs: postAgentArgs } = parseAgentArg(args.slice(1));");
-    expect(source).toContain("await runExecuteTestPlan({ provider });");
+    expect(source).toContain("await runExecuteTestPlan({ provider, force });");
     expect(source).toContain("execute test-plan --agent <provider>");
   });
@@ -317,13 +317,15 @@ describe("execute test-plan command", () => {
       join(projectRoot, ".agents", "flow", "it_000005_test-execution-report.md"),
       "utf8",
     );
-    expect(markdownReportRaw).toContain("# Test Execution Report (Iteration 000005)");
-    expect(markdownReportRaw).toContain("- Total Tests: 3");
-    expect(markdownReportRaw).toContain("- Passed: 3");
-    expect(markdownReportRaw).toContain("- Failed: 0");
+    expect(markdownReportRaw).toContain("# Test Execution Report");
+    expect(markdownReportRaw).toContain("**Iteration:** it_000005");
+    expect(markdownReportRaw).toContain("**Total:** 3");
+    expect(markdownReportRaw).toContain("**Passed:** 3");
+    expect(markdownReportRaw).toContain("**Failed:** 0");
     const state = await readState(projectRoot);
     expect(state.phases.prototype.test_execution.status).toBe("completed");
+    expect(state.phases.prototype.prototype_approved).toBe(false);
     expect(state.updated_by).toBe("nvst:execute-test-plan");
   });
@@ -537,6 +539,10 @@ describe("execute test-plan command", () => {
       expect(rerunBatchPrompt).not.toContain("TC-US001-01");
     });
+    // After retry, all pass -> test execution completed but prototype_approved requires explicit approve
+    const stateAfterRetry = await readState(projectRoot);
+    expect(stateAfterRetry.phases.prototype.prototype_approved).toBe(false);
     const progressRaw = await readFile(
       join(projectRoot, ".agents", "flow", "it_000005_test-execution-progress.json"),
       "utf8",
@@ -671,13 +677,12 @@ describe("execute test-plan command", () => {
           promptManualTestFn: async () => {
             return { status: "passed", evidence: "ok", notes: "ok" };
           },
-          writeFileFn: async (path, data) => {
+          writeJsonArtifactFn: async (path, _schema, data) => {
             const pathAsString = path.toString();
             if (pathAsString.endsWith("it_000005_test-execution-progress.json")) {
-              progressSnapshots.push(data.toString());
+              progressSnapshots.push(JSON.stringify(data, null, 2));
             }
-            await writeFile(pathAsString, data.toString(), "utf8");
-            return 0;
+            await writeFile(pathAsString, `${JSON.stringify(data, null, 2)}\n`, "utf8");
           },
         },
       );
@@ -1673,13 +1678,14 @@ describe("US-004: preserve report and state tracking compatibility", () => {
       "utf8",
     );
-    expect(markdownRaw).toContain("# Test Execution Report (Iteration 000005)");
-    expect(markdownRaw).toContain("- Test Plan: `it_000005_TP.json`");
-    expect(markdownRaw).toContain("- Total Tests: 3");
-    expect(markdownRaw).toContain("- Passed: 1");
-    expect(markdownRaw).toContain("- Failed: 2");
+    expect(markdownRaw).toContain("# Test Execution Report");
+    expect(markdownRaw).toContain("**Iteration:** it_000005");
+    expect(markdownRaw).toContain("**Test Plan:** `it_000005_TP.json`");
+    expect(markdownRaw).toContain("**Total:** 3");
+    expect(markdownRaw).toContain("**Passed:** 1");
+    expect(markdownRaw).toContain("**Failed:** 2");
     expect(markdownRaw).toContain("| Test ID | Description | Status | Correlated Requirements | Artifacts |");
-    expect(markdownRaw).toContain("| --- | --- | --- | --- | --- |");
+    expect(markdownRaw).toContain("|---------|-------------|--------|------------------------|-----------|");
     // All three test cases appear in table
     expect(markdownRaw).toContain("TC-US001-01");
     expect(markdownRaw).toContain("TC-US001-02");
@@ -1736,10 +1742,11 @@ describe("US-004: preserve report and state tracking compatibility", () => {
     expect(stateSnapshots[0]!.status).toBe("in_progress");
     expect(stateSnapshots[0]!.file).toBe("it_000005_test-execution-progress.json");
-    // After execution (all passed): completed
+    // After execution (all passed): completed; prototype_approved requires explicit approve command
     const finalState = await readState(projectRoot);
     expect(finalState.phases.prototype.test_execution.status).toBe("completed");
     expect(finalState.phases.prototype.test_execution.file).toBe("it_000005_test-execution-progress.json");
+    expect(finalState.phases.prototype.prototype_approved).toBe(false);
     expect(finalState.updated_by).toBe("nvst:execute-test-plan");
   });
@@ -1779,6 +1786,7 @@ describe("US-004: preserve report and state tracking compatibility", () => {
     const finalState = await readState(projectRoot);
     expect(finalState.phases.prototype.test_execution.status).toBe("failed");
+    expect(finalState.phases.prototype.prototype_approved).toBe(false);
     expect(finalState.phases.prototype.test_execution.file).toBe("it_000005_test-execution-progress.json");
     expect(finalState.updated_by).toBe("nvst:execute-test-plan");
   });

package/src/commands/execute-test-plan.ts CHANGED Viewed

@@ -11,12 +11,20 @@ import {
   type AgentProvider,
   type AgentResult,
 } from "../agent";
+import { assertGuardrail } from "../guardrail";
+import { applyStatusUpdate, idsMatchExactly, sortedValues } from "../progress-utils";
 import { exists, FLOW_REL_DIR, readState, writeState } from "../state";
-import { TestPlanSchema, type TestPlan } from "../../schemas/test-plan";
+import { writeJsonArtifact, type WriteJsonArtifactFn } from "../write-json-artifact";
+import { TestPlanSchema, type TestPlan } from "../../scaffold/schemas/tmpl_test-plan";
+import {
+  TestExecutionProgressSchema,
+  type TestExecutionProgress,
+} from "../../scaffold/schemas/tmpl_test-execution-progress";
 import { extractJson } from "./create-issue";
 export interface ExecuteTestPlanOptions {
   provider: AgentProvider;
+  force?: boolean;
 }
 const ExecutionPayloadSchema = z.object({
@@ -38,24 +46,6 @@ const BatchResultSchema = z.array(BatchResultItemSchema);
 type BatchResultItem = z.infer<typeof BatchResultItemSchema>;
-const TestExecutionProgressStatusSchema = z.enum(["pending", "in_progress", "passed", "failed"]);
-const TestExecutionProgressEntrySchema = z.object({
-  id: z.string(),
-  type: z.enum(["automated", "exploratory_manual"]),
-  status: TestExecutionProgressStatusSchema,
-  attempt_count: z.number().int().nonnegative(),
-  last_agent_exit_code: z.number().int().nullable(),
-  last_error_summary: z.string(),
-  updated_at: z.string(),
-});
-const TestExecutionProgressSchema = z.object({
-  entries: z.array(TestExecutionProgressEntrySchema),
-});
-type TestExecutionProgress = z.infer<typeof TestExecutionProgressSchema>;
 interface FlatTestCase {
   id: string;
   description: string;
@@ -131,6 +121,7 @@ interface ExecuteTestPlanDeps {
   promptManualTestFn: (testCase: FlatTestCase) => Promise<ManualTestUserInput>;
   readFileFn: typeof readFile;
   writeFileFn: typeof Bun.write;
+  writeJsonArtifactFn: WriteJsonArtifactFn;
 }
 const defaultDeps: ExecuteTestPlanDeps = {
@@ -142,6 +133,7 @@ const defaultDeps: ExecuteTestPlanDeps = {
   promptManualTestFn: promptManualTest,
   readFileFn: readFile,
   writeFileFn: Bun.write,
+  writeJsonArtifactFn: writeJsonArtifact,
 };
 function flattenTests(testPlan: TestPlan): FlatTestCase[] {
@@ -195,24 +187,6 @@ function derivePassFail(status: ExecutionPayload["status"]): "pass" | "fail" | n
   return null;
 }
-function sortedValues(values: string[]): string[] {
-  return [...values].sort((a, b) => a.localeCompare(b));
-}
-function idsMatchExactly(left: string[], right: string[]): boolean {
-  if (left.length !== right.length) {
-    return false;
-  }
-  for (let i = 0; i < left.length; i += 1) {
-    if (left[i] !== right[i]) {
-      return false;
-    }
-  }
-  return true;
-}
 function toArtifactSafeSegment(value: string): string {
   return value.replace(/[^a-zA-Z0-9_-]/g, "_");
 }
@@ -229,15 +203,16 @@ function buildMarkdownReport(report: TestExecutionReport): string {
   const failedCount = totalTests - passedCount;
   const lines = [
-    `# Test Execution Report (Iteration ${report.iteration})`,
+    "# Test Execution Report",
     "",
-    `- Test Plan: \`${report.testPlanFile}\``,
-    `- Total Tests: ${totalTests}`,
-    `- Passed: ${passedCount}`,
-    `- Failed: ${failedCount}`,
+    `**Iteration:** it_${report.iteration}`,
+    `**Test Plan:** \`${report.testPlanFile}\``,
+    `**Total:** ${totalTests}`,
+    `**Passed:** ${passedCount}`,
+    `**Failed:** ${failedCount}`,
     "",
     "| Test ID | Description | Status | Correlated Requirements | Artifacts |",
-    "| --- | --- | --- | --- | --- |",
+    "|---------|-------------|--------|------------------------|-----------|",
   ];
   for (const result of report.results) {
@@ -321,13 +296,15 @@ export async function runExecuteTestPlan(
   const projectRoot = process.cwd();
   const mergedDeps: ExecuteTestPlanDeps = { ...defaultDeps, ...deps };
   const state = await readState(projectRoot);
+  const force = opts.force ?? false;
   const tpGeneration = state.phases.prototype.tp_generation;
-  if (tpGeneration.status !== "created") {
-    throw new Error(
-      `Cannot execute test plan: prototype.tp_generation.status must be created. Current status: '${tpGeneration.status}'. Run \`bun nvst approve test-plan\` first.`,
-    );
-  }
+  await assertGuardrail(
+    state,
+    tpGeneration.status !== "created",
+    `Cannot execute test plan: prototype.tp_generation.status must be created. Current status: '${tpGeneration.status}'. Run \`bun nvst approve test-plan\` first.`,
+    { force },
+  );
   if (!tpGeneration.file) {
     throw new Error("Cannot execute test plan: prototype.tp_generation.file is missing.");
@@ -437,7 +414,7 @@ export async function runExecuteTestPlan(
   const executedTestIds: string[] = [];
   const writeProgress = async () => {
-    await mergedDeps.writeFileFn(progressPath, `${JSON.stringify(progress, null, 2)}\n`);
+    await mergedDeps.writeJsonArtifactFn(progressPath, TestExecutionProgressSchema, progress);
   };
   await mergedDeps.mkdirFn(join(projectRoot, FLOW_REL_DIR), { recursive: true });
@@ -456,8 +433,7 @@ export async function runExecuteTestPlan(
     for (const tc of pendingAutomatedTests) {
       const entry = progress.entries.find((e) => e.id === tc.id);
       if (entry) {
-        entry.status = "in_progress";
-        entry.updated_at = new Date().toISOString();
+        applyStatusUpdate(entry, "in_progress", new Date().toISOString());
       }
     }
     await writeProgress();
@@ -606,8 +582,7 @@ export async function runExecuteTestPlan(
       continue;
     }
-    progressEntry.status = "in_progress";
-    progressEntry.updated_at = new Date().toISOString();
+    applyStatusUpdate(progressEntry, "in_progress", new Date().toISOString());
     await writeProgress();
     const userInput = await mergedDeps.promptManualTestFn(testCase);
@@ -626,8 +601,7 @@ export async function runExecuteTestPlan(
     progressEntry.attempt_count += 1;
     progressEntry.last_agent_exit_code = null;
     progressEntry.last_error_summary = payload.status === "passed" ? "" : payload.notes;
-    progressEntry.status = payload.status === "passed" ? "passed" : "failed";
-    progressEntry.updated_at = new Date().toISOString();
+    applyStatusUpdate(progressEntry, payload.status === "passed" ? "passed" : "failed", new Date().toISOString());
     await writeProgress();
     await mergedDeps.writeFileFn(

package/src/commands/flow-config.ts ADDED Viewed

@@ -0,0 +1,79 @@
+/**
+ * Names a flow step may reference through its `handlerKey` field.
+ * NOTE(review): presumably these are the keys of the handler/dependency object used
+ * by the flow command — confirm against flow.ts.
+ */
+export type FlowHandlerKey =
+  | "runCreateProjectContextFn"
+  | "runCreatePrototypeFn"
+  | "runCreateTestPlanFn"
+  | "runDefineRefactorPlanFn"
+  | "runDefineRequirementFn"
+  | "runExecuteRefactorFn"
+  | "runExecuteTestPlanFn";
+/** Shape every entry of `FLOW_STEPS` is checked against. */
+type FlowStepDefinition = {
+  /** Step identifier; in `FLOW_STEPS` it repeats the entry's own key. */
+  id: string;
+  /** Human-readable step name (the command words separated by a space). */
+  label: string;
+  /** NOTE(review): presumably whether running the step needs an agent provider — confirm in flow.ts. */
+  requiresAgent: boolean;
+  /** Name of the handler associated with this step. */
+  handlerKey: FlowHandlerKey;
+};
+/**
+ * Catalogue of flow steps, keyed by step id.
+ *
+ * `as const satisfies Record<string, FlowStepDefinition>` validates every entry
+ * against `FlowStepDefinition` while keeping the literal key and value types, which
+ * `FlowStepId` and `FlowStep` below are derived from.
+ */
+export const FLOW_STEPS = {
+  "define-requirement": {
+    id: "define-requirement",
+    label: "define requirement",
+    requiresAgent: true,
+    handlerKey: "runDefineRequirementFn",
+  },
+  "create-project-context": {
+    id: "create-project-context",
+    label: "create project-context",
+    requiresAgent: true,
+    handlerKey: "runCreateProjectContextFn",
+  },
+  "create-prototype": {
+    id: "create-prototype",
+    label: "create prototype",
+    requiresAgent: true,
+    handlerKey: "runCreatePrototypeFn",
+  },
+  "create-test-plan": {
+    id: "create-test-plan",
+    label: "create test-plan",
+    requiresAgent: true,
+    handlerKey: "runCreateTestPlanFn",
+  },
+  "execute-test-plan": {
+    id: "execute-test-plan",
+    label: "execute test-plan",
+    requiresAgent: true,
+    handlerKey: "runExecuteTestPlanFn",
+  },
+  "define-refactor-plan": {
+    id: "define-refactor-plan",
+    label: "define refactor-plan",
+    requiresAgent: true,
+    handlerKey: "runDefineRefactorPlanFn",
+  },
+  "execute-refactor": {
+    id: "execute-refactor",
+    label: "execute refactor",
+    requiresAgent: true,
+    handlerKey: "runExecuteRefactorFn",
+  },
+} as const satisfies Record<string, FlowStepDefinition>;
+/** Union of the step ids, i.e. the keys of `FLOW_STEPS`. */
+export type FlowStepId = keyof typeof FLOW_STEPS;
+/** Union of the literal step definitions stored in `FLOW_STEPS`. */
+export type FlowStep = (typeof FLOW_STEPS)[FlowStepId];
+/** Approval targets: camelCase keys mapped to the kebab-case target names. */
+export const FLOW_APPROVAL_TARGETS = {
+  requirement: "requirement",
+  projectContext: "project-context",
+  testPlan: "test-plan",
+  prototype: "prototype",
+  refactorPlan: "refactor-plan",
+} as const;
+/** Union of the target names, i.e. the values of `FLOW_APPROVAL_TARGETS`. */
+export type FlowApprovalTarget = (typeof FLOW_APPROVAL_TARGETS)[keyof typeof FLOW_APPROVAL_TARGETS];
+/** Fixed opening shared by every approval-gate message. */
+export const FLOW_APPROVAL_GATE_PREFIX = "Waiting for approval. Run: nvst approve";
+/**
+ * Builds the message telling the user which approval to run before re-running the flow.
+ *
+ * @param target - Approval target name inserted after the prefix.
+ * @returns The prefix, the target and the fixed closing instruction, separated by single spaces.
+ */
+export function buildApprovalGateMessage(target: FlowApprovalTarget): string {
+  const parts = [FLOW_APPROVAL_GATE_PREFIX, target, "to continue, then re-run nvst flow."];
+  return parts.join(" ");
+}