@quinteroac/agents-coding-toolkit 0.1.1-preview.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -15
- package/package.json +2 -1
- package/scaffold/.agents/flow/tmpl_it_000001_progress.example.json +20 -0
- package/scaffold/.agents/skills/execute-refactor-item/tmpl_SKILL.md +5 -5
- package/scaffold/schemas/tmpl_prototype-progress.ts +22 -0
- package/scaffold/schemas/tmpl_test-execution-progress.ts +17 -0
- package/schemas/issues.ts +19 -0
- package/schemas/prototype-progress.ts +22 -0
- package/schemas/test-execution-progress.ts +17 -0
- package/schemas/validate-progress.ts +1 -1
- package/schemas/validate-state.ts +1 -1
- package/src/cli.ts +51 -6
- package/src/commands/approve-prototype.test.ts +427 -0
- package/src/commands/approve-prototype.ts +185 -0
- package/src/commands/create-prototype.test.ts +459 -7
- package/src/commands/create-prototype.ts +168 -56
- package/src/commands/execute-automated-fix.test.ts +78 -33
- package/src/commands/execute-automated-fix.ts +34 -101
- package/src/commands/execute-refactor.test.ts +3 -3
- package/src/commands/execute-refactor.ts +8 -12
- package/src/commands/execute-test-plan.test.ts +20 -19
- package/src/commands/execute-test-plan.ts +19 -52
- package/src/commands/flow-config.ts +79 -0
- package/src/commands/flow.test.ts +755 -0
- package/src/commands/flow.ts +405 -0
- package/src/commands/start-iteration.test.ts +52 -0
- package/src/commands/start-iteration.ts +5 -0
- package/src/flow-cli.test.ts +18 -0
- package/src/guardrail.ts +2 -24
- package/src/progress-utils.ts +34 -0
- package/src/readline.ts +23 -0
- package/src/write-json-artifact.ts +33 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { readFile
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
2
|
import { join } from "node:path";
|
|
3
3
|
import { $ as dollar } from "bun";
|
|
4
4
|
|
|
@@ -11,7 +11,8 @@ import {
|
|
|
11
11
|
type AgentResult,
|
|
12
12
|
} from "../agent";
|
|
13
13
|
import { exists, FLOW_REL_DIR, readState } from "../state";
|
|
14
|
-
import { type Issue } from "../../scaffold/schemas/tmpl_issues";
|
|
14
|
+
import { type Issue, IssuesSchema } from "../../scaffold/schemas/tmpl_issues";
|
|
15
|
+
import { writeJsonArtifact, type WriteJsonArtifactFn } from "../write-json-artifact";
|
|
15
16
|
|
|
16
17
|
export interface ExecuteAutomatedFixOptions {
|
|
17
18
|
provider: AgentProvider;
|
|
@@ -27,7 +28,7 @@ interface ExecuteAutomatedFixDeps {
|
|
|
27
28
|
nowFn: () => Date;
|
|
28
29
|
readFileFn: typeof readFile;
|
|
29
30
|
runCommitFn: (projectRoot: string, message: string) => Promise<number>;
|
|
30
|
-
|
|
31
|
+
writeJsonArtifactFn: WriteJsonArtifactFn;
|
|
31
32
|
}
|
|
32
33
|
|
|
33
34
|
const defaultDeps: ExecuteAutomatedFixDeps = {
|
|
@@ -44,7 +45,7 @@ const defaultDeps: ExecuteAutomatedFixDeps = {
|
|
|
44
45
|
.quiet();
|
|
45
46
|
return result.exitCode;
|
|
46
47
|
},
|
|
47
|
-
|
|
48
|
+
writeJsonArtifactFn: writeJsonArtifact,
|
|
48
49
|
};
|
|
49
50
|
|
|
50
51
|
function isNetworkErrorText(text: string): boolean {
|
|
@@ -72,106 +73,12 @@ function sortIssuesById(issues: Issue[]): Issue[] {
|
|
|
72
73
|
return [...issues].sort((left, right) => left.id.localeCompare(right.id));
|
|
73
74
|
}
|
|
74
75
|
|
|
75
|
-
const ALLOWED_ISSUE_STATUSES: Set<Issue["status"]> = new Set([
|
|
76
|
-
"open",
|
|
77
|
-
"fixed",
|
|
78
|
-
"retry",
|
|
79
|
-
"manual-fix",
|
|
80
|
-
]);
|
|
81
|
-
|
|
82
|
-
function asRecord(value: unknown): Record<string, unknown> | null {
|
|
83
|
-
if (typeof value !== "object" || value === null || Array.isArray(value)) {
|
|
84
|
-
return null;
|
|
85
|
-
}
|
|
86
|
-
return value as Record<string, unknown>;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
function parseIssuesForProcessing(
|
|
90
|
-
raw: unknown,
|
|
91
|
-
flowRelativePath: string,
|
|
92
|
-
logFn: (message: string) => void,
|
|
93
|
-
): Issue[] {
|
|
94
|
-
if (!Array.isArray(raw)) {
|
|
95
|
-
throw new Error(
|
|
96
|
-
`Deterministic validation error: issues schema mismatch in ${flowRelativePath}.`,
|
|
97
|
-
);
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
const parsedIssues: Issue[] = [];
|
|
101
|
-
const seenIds = new Set<string>();
|
|
102
|
-
|
|
103
|
-
for (const [index, item] of raw.entries()) {
|
|
104
|
-
const issue = asRecord(item);
|
|
105
|
-
if (!issue) {
|
|
106
|
-
logFn(
|
|
107
|
-
`Warning: Skipping invalid issue at index ${index} in ${flowRelativePath}: expected an object.`,
|
|
108
|
-
);
|
|
109
|
-
continue;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
const id = issue.id;
|
|
113
|
-
const title = issue.title;
|
|
114
|
-
const description = issue.description;
|
|
115
|
-
const status = issue.status;
|
|
116
|
-
|
|
117
|
-
const missingFields: string[] = [];
|
|
118
|
-
if (typeof id !== "string") {
|
|
119
|
-
missingFields.push("id");
|
|
120
|
-
}
|
|
121
|
-
if (typeof title !== "string") {
|
|
122
|
-
missingFields.push("title");
|
|
123
|
-
}
|
|
124
|
-
if (typeof description !== "string") {
|
|
125
|
-
missingFields.push("description");
|
|
126
|
-
}
|
|
127
|
-
if (typeof status !== "string") {
|
|
128
|
-
missingFields.push("status");
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
if (missingFields.length > 0) {
|
|
132
|
-
logFn(
|
|
133
|
-
`Warning: Skipping issue at index ${index} in ${flowRelativePath}: missing required field(s): ${missingFields.join(", ")}.`,
|
|
134
|
-
);
|
|
135
|
-
continue;
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
const validId = id as string;
|
|
139
|
-
const validTitle = title as string;
|
|
140
|
-
const validDescription = description as string;
|
|
141
|
-
const validStatus = status as Issue["status"];
|
|
142
|
-
|
|
143
|
-
if (!ALLOWED_ISSUE_STATUSES.has(validStatus)) {
|
|
144
|
-
logFn(
|
|
145
|
-
`Warning: Skipping issue ${validId} in ${flowRelativePath}: invalid status '${status}'.`,
|
|
146
|
-
);
|
|
147
|
-
continue;
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
if (seenIds.has(validId)) {
|
|
151
|
-
logFn(
|
|
152
|
-
`Warning: Skipping duplicate issue id '${validId}' in ${flowRelativePath}.`,
|
|
153
|
-
);
|
|
154
|
-
continue;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
seenIds.add(validId);
|
|
158
|
-
parsedIssues.push({
|
|
159
|
-
id: validId,
|
|
160
|
-
title: validTitle,
|
|
161
|
-
description: validDescription,
|
|
162
|
-
status: validStatus,
|
|
163
|
-
});
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
return parsedIssues;
|
|
167
|
-
}
|
|
168
|
-
|
|
169
76
|
async function writeIssuesFile(
|
|
170
77
|
issuesPath: string,
|
|
171
78
|
issues: Issue[],
|
|
172
79
|
deps: ExecuteAutomatedFixDeps,
|
|
173
80
|
): Promise<void> {
|
|
174
|
-
await deps.
|
|
81
|
+
await deps.writeJsonArtifactFn(issuesPath, IssuesSchema, issues);
|
|
175
82
|
}
|
|
176
83
|
|
|
177
84
|
async function commitIssueUpdate(
|
|
@@ -185,6 +92,25 @@ async function commitIssueUpdate(
|
|
|
185
92
|
return exitCode === 0;
|
|
186
93
|
}
|
|
187
94
|
|
|
95
|
+
/**
|
|
96
|
+
* Guardrail policy: `execute-automated-fix` is an explicit exception to the
|
|
97
|
+
* phase-based guardrail system used by `execute-test-plan` and
|
|
98
|
+
* `execute-refactor`. Those commands assert `current_phase` and prerequisite
|
|
99
|
+
* status fields via `assertGuardrail` before running, because they depend on
|
|
100
|
+
* phase-specific state transitions being in place.
|
|
101
|
+
*
|
|
102
|
+
* `execute-automated-fix` is deliberately phase-independent: issues can exist
|
|
103
|
+
* and require automated remediation at any point in the workflow (prototype or
|
|
104
|
+
* refactor phases, or during reruns after partial fixes). Its sole
|
|
105
|
+
* prerequisite is the existence of a valid issues file for the current
|
|
106
|
+
* iteration, which is already enforced by a hard error below. Adding a
|
|
107
|
+
* phase-based guardrail here would prevent legitimate use cases (e.g. fixing
|
|
108
|
+
* issues discovered late in a refactor pass) without adding safety value.
|
|
109
|
+
*
|
|
110
|
+
* `--force` is therefore not applicable to this command and is not accepted as
|
|
111
|
+
* a flag (any unrecognised option, including `--force`, is rejected by the CLI
|
|
112
|
+
* router before reaching this function).
|
|
113
|
+
*/
|
|
188
114
|
export async function runExecuteAutomatedFix(
|
|
189
115
|
opts: ExecuteAutomatedFixOptions,
|
|
190
116
|
deps: Partial<ExecuteAutomatedFixDeps> = {},
|
|
@@ -224,7 +150,14 @@ export async function runExecuteAutomatedFix(
|
|
|
224
150
|
);
|
|
225
151
|
}
|
|
226
152
|
|
|
227
|
-
const
|
|
153
|
+
const issuesValidation = IssuesSchema.safeParse(parsedIssuesRaw);
|
|
154
|
+
if (!issuesValidation.success) {
|
|
155
|
+
throw new Error(
|
|
156
|
+
`Deterministic validation error: issues schema mismatch in ${flowRelativePath}.`,
|
|
157
|
+
);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
const issues = sortIssuesById(issuesValidation.data);
|
|
228
161
|
const openIssues = issues.filter((issue) => issue.status === "open");
|
|
229
162
|
|
|
230
163
|
if (openIssues.length === 0) {
|
|
@@ -233,7 +166,7 @@ export async function runExecuteAutomatedFix(
|
|
|
233
166
|
}
|
|
234
167
|
|
|
235
168
|
const skillTemplate = await mergedDeps.loadSkillFn(projectRoot, "automated-fix");
|
|
236
|
-
const maxIssuesToProcess = opts.iterations ??
|
|
169
|
+
const maxIssuesToProcess = opts.iterations ?? openIssues.length;
|
|
237
170
|
const issuesToProcess = openIssues.slice(0, maxIssuesToProcess);
|
|
238
171
|
const maxRetries = opts.retryOnFail ?? 0;
|
|
239
172
|
|
|
@@ -283,8 +283,8 @@ describe("execute refactor command", () => {
|
|
|
283
283
|
expect(capturedPrompts[0]).toContain("My Rationale");
|
|
284
284
|
});
|
|
285
285
|
|
|
286
|
-
//
|
|
287
|
-
test("invokes agent
|
|
286
|
+
// US-002-AC01: Agent invoked in non-interactive mode
|
|
287
|
+
test("invokes agent with interactive: false (non-interactive mode)", async () => {
|
|
288
288
|
const projectRoot = await createProjectRoot();
|
|
289
289
|
createdRoots.push(projectRoot);
|
|
290
290
|
await seedState(projectRoot);
|
|
@@ -308,7 +308,7 @@ describe("execute refactor command", () => {
|
|
|
308
308
|
});
|
|
309
309
|
|
|
310
310
|
expect(capturedOptions).toHaveLength(1);
|
|
311
|
-
expect(capturedOptions[0].interactive).toBe(
|
|
311
|
+
expect(capturedOptions[0].interactive).toBe(false);
|
|
312
312
|
expect(capturedOptions[0].provider).toBe("codex");
|
|
313
313
|
});
|
|
314
314
|
|
|
@@ -17,6 +17,7 @@ import {
|
|
|
17
17
|
} from "../agent";
|
|
18
18
|
import { CLI_PATH } from "../cli-path";
|
|
19
19
|
import { assertGuardrail } from "../guardrail";
|
|
20
|
+
import { applyStatusUpdate, idsMatchExactly, sortedValues } from "../progress-utils";
|
|
20
21
|
import { exists, FLOW_REL_DIR, readState, writeState } from "../state";
|
|
21
22
|
|
|
22
23
|
export interface ExecuteRefactorOptions {
|
|
@@ -181,12 +182,9 @@ export async function runExecuteRefactor(
|
|
|
181
182
|
}
|
|
182
183
|
|
|
183
184
|
// AC05: Verify progress item IDs match refactor PRD item IDs
|
|
184
|
-
const expectedIds =
|
|
185
|
-
const existingIds =
|
|
186
|
-
if (
|
|
187
|
-
expectedIds.length !== existingIds.length ||
|
|
188
|
-
expectedIds.some((id, i) => id !== existingIds[i])
|
|
189
|
-
) {
|
|
185
|
+
const expectedIds = sortedValues(refactorItems.map((item) => item.id));
|
|
186
|
+
const existingIds = sortedValues(progressValidation.data.entries.map((entry) => entry.id));
|
|
187
|
+
if (!idsMatchExactly(existingIds, expectedIds)) {
|
|
190
188
|
throw new Error(
|
|
191
189
|
"Refactor execution progress file out of sync: entry ids do not match refactor PRD item ids.",
|
|
192
190
|
);
|
|
@@ -226,8 +224,7 @@ export async function runExecuteRefactor(
|
|
|
226
224
|
}
|
|
227
225
|
|
|
228
226
|
// Set current item to in_progress before invoking agent (FR-4; observability on interrupt)
|
|
229
|
-
entry
|
|
230
|
-
entry.updated_at = mergedDeps.nowFn().toISOString();
|
|
227
|
+
applyStatusUpdate(entry, "in_progress", mergedDeps.nowFn().toISOString());
|
|
231
228
|
const writeInProgressResult = await mergedDeps.invokeWriteJsonFn(
|
|
232
229
|
projectRoot,
|
|
233
230
|
"refactor-execution-progress",
|
|
@@ -249,20 +246,19 @@ export async function runExecuteRefactor(
|
|
|
249
246
|
item_rationale: item.rationale,
|
|
250
247
|
});
|
|
251
248
|
|
|
252
|
-
//
|
|
249
|
+
// US-002-AC01: Invoke agent in non-interactive mode (autonomous execution)
|
|
253
250
|
const agentResult = await mergedDeps.invokeAgentFn({
|
|
254
251
|
provider: opts.provider,
|
|
255
252
|
prompt,
|
|
256
253
|
cwd: projectRoot,
|
|
257
|
-
interactive:
|
|
254
|
+
interactive: false,
|
|
258
255
|
});
|
|
259
256
|
|
|
260
257
|
// AC09 & AC10: Record result after each invocation, continue on failure
|
|
261
258
|
const succeeded = agentResult.exitCode === 0;
|
|
262
|
-
entry.status = succeeded ? "completed" : "failed";
|
|
263
259
|
entry.attempt_count = entry.attempt_count + 1;
|
|
264
260
|
entry.last_agent_exit_code = agentResult.exitCode;
|
|
265
|
-
entry
|
|
261
|
+
applyStatusUpdate(entry, succeeded ? "completed" : "failed", mergedDeps.nowFn().toISOString());
|
|
266
262
|
|
|
267
263
|
const writeResult = await mergedDeps.invokeWriteJsonFn(
|
|
268
264
|
projectRoot,
|
|
@@ -317,14 +317,15 @@ describe("execute test-plan command", () => {
|
|
|
317
317
|
join(projectRoot, ".agents", "flow", "it_000005_test-execution-report.md"),
|
|
318
318
|
"utf8",
|
|
319
319
|
);
|
|
320
|
-
expect(markdownReportRaw).toContain("# Test Execution Report
|
|
321
|
-
expect(markdownReportRaw).toContain("
|
|
322
|
-
expect(markdownReportRaw).toContain("
|
|
323
|
-
expect(markdownReportRaw).toContain("
|
|
320
|
+
expect(markdownReportRaw).toContain("# Test Execution Report");
|
|
321
|
+
expect(markdownReportRaw).toContain("**Iteration:** it_000005");
|
|
322
|
+
expect(markdownReportRaw).toContain("**Total:** 3");
|
|
323
|
+
expect(markdownReportRaw).toContain("**Passed:** 3");
|
|
324
|
+
expect(markdownReportRaw).toContain("**Failed:** 0");
|
|
324
325
|
|
|
325
326
|
const state = await readState(projectRoot);
|
|
326
327
|
expect(state.phases.prototype.test_execution.status).toBe("completed");
|
|
327
|
-
expect(state.phases.prototype.prototype_approved).toBe(
|
|
328
|
+
expect(state.phases.prototype.prototype_approved).toBe(false);
|
|
328
329
|
expect(state.updated_by).toBe("nvst:execute-test-plan");
|
|
329
330
|
});
|
|
330
331
|
|
|
@@ -538,9 +539,9 @@ describe("execute test-plan command", () => {
|
|
|
538
539
|
expect(rerunBatchPrompt).not.toContain("TC-US001-01");
|
|
539
540
|
});
|
|
540
541
|
|
|
541
|
-
// After retry, all pass ->
|
|
542
|
+
// After retry, all pass -> test execution completed but prototype_approved requires explicit approve
|
|
542
543
|
const stateAfterRetry = await readState(projectRoot);
|
|
543
|
-
expect(stateAfterRetry.phases.prototype.prototype_approved).toBe(
|
|
544
|
+
expect(stateAfterRetry.phases.prototype.prototype_approved).toBe(false);
|
|
544
545
|
|
|
545
546
|
const progressRaw = await readFile(
|
|
546
547
|
join(projectRoot, ".agents", "flow", "it_000005_test-execution-progress.json"),
|
|
@@ -676,13 +677,12 @@ describe("execute test-plan command", () => {
|
|
|
676
677
|
promptManualTestFn: async () => {
|
|
677
678
|
return { status: "passed", evidence: "ok", notes: "ok" };
|
|
678
679
|
},
|
|
679
|
-
|
|
680
|
+
writeJsonArtifactFn: async (path, _schema, data) => {
|
|
680
681
|
const pathAsString = path.toString();
|
|
681
682
|
if (pathAsString.endsWith("it_000005_test-execution-progress.json")) {
|
|
682
|
-
progressSnapshots.push(
|
|
683
|
+
progressSnapshots.push(JSON.stringify(data, null, 2));
|
|
683
684
|
}
|
|
684
|
-
await writeFile(pathAsString,
|
|
685
|
-
return 0;
|
|
685
|
+
await writeFile(pathAsString, `${JSON.stringify(data, null, 2)}\n`, "utf8");
|
|
686
686
|
},
|
|
687
687
|
},
|
|
688
688
|
);
|
|
@@ -1678,13 +1678,14 @@ describe("US-004: preserve report and state tracking compatibility", () => {
|
|
|
1678
1678
|
"utf8",
|
|
1679
1679
|
);
|
|
1680
1680
|
|
|
1681
|
-
expect(markdownRaw).toContain("# Test Execution Report
|
|
1682
|
-
expect(markdownRaw).toContain("
|
|
1683
|
-
expect(markdownRaw).toContain("
|
|
1684
|
-
expect(markdownRaw).toContain("
|
|
1685
|
-
expect(markdownRaw).toContain("
|
|
1681
|
+
expect(markdownRaw).toContain("# Test Execution Report");
|
|
1682
|
+
expect(markdownRaw).toContain("**Iteration:** it_000005");
|
|
1683
|
+
expect(markdownRaw).toContain("**Test Plan:** `it_000005_TP.json`");
|
|
1684
|
+
expect(markdownRaw).toContain("**Total:** 3");
|
|
1685
|
+
expect(markdownRaw).toContain("**Passed:** 1");
|
|
1686
|
+
expect(markdownRaw).toContain("**Failed:** 2");
|
|
1686
1687
|
expect(markdownRaw).toContain("| Test ID | Description | Status | Correlated Requirements | Artifacts |");
|
|
1687
|
-
expect(markdownRaw).toContain("
|
|
1688
|
+
expect(markdownRaw).toContain("|---------|-------------|--------|------------------------|-----------|");
|
|
1688
1689
|
// All three test cases appear in table
|
|
1689
1690
|
expect(markdownRaw).toContain("TC-US001-01");
|
|
1690
1691
|
expect(markdownRaw).toContain("TC-US001-02");
|
|
@@ -1741,11 +1742,11 @@ describe("US-004: preserve report and state tracking compatibility", () => {
|
|
|
1741
1742
|
expect(stateSnapshots[0]!.status).toBe("in_progress");
|
|
1742
1743
|
expect(stateSnapshots[0]!.file).toBe("it_000005_test-execution-progress.json");
|
|
1743
1744
|
|
|
1744
|
-
// After execution (all passed): completed
|
|
1745
|
+
// After execution (all passed): completed; prototype_approved requires explicit approve command
|
|
1745
1746
|
const finalState = await readState(projectRoot);
|
|
1746
1747
|
expect(finalState.phases.prototype.test_execution.status).toBe("completed");
|
|
1747
1748
|
expect(finalState.phases.prototype.test_execution.file).toBe("it_000005_test-execution-progress.json");
|
|
1748
|
-
expect(finalState.phases.prototype.prototype_approved).toBe(
|
|
1749
|
+
expect(finalState.phases.prototype.prototype_approved).toBe(false);
|
|
1749
1750
|
expect(finalState.updated_by).toBe("nvst:execute-test-plan");
|
|
1750
1751
|
});
|
|
1751
1752
|
|
|
@@ -12,8 +12,14 @@ import {
|
|
|
12
12
|
type AgentResult,
|
|
13
13
|
} from "../agent";
|
|
14
14
|
import { assertGuardrail } from "../guardrail";
|
|
15
|
+
import { applyStatusUpdate, idsMatchExactly, sortedValues } from "../progress-utils";
|
|
15
16
|
import { exists, FLOW_REL_DIR, readState, writeState } from "../state";
|
|
17
|
+
import { writeJsonArtifact, type WriteJsonArtifactFn } from "../write-json-artifact";
|
|
16
18
|
import { TestPlanSchema, type TestPlan } from "../../scaffold/schemas/tmpl_test-plan";
|
|
19
|
+
import {
|
|
20
|
+
TestExecutionProgressSchema,
|
|
21
|
+
type TestExecutionProgress,
|
|
22
|
+
} from "../../scaffold/schemas/tmpl_test-execution-progress";
|
|
17
23
|
import { extractJson } from "./create-issue";
|
|
18
24
|
|
|
19
25
|
export interface ExecuteTestPlanOptions {
|
|
@@ -40,24 +46,6 @@ const BatchResultSchema = z.array(BatchResultItemSchema);
|
|
|
40
46
|
|
|
41
47
|
type BatchResultItem = z.infer<typeof BatchResultItemSchema>;
|
|
42
48
|
|
|
43
|
-
const TestExecutionProgressStatusSchema = z.enum(["pending", "in_progress", "passed", "failed"]);
|
|
44
|
-
|
|
45
|
-
const TestExecutionProgressEntrySchema = z.object({
|
|
46
|
-
id: z.string(),
|
|
47
|
-
type: z.enum(["automated", "exploratory_manual"]),
|
|
48
|
-
status: TestExecutionProgressStatusSchema,
|
|
49
|
-
attempt_count: z.number().int().nonnegative(),
|
|
50
|
-
last_agent_exit_code: z.number().int().nullable(),
|
|
51
|
-
last_error_summary: z.string(),
|
|
52
|
-
updated_at: z.string(),
|
|
53
|
-
});
|
|
54
|
-
|
|
55
|
-
const TestExecutionProgressSchema = z.object({
|
|
56
|
-
entries: z.array(TestExecutionProgressEntrySchema),
|
|
57
|
-
});
|
|
58
|
-
|
|
59
|
-
type TestExecutionProgress = z.infer<typeof TestExecutionProgressSchema>;
|
|
60
|
-
|
|
61
49
|
interface FlatTestCase {
|
|
62
50
|
id: string;
|
|
63
51
|
description: string;
|
|
@@ -133,6 +121,7 @@ interface ExecuteTestPlanDeps {
|
|
|
133
121
|
promptManualTestFn: (testCase: FlatTestCase) => Promise<ManualTestUserInput>;
|
|
134
122
|
readFileFn: typeof readFile;
|
|
135
123
|
writeFileFn: typeof Bun.write;
|
|
124
|
+
writeJsonArtifactFn: WriteJsonArtifactFn;
|
|
136
125
|
}
|
|
137
126
|
|
|
138
127
|
const defaultDeps: ExecuteTestPlanDeps = {
|
|
@@ -144,6 +133,7 @@ const defaultDeps: ExecuteTestPlanDeps = {
|
|
|
144
133
|
promptManualTestFn: promptManualTest,
|
|
145
134
|
readFileFn: readFile,
|
|
146
135
|
writeFileFn: Bun.write,
|
|
136
|
+
writeJsonArtifactFn: writeJsonArtifact,
|
|
147
137
|
};
|
|
148
138
|
|
|
149
139
|
function flattenTests(testPlan: TestPlan): FlatTestCase[] {
|
|
@@ -197,24 +187,6 @@ function derivePassFail(status: ExecutionPayload["status"]): "pass" | "fail" | n
|
|
|
197
187
|
return null;
|
|
198
188
|
}
|
|
199
189
|
|
|
200
|
-
function sortedValues(values: string[]): string[] {
|
|
201
|
-
return [...values].sort((a, b) => a.localeCompare(b));
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
function idsMatchExactly(left: string[], right: string[]): boolean {
|
|
205
|
-
if (left.length !== right.length) {
|
|
206
|
-
return false;
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
for (let i = 0; i < left.length; i += 1) {
|
|
210
|
-
if (left[i] !== right[i]) {
|
|
211
|
-
return false;
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
return true;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
190
|
function toArtifactSafeSegment(value: string): string {
|
|
219
191
|
return value.replace(/[^a-zA-Z0-9_-]/g, "_");
|
|
220
192
|
}
|
|
@@ -231,15 +203,16 @@ function buildMarkdownReport(report: TestExecutionReport): string {
|
|
|
231
203
|
const failedCount = totalTests - passedCount;
|
|
232
204
|
|
|
233
205
|
const lines = [
|
|
234
|
-
|
|
206
|
+
"# Test Execution Report",
|
|
235
207
|
"",
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
208
|
+
`**Iteration:** it_${report.iteration}`,
|
|
209
|
+
`**Test Plan:** \`${report.testPlanFile}\``,
|
|
210
|
+
`**Total:** ${totalTests}`,
|
|
211
|
+
`**Passed:** ${passedCount}`,
|
|
212
|
+
`**Failed:** ${failedCount}`,
|
|
240
213
|
"",
|
|
241
214
|
"| Test ID | Description | Status | Correlated Requirements | Artifacts |",
|
|
242
|
-
"
|
|
215
|
+
"|---------|-------------|--------|------------------------|-----------|",
|
|
243
216
|
];
|
|
244
217
|
|
|
245
218
|
for (const result of report.results) {
|
|
@@ -441,7 +414,7 @@ export async function runExecuteTestPlan(
|
|
|
441
414
|
const executedTestIds: string[] = [];
|
|
442
415
|
|
|
443
416
|
const writeProgress = async () => {
|
|
444
|
-
await mergedDeps.
|
|
417
|
+
await mergedDeps.writeJsonArtifactFn(progressPath, TestExecutionProgressSchema, progress);
|
|
445
418
|
};
|
|
446
419
|
|
|
447
420
|
await mergedDeps.mkdirFn(join(projectRoot, FLOW_REL_DIR), { recursive: true });
|
|
@@ -460,8 +433,7 @@ export async function runExecuteTestPlan(
|
|
|
460
433
|
for (const tc of pendingAutomatedTests) {
|
|
461
434
|
const entry = progress.entries.find((e) => e.id === tc.id);
|
|
462
435
|
if (entry) {
|
|
463
|
-
entry
|
|
464
|
-
entry.updated_at = new Date().toISOString();
|
|
436
|
+
applyStatusUpdate(entry, "in_progress", new Date().toISOString());
|
|
465
437
|
}
|
|
466
438
|
}
|
|
467
439
|
await writeProgress();
|
|
@@ -610,8 +582,7 @@ export async function runExecuteTestPlan(
|
|
|
610
582
|
continue;
|
|
611
583
|
}
|
|
612
584
|
|
|
613
|
-
progressEntry
|
|
614
|
-
progressEntry.updated_at = new Date().toISOString();
|
|
585
|
+
applyStatusUpdate(progressEntry, "in_progress", new Date().toISOString());
|
|
615
586
|
await writeProgress();
|
|
616
587
|
|
|
617
588
|
const userInput = await mergedDeps.promptManualTestFn(testCase);
|
|
@@ -630,8 +601,7 @@ export async function runExecuteTestPlan(
|
|
|
630
601
|
progressEntry.attempt_count += 1;
|
|
631
602
|
progressEntry.last_agent_exit_code = null;
|
|
632
603
|
progressEntry.last_error_summary = payload.status === "passed" ? "" : payload.notes;
|
|
633
|
-
progressEntry
|
|
634
|
-
progressEntry.updated_at = new Date().toISOString();
|
|
604
|
+
applyStatusUpdate(progressEntry, payload.status === "passed" ? "passed" : "failed", new Date().toISOString());
|
|
635
605
|
await writeProgress();
|
|
636
606
|
|
|
637
607
|
await mergedDeps.writeFileFn(
|
|
@@ -714,9 +684,6 @@ export async function runExecuteTestPlan(
|
|
|
714
684
|
const hasFailedTests = progress.entries.some((entry) => entry.status === "failed");
|
|
715
685
|
state.phases.prototype.test_execution.status = hasFailedTests ? "failed" : "completed";
|
|
716
686
|
state.phases.prototype.test_execution.file = progressFileName;
|
|
717
|
-
if (!hasFailedTests) {
|
|
718
|
-
state.phases.prototype.prototype_approved = true;
|
|
719
|
-
}
|
|
720
687
|
state.last_updated = mergedDeps.nowFn().toISOString();
|
|
721
688
|
state.updated_by = "nvst:execute-test-plan";
|
|
722
689
|
await writeState(projectRoot, state);
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
export type FlowHandlerKey =
|
|
2
|
+
| "runCreateProjectContextFn"
|
|
3
|
+
| "runCreatePrototypeFn"
|
|
4
|
+
| "runCreateTestPlanFn"
|
|
5
|
+
| "runDefineRefactorPlanFn"
|
|
6
|
+
| "runDefineRequirementFn"
|
|
7
|
+
| "runExecuteRefactorFn"
|
|
8
|
+
| "runExecuteTestPlanFn";
|
|
9
|
+
|
|
10
|
+
type FlowStepDefinition = {
|
|
11
|
+
id: string;
|
|
12
|
+
label: string;
|
|
13
|
+
requiresAgent: boolean;
|
|
14
|
+
handlerKey: FlowHandlerKey;
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
export const FLOW_STEPS = {
|
|
18
|
+
"define-requirement": {
|
|
19
|
+
id: "define-requirement",
|
|
20
|
+
label: "define requirement",
|
|
21
|
+
requiresAgent: true,
|
|
22
|
+
handlerKey: "runDefineRequirementFn",
|
|
23
|
+
},
|
|
24
|
+
"create-project-context": {
|
|
25
|
+
id: "create-project-context",
|
|
26
|
+
label: "create project-context",
|
|
27
|
+
requiresAgent: true,
|
|
28
|
+
handlerKey: "runCreateProjectContextFn",
|
|
29
|
+
},
|
|
30
|
+
"create-prototype": {
|
|
31
|
+
id: "create-prototype",
|
|
32
|
+
label: "create prototype",
|
|
33
|
+
requiresAgent: true,
|
|
34
|
+
handlerKey: "runCreatePrototypeFn",
|
|
35
|
+
},
|
|
36
|
+
"create-test-plan": {
|
|
37
|
+
id: "create-test-plan",
|
|
38
|
+
label: "create test-plan",
|
|
39
|
+
requiresAgent: true,
|
|
40
|
+
handlerKey: "runCreateTestPlanFn",
|
|
41
|
+
},
|
|
42
|
+
"execute-test-plan": {
|
|
43
|
+
id: "execute-test-plan",
|
|
44
|
+
label: "execute test-plan",
|
|
45
|
+
requiresAgent: true,
|
|
46
|
+
handlerKey: "runExecuteTestPlanFn",
|
|
47
|
+
},
|
|
48
|
+
"define-refactor-plan": {
|
|
49
|
+
id: "define-refactor-plan",
|
|
50
|
+
label: "define refactor-plan",
|
|
51
|
+
requiresAgent: true,
|
|
52
|
+
handlerKey: "runDefineRefactorPlanFn",
|
|
53
|
+
},
|
|
54
|
+
"execute-refactor": {
|
|
55
|
+
id: "execute-refactor",
|
|
56
|
+
label: "execute refactor",
|
|
57
|
+
requiresAgent: true,
|
|
58
|
+
handlerKey: "runExecuteRefactorFn",
|
|
59
|
+
},
|
|
60
|
+
} as const satisfies Record<string, FlowStepDefinition>;
|
|
61
|
+
|
|
62
|
+
export type FlowStepId = keyof typeof FLOW_STEPS;
|
|
63
|
+
export type FlowStep = (typeof FLOW_STEPS)[FlowStepId];
|
|
64
|
+
|
|
65
|
+
export const FLOW_APPROVAL_TARGETS = {
|
|
66
|
+
requirement: "requirement",
|
|
67
|
+
projectContext: "project-context",
|
|
68
|
+
testPlan: "test-plan",
|
|
69
|
+
prototype: "prototype",
|
|
70
|
+
refactorPlan: "refactor-plan",
|
|
71
|
+
} as const;
|
|
72
|
+
|
|
73
|
+
export type FlowApprovalTarget = (typeof FLOW_APPROVAL_TARGETS)[keyof typeof FLOW_APPROVAL_TARGETS];
|
|
74
|
+
|
|
75
|
+
export const FLOW_APPROVAL_GATE_PREFIX = "Waiting for approval. Run: nvst approve";
|
|
76
|
+
|
|
77
|
+
export function buildApprovalGateMessage(target: FlowApprovalTarget): string {
|
|
78
|
+
return `${FLOW_APPROVAL_GATE_PREFIX} ${target} to continue, then re-run nvst flow.`;
|
|
79
|
+
}
|