ccqa 0.3.10 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -26
- package/dist/bin/ccqa.mjs +1595 -1291
- package/dist/package.json +2 -2
- package/dist/runtime/test-helpers.mjs +1 -53
- package/dist/runtime/vitest.config.d.mts +10 -10
- package/dist/spawn-ab-BxjEhA5e.mjs +65 -0
- package/package.json +2 -2
package/dist/bin/ccqa.mjs
CHANGED
|
@@ -1,33 +1,41 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { n as spawnAB } from "../spawn-ab-BxjEhA5e.mjs";
|
|
2
3
|
import { createRequire } from "node:module";
|
|
3
4
|
import { Command } from "commander";
|
|
4
|
-
import { accessSync, readFileSync, statSync } from "node:fs";
|
|
5
|
+
import { accessSync, existsSync, readFileSync, statSync } from "node:fs";
|
|
5
6
|
import { fileURLToPath } from "node:url";
|
|
6
|
-
import { access, mkdir, mkdtemp, readFile, readdir, rm, stat,
|
|
7
|
-
import { delimiter, dirname, join, relative, resolve } from "node:path";
|
|
7
|
+
import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
|
|
8
8
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
9
|
-
import
|
|
9
|
+
import { delimiter, dirname, join, relative, resolve } from "node:path";
|
|
10
|
+
import { parse, stringify } from "yaml";
|
|
11
|
+
import { ZodError, z } from "zod";
|
|
10
12
|
import { execFile, spawn } from "node:child_process";
|
|
11
13
|
import { createInterface } from "node:readline";
|
|
12
|
-
import { tmpdir } from "node:os";
|
|
14
|
+
import { homedir, tmpdir } from "node:os";
|
|
13
15
|
import { createInterface as createInterface$1 } from "node:readline/promises";
|
|
14
|
-
import { z } from "zod";
|
|
15
16
|
import { promisify } from "node:util";
|
|
16
17
|
//#region src/prompts/trace.ts
|
|
17
18
|
function generateSessionName() {
|
|
18
19
|
return `ccqa-trace-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`;
|
|
19
20
|
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
21
|
+
/**
|
|
22
|
+
* Build the trace system prompt. `input.steps` is a flat list with includes
|
|
23
|
+
* already expanded (each step carries id / source / instruction / expected).
|
|
24
|
+
* The spec opens URLs via explicit step instructions (e.g.
|
|
25
|
+
* `instruction: "${APP_URL}/articles を開く"`).
|
|
26
|
+
*
|
|
27
|
+
* In v0.4 every spec is traced from scratch — block contents are inlined
|
|
28
|
+
* into the spec's own step list at expand time, so the prompt has no
|
|
29
|
+
* special "this is a block" mode. The `source` tag on each step still
|
|
30
|
+
* distinguishes spec-native steps from inlined block steps for the
|
|
31
|
+
* `// step:` comments in the eventual codegen output.
|
|
32
|
+
*/
|
|
33
|
+
function buildTraceSystemPrompt(input) {
|
|
34
|
+
const sessionName = input.sessionName ?? generateSessionName();
|
|
35
|
+
const stepsText = input.steps.map((step) => `### ${step.id} [${step.source}]
|
|
27
36
|
- **Instruction**: ${step.instruction}
|
|
28
37
|
- **Expected**: ${step.expected}`).join("\n\n");
|
|
29
|
-
const
|
|
30
|
-
const relatedPathsBlock = emitRelatedPaths ? buildRelatedPathsInstruction() : "";
|
|
38
|
+
const relatedPathsBlock = buildRelatedPathsInstruction();
|
|
31
39
|
return `You are an expert QA engineer executing a browser E2E test. Execute each step precisely and record every browser action as a structured log line.
|
|
32
40
|
|
|
33
41
|
## Session
|
|
@@ -48,7 +56,8 @@ agent-browser --session SESSION uncheck "<selector>"
|
|
|
48
56
|
agent-browser --session SESSION press <Key>
|
|
49
57
|
agent-browser --session SESSION select "<selector>" "<value>"
|
|
50
58
|
agent-browser --session SESSION hover "<selector>"
|
|
51
|
-
agent-browser --session SESSION wait --text "<text>"
|
|
59
|
+
agent-browser --session SESSION wait --text "<text>" [--timeout <ms>]
|
|
60
|
+
agent-browser --session SESSION wait "<selector>" [--timeout <ms>] [--state visible|hidden]
|
|
52
61
|
agent-browser --session SESSION cookies clear
|
|
53
62
|
\`\`\`
|
|
54
63
|
|
|
@@ -83,17 +92,18 @@ agent-browser --session SESSION cookies clear
|
|
|
83
92
|
|
|
84
93
|
## Test Specification
|
|
85
94
|
|
|
86
|
-
Title: ${
|
|
87
|
-
Base URL: ${spec.baseUrl}
|
|
95
|
+
Title: ${input.title}
|
|
88
96
|
|
|
89
|
-
|
|
97
|
+
Each step's instruction names the URL to open directly (or via \`\${ENV_VAR}\`). Open exactly the URL the step says to open.
|
|
98
|
+
|
|
99
|
+
## Steps
|
|
90
100
|
|
|
91
101
|
${stepsText}
|
|
92
102
|
|
|
93
103
|
## Execution Workflow
|
|
94
104
|
|
|
95
105
|
For each step:
|
|
96
|
-
1. Emit \`STEP_START|<step-id>|<step
|
|
106
|
+
1. Emit \`STEP_START|<step-id>|<short description of what this step does>\`
|
|
97
107
|
2. Run \`snapshot\` and identify selectors from the ARIA tree
|
|
98
108
|
3. Execute the action using an ALLOWED selector
|
|
99
109
|
4. Emit \`AB_ACTION|...\` for every browser action (see below)
|
|
@@ -180,6 +190,15 @@ AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
|
|
|
180
190
|
|
|
181
191
|
The selector in AB_ACTION must be one of the ALLOWED formats above.
|
|
182
192
|
|
|
193
|
+
**CRITICAL — record only successful actions.** The AB_ACTION stream is the
|
|
194
|
+
canonical replay sequence: every line in it must be reproducible on a fresh
|
|
195
|
+
browser session. Therefore:
|
|
196
|
+
|
|
197
|
+
- If you tried a selector and \`agent-browser\` returned a non-zero exit (selector not found, element not interactable, timeout): **do NOT emit \`AB_ACTION|...\`** for that attempt. Take a fresh snapshot, switch selector, and only emit the AB_ACTION for the call that finally succeeded.
|
|
198
|
+
- If you explored multiple selectors for the same logical action (e.g. tried \`[aria-label='Email']\`, it failed, then \`[placeholder='Email']\` worked): emit AB_ACTION for the **working selector only**. The failed attempt must not appear in the trace.
|
|
199
|
+
- The same rule applies to \`AB_ACTION|assert|...\` lines: only emit them for assertions you actually verified on the current page in the current snapshot. Never declare an assertion against a selector you have not just confirmed visible — even if you intended to use it earlier.
|
|
200
|
+
- If a step ultimately fails after retries: emit \`ASSERTION_FAILED\` and STOP. Do NOT leave half-recorded actions for the failed step in the AB_ACTION stream.
|
|
201
|
+
|
|
183
202
|
## Assertion Protocol
|
|
184
203
|
|
|
185
204
|
After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you confirmed.
|
|
@@ -211,9 +230,36 @@ After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you c
|
|
|
211
230
|
**Selector rules for assert actions — CRITICAL:**
|
|
212
231
|
- Use the **same ALLOWED formats** as browser actions — never invent aria-label values
|
|
213
232
|
- Only use \`[aria-label='...']\` if that **exact** aria-label string appears in the current ARIA snapshot output
|
|
214
|
-
- When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector
|
|
233
|
+
- When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector — but still pre-verify with \`wait --text\` per the MUST-VERIFY rule below; \`alt\`-attribute "text" will not match.
|
|
215
234
|
- For \`element_disabled\`/\`element_enabled\`: use a CSS class selector if no aria-label is confirmed in the snapshot
|
|
216
235
|
|
|
236
|
+
**MUST-VERIFY rule — STRICT (applies to every assert except \`url_contains\`):**
|
|
237
|
+
|
|
238
|
+
The \`snapshot\` output is the **accessibility tree**: a semantic view. \`agent-browser\` queries the **real DOM**. They DO NOT always match. Two known traps:
|
|
239
|
+
|
|
240
|
+
1. *Selector trap*: a snapshot row like \`textbox "Email address"\` is reachable via \`[placeholder='...']\` but **NOT** via \`[aria-label='...']\` if no \`aria-label\` attribute is actually set — the browser inferred the label from \`<label for=>\` / surrounding text / \`placeholder\`.
|
|
241
|
+
2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes via \`wait --text\`) will NOT find it.
|
|
242
|
+
|
|
243
|
+
Before emitting an \`AB_ACTION|assert|...\` line, **verify the assertion form actually resolves on the live page**:
|
|
244
|
+
|
|
245
|
+
\`\`\`bash
|
|
246
|
+
# element_visible / element_enabled / element_disabled / element_checked / element_unchecked
|
|
247
|
+
agent-browser --session SESSION wait "<selector>" --timeout 3000
|
|
248
|
+
|
|
249
|
+
# element_not_visible
|
|
250
|
+
agent-browser --session SESSION wait "<selector>" --state hidden --timeout 3000
|
|
251
|
+
|
|
252
|
+
# text_visible
|
|
253
|
+
agent-browser --session SESSION wait --text "<text>" --timeout 3000
|
|
254
|
+
|
|
255
|
+
# text_not_visible
|
|
256
|
+
agent-browser --session SESSION wait --text "<text>" --state hidden --timeout 3000
|
|
257
|
+
\`\`\`
|
|
258
|
+
|
|
259
|
+
Apply the "record only successful actions" rule from the AB_ACTION section above. **Additionally**, when *no* form verifies — e.g. you tried \`[aria-label='X']\`, \`[placeholder='X']\`, and \`text=X\` and they all timed out, or the "text" turned out to be an \`alt\` / aria-label — **DROP the assertion entirely**. Fewer, real assertions beat invented ones that fail at replay. Prefer swapping a failed \`text_visible\` for an \`element_visible\` against the link/button selector when the visible label came from \`alt\` / aria-label.
|
|
260
|
+
|
|
261
|
+
\`url_contains\` is exempt — it checks the current URL string, not the DOM/accessibility tree.
|
|
262
|
+
|
|
217
263
|
**Examples:**
|
|
218
264
|
\`\`\`
|
|
219
265
|
AB_ACTION|assert|url_contains|||/dashboard|Navigated to dashboard
|
|
@@ -229,7 +275,7 @@ AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
|
|
|
229
275
|
Emit exactly one status line per step (outside any code block):
|
|
230
276
|
|
|
231
277
|
\`\`\`
|
|
232
|
-
STEP_START|<step-id>|<step
|
|
278
|
+
STEP_START|<step-id>|<short description of what this step does>
|
|
233
279
|
STEP_DONE|<step-id>|<what was verified>
|
|
234
280
|
ASSERTION_FAILED|<step-id>|<category: app-bug|env-issue|auth-blocked|missing-test-data|selector-drift|agent-misread>: <reason>
|
|
235
281
|
STEP_SKIPPED|<step-id>|<reason>
|
|
@@ -242,37 +288,29 @@ RUN_COMPLETED|failed|<summary>
|
|
|
242
288
|
After each step (outside any code block):
|
|
243
289
|
|
|
244
290
|
\`\`\`
|
|
245
|
-
ROUTE_STEP|<step-id>|<
|
|
291
|
+
ROUTE_STEP|<step-id>|<short description>|ACTION:<what you did>|OBSERVATION:<what you verified>|STATUS:<PASSED|FAILED|SKIPPED>
|
|
246
292
|
\`\`\`
|
|
247
293
|
|
|
248
294
|
${relatedPathsBlock}## Start
|
|
249
295
|
|
|
250
|
-
|
|
296
|
+
Begin by clearing cookies, then proceed straight to the first step's instruction.
|
|
251
297
|
|
|
252
298
|
\`\`\`bash
|
|
253
|
-
agent-browser --session ${sessionName} open ${spec.baseUrl}
|
|
254
|
-
\`\`\`
|
|
255
|
-
|
|
256
|
-
Emit:
|
|
257
|
-
\`\`\`
|
|
258
|
-
AB_ACTION|open|${spec.baseUrl}
|
|
259
|
-
\`\`\`` : `\`\`\`bash
|
|
260
299
|
agent-browser --session ${sessionName} cookies clear
|
|
261
|
-
agent-browser --session ${sessionName} open ${spec.baseUrl}
|
|
262
300
|
\`\`\`
|
|
263
301
|
|
|
264
302
|
Emit:
|
|
265
303
|
\`\`\`
|
|
266
304
|
AB_ACTION|cookies_clear
|
|
267
|
-
|
|
268
|
-
\`\`\``}
|
|
305
|
+
\`\`\`
|
|
269
306
|
|
|
270
|
-
Then emit \`STEP_START|step-01|...\` and
|
|
307
|
+
Then emit \`STEP_START|step-01|...\` and execute the first step. The first step is responsible for opening the initial URL.
|
|
308
|
+
`;
|
|
271
309
|
}
|
|
272
310
|
function buildRelatedPathsInstruction() {
|
|
273
311
|
return `## Post-run: emit \`relatedPaths\` block
|
|
274
312
|
|
|
275
|
-
After all steps are complete (regardless of pass/fail) and **before** \`RUN_COMPLETED\`, you MUST emit a single \`RELATED_PATHS\` block. The host (not you) writes these paths into the spec
|
|
313
|
+
After all steps are complete (regardless of pass/fail) and **before** \`RUN_COMPLETED\`, you MUST emit a single \`RELATED_PATHS\` block. The host (not you) writes these paths into the spec — your only job is to emit the block.
|
|
276
314
|
|
|
277
315
|
\`relatedPaths\` is a list of glob patterns identifying the source files this spec depends on. CI uses them to decide whether a code change should trigger a drift check for this spec.
|
|
278
316
|
|
|
@@ -303,18 +341,8 @@ Emit the block outside any other code block, on its own lines. If the test could
|
|
|
303
341
|
|
|
304
342
|
`;
|
|
305
343
|
}
|
|
306
|
-
function buildTracePrompt(
|
|
307
|
-
return `Execute the test for "${
|
|
308
|
-
}
|
|
309
|
-
function buildSetupTraceSystemPrompt(spec) {
|
|
310
|
-
return buildTraceSystemPromptInner({
|
|
311
|
-
title: spec.title,
|
|
312
|
-
baseUrl: "about:blank",
|
|
313
|
-
steps: spec.steps
|
|
314
|
-
}, void 0, false);
|
|
315
|
-
}
|
|
316
|
-
function buildSetupTracePrompt(spec) {
|
|
317
|
-
return `Execute the setup procedure "${spec.title}". Follow each step precisely.`;
|
|
344
|
+
function buildTracePrompt(title) {
|
|
345
|
+
return `Execute the test for "${title}". Each step's instruction includes the URL or selector context it needs.`;
|
|
318
346
|
}
|
|
319
347
|
//#endregion
|
|
320
348
|
//#region src/cli/logger.ts
|
|
@@ -390,6 +418,11 @@ async function invokeClaudeStreaming(options, onEvent) {
|
|
|
390
418
|
const { prompt, systemPrompt, allowedTools, disableBuiltinTools = false, maxTurns, env, model, cwd, onAbAction, onAbActionFailed, silenceBashLog = false } = options;
|
|
391
419
|
const resolvedModel = resolveModel(model);
|
|
392
420
|
let lastAbToolUseId = null;
|
|
421
|
+
const claimAbToolUse = (toolUseId) => {
|
|
422
|
+
if (toolUseId !== lastAbToolUseId) return false;
|
|
423
|
+
lastAbToolUseId = null;
|
|
424
|
+
return true;
|
|
425
|
+
};
|
|
393
426
|
const sdkOptions = {
|
|
394
427
|
systemPrompt,
|
|
395
428
|
maxTurns,
|
|
@@ -424,13 +457,17 @@ async function invokeClaudeStreaming(options, onEvent) {
|
|
|
424
457
|
} else lastAbToolUseId = null;
|
|
425
458
|
return {};
|
|
426
459
|
}] }],
|
|
460
|
+
PostToolUse: [{ hooks: [async (input) => {
|
|
461
|
+
if (input.hook_event_name !== "PostToolUse") return {};
|
|
462
|
+
if (input.tool_name !== "Bash") return {};
|
|
463
|
+
if (!isBashToolResponseError(input.tool_response)) return {};
|
|
464
|
+
if (claimAbToolUse(input.tool_use_id) && onAbActionFailed) onAbActionFailed();
|
|
465
|
+
return {};
|
|
466
|
+
}] }],
|
|
427
467
|
PostToolUseFailure: [{ hooks: [async (input) => {
|
|
428
468
|
if (input.hook_event_name !== "PostToolUseFailure") return {};
|
|
429
469
|
if (input.tool_name !== "Bash") return {};
|
|
430
|
-
if (input.tool_use_id
|
|
431
|
-
onAbActionFailed();
|
|
432
|
-
lastAbToolUseId = null;
|
|
433
|
-
}
|
|
470
|
+
if (claimAbToolUse(input.tool_use_id) && onAbActionFailed) onAbActionFailed();
|
|
434
471
|
return {};
|
|
435
472
|
}] }]
|
|
436
473
|
} : void 0
|
|
@@ -500,6 +537,26 @@ function isBlockedAbSubcommand(cmd) {
|
|
|
500
537
|
const sub = extractAbSubcommand(cmd);
|
|
501
538
|
return sub !== null && BLOCKED_AB_SUBCOMMANDS.has(sub);
|
|
502
539
|
}
|
|
540
|
+
/**
|
|
541
|
+
* Detects "the Bash tool returned an error" from a SDK PostToolUse hook's
|
|
542
|
+
* `tool_response`. The SDK can shape this two ways depending on how Claude
|
|
543
|
+
* Code reports Bash failures:
|
|
544
|
+
*
|
|
545
|
+
* - `{ is_error: true, ... }` — the canonical Bash failure shape
|
|
546
|
+
* - `{ output, exitCode, killed?, ... }` — the BashOutput shape; treat
|
|
547
|
+
* non-zero exit / kill as error
|
|
548
|
+
*
|
|
549
|
+
* We accept either. Anything else (including missing fields) is treated as a
|
|
550
|
+
* successful response so we never roll back over an unrelated tool call.
|
|
551
|
+
*/
|
|
552
|
+
function isBashToolResponseError(tool_response) {
|
|
553
|
+
if (tool_response === null || typeof tool_response !== "object") return false;
|
|
554
|
+
const r = tool_response;
|
|
555
|
+
if (r["is_error"] === true) return true;
|
|
556
|
+
if (typeof r["exitCode"] === "number" && r["exitCode"] !== 0) return true;
|
|
557
|
+
if (r["killed"] === true) return true;
|
|
558
|
+
return false;
|
|
559
|
+
}
|
|
503
560
|
/** Returns true if any argument to an agent-browser command uses a @ref selector (e.g. @e14). */
|
|
504
561
|
function hasRefSelector(cmd) {
|
|
505
562
|
const abIdx = cmd.indexOf("agent-browser");
|
|
@@ -561,87 +618,255 @@ async function* replayMockMessages(path) {
|
|
|
561
618
|
}
|
|
562
619
|
}
|
|
563
620
|
//#endregion
|
|
621
|
+
//#region src/runtime/env-vars.ts
|
|
622
|
+
const ENV_VAR_RE = /\$\{([A-Z_][A-Z0-9_]*)\}|\$([A-Z_][A-Z0-9_]*)/g;
|
|
623
|
+
const ANY_VAR_RE = /\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g;
|
|
624
|
+
/**
|
|
625
|
+
* Replace every `$NAME` / `${NAME}` reference in `value` using `lookup`. When
|
|
626
|
+
* `lookup` returns `undefined`, the original reference text is preserved
|
|
627
|
+
* (callers that want empty-string substitution should wrap with `?? ""`).
|
|
628
|
+
*/
|
|
629
|
+
function substituteVars(value, lookup) {
|
|
630
|
+
ANY_VAR_RE.lastIndex = 0;
|
|
631
|
+
return value.replace(ANY_VAR_RE, (match, braced, plain) => {
|
|
632
|
+
const replacement = lookup(braced ?? plain ?? "");
|
|
633
|
+
return replacement === void 0 ? match : replacement;
|
|
634
|
+
});
|
|
635
|
+
}
|
|
636
|
+
/**
|
|
637
|
+
* Resolve every `$VAR` / `${VAR}` reference against the current process env.
|
|
638
|
+
*
|
|
639
|
+
* Missing variables expand to the empty string, mirroring `sh` behaviour.
|
|
640
|
+
* Throwing would force ccqa to be invoked with every var set even for
|
|
641
|
+
* unused blocks, which is more user-hostile than letting the test fail
|
|
642
|
+
* downstream with a clearer message ("login form rejected: empty password").
|
|
643
|
+
*/
|
|
644
|
+
function resolveEnvRefs(value) {
|
|
645
|
+
return value.replace(ENV_VAR_RE, (_, braced, plain) => {
|
|
646
|
+
const name = braced ?? plain ?? "";
|
|
647
|
+
return process.env[name] ?? "";
|
|
648
|
+
});
|
|
649
|
+
}
|
|
650
|
+
/**
|
|
651
|
+
* Embed `$VAR` / `${VAR}` as a JS template-literal expression that reads
|
|
652
|
+
* `process.env.VAR ?? ""` at runtime. Used by `ccqa generate` so the test
|
|
653
|
+
* script never bakes in the secret value.
|
|
654
|
+
*
|
|
655
|
+
* Returns a JavaScript string-literal expression (template literal when env
|
|
656
|
+
* refs are present, plain string literal otherwise).
|
|
657
|
+
*
|
|
658
|
+
* Examples:
|
|
659
|
+
* "${PASSWORD}" -> '`${process.env.PASSWORD ?? ""}`'
|
|
660
|
+
* "user-${SUFFIX}@x.com" -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
|
|
661
|
+
* "literal value" -> '"literal value"'
|
|
662
|
+
*/
|
|
663
|
+
function envRefsToJsExpression(value) {
|
|
664
|
+
return refsToJsExpression(value, () => null);
|
|
665
|
+
}
|
|
666
|
+
/**
|
|
667
|
+
* Generalised version of `envRefsToJsExpression`. Each `$NAME` / `${NAME}`
|
|
668
|
+
* reference in `value` is passed to `nameToExpr(name)` first:
|
|
669
|
+
*
|
|
670
|
+
* - If it returns a string, that string is interpolated as a JS expression
|
|
671
|
+
* (no quoting / no `?? ""` wrap — the caller decides the shape).
|
|
672
|
+
* - If it returns `null`, the reference is treated as a missing env var
|
|
673
|
+
* and expands to `process.env.<NAME> ?? ""` (the legacy behaviour).
|
|
674
|
+
*
|
|
675
|
+
* Used by the block codegen path: param names map to `params.<name>`,
|
|
676
|
+
* everything else falls through to `process.env.X ?? ""`.
|
|
677
|
+
*/
|
|
678
|
+
function refsToJsExpression(value, nameToExpr) {
|
|
679
|
+
ANY_VAR_RE.lastIndex = 0;
|
|
680
|
+
if (!ANY_VAR_RE.test(value)) return JSON.stringify(value);
|
|
681
|
+
const escaped = value.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, (_match, offset, source) => {
|
|
682
|
+
const probe = new RegExp(ANY_VAR_RE.source, "g");
|
|
683
|
+
let m;
|
|
684
|
+
while ((m = probe.exec(source)) !== null) if (m.index === offset) return "${";
|
|
685
|
+
return "\\${";
|
|
686
|
+
});
|
|
687
|
+
ANY_VAR_RE.lastIndex = 0;
|
|
688
|
+
return `\`${escaped.replace(ANY_VAR_RE, (_match, braced, plain) => {
|
|
689
|
+
const name = braced ?? plain ?? "";
|
|
690
|
+
const expr = nameToExpr(name);
|
|
691
|
+
return expr !== null ? `\${${expr}}` : `\${process.env.${name} ?? ""}`;
|
|
692
|
+
})}\``;
|
|
693
|
+
}
|
|
694
|
+
//#endregion
|
|
695
|
+
//#region src/spec/yaml-schema.ts
|
|
696
|
+
/**
|
|
697
|
+
* An action step: one user-facing browser interaction. `instruction` and
|
|
698
|
+
* `expected` are the natural-language description handed to Claude during
|
|
699
|
+
* `ccqa trace`. URLs live inside `instruction`, either verbatim or via
|
|
700
|
+
* `${ENV_VAR}` references (resolved at runtime).
|
|
701
|
+
*/
|
|
702
|
+
const ActionStepSchema = z.object({
|
|
703
|
+
instruction: z.string().min(1),
|
|
704
|
+
expected: z.string().min(1)
|
|
705
|
+
}).strict();
|
|
706
|
+
/**
|
|
707
|
+
* An include step: invokes a reusable block (`.ccqa/blocks/<name>/spec.yaml`).
|
|
708
|
+
* `params` values are plain strings; env refs (`${VAR}`) inside them are
|
|
709
|
+
* resolved at expand time the same way step instructions are.
|
|
710
|
+
*/
|
|
711
|
+
const IncludeStepSchema = z.object({
|
|
712
|
+
include: z.string().min(1),
|
|
713
|
+
params: z.record(z.string(), z.string()).optional()
|
|
714
|
+
}).strict();
|
|
715
|
+
/**
|
|
716
|
+
* A spec step is either an action step or an include step. The two are
|
|
717
|
+
* discriminated by the presence of the `include` key — see `isIncludeStep`.
|
|
718
|
+
*/
|
|
719
|
+
const StepSchema = z.union([ActionStepSchema, IncludeStepSchema]);
|
|
720
|
+
/** Top-level spec schema. `.strict()` rejects any unknown key. */
|
|
721
|
+
const TestSpecSchema = z.object({
|
|
722
|
+
title: z.string().min(1),
|
|
723
|
+
relatedPaths: z.array(z.string().min(1)).optional(),
|
|
724
|
+
steps: z.array(StepSchema).min(1)
|
|
725
|
+
}).strict();
|
|
726
|
+
/**
|
|
727
|
+
* A block param declaration. `required` defaults to true; only explicit
|
|
728
|
+
* `required: false` makes it optional. `secret: true` flags the value as
|
|
729
|
+
* sensitive — codegen renders such values as `process.env.<NAME> ?? ""`
|
|
730
|
+
* template literals so the secret never ends up baked into test.spec.ts.
|
|
731
|
+
* `dummy` is a placeholder value surfaced by the draft / drift prompts
|
|
732
|
+
* (which see the block in isolation, before any include site exists);
|
|
733
|
+
* `description` is the param's semantic role, also consumed by those
|
|
734
|
+
* prompts and by spec authors browsing the block.
|
|
735
|
+
*/
|
|
736
|
+
const BlockParamSchema = z.object({
|
|
737
|
+
name: z.string().min(1),
|
|
738
|
+
required: z.boolean().optional(),
|
|
739
|
+
secret: z.boolean().optional(),
|
|
740
|
+
dummy: z.string().optional(),
|
|
741
|
+
description: z.string().optional()
|
|
742
|
+
}).strict();
|
|
743
|
+
/**
|
|
744
|
+
* Block schema. Block steps are restricted to ActionStep — nested blocks are
|
|
745
|
+
* forbidden. Including a block from inside another block fails parsing here
|
|
746
|
+
* (the store layer maps the cryptic "Unrecognized key: 'include'" error into
|
|
747
|
+
* a targeted nested-block message).
|
|
748
|
+
*/
|
|
749
|
+
const BlockSpecSchema = z.object({
|
|
750
|
+
title: z.string().min(1),
|
|
751
|
+
params: z.array(BlockParamSchema).optional(),
|
|
752
|
+
steps: z.array(ActionStepSchema).min(1)
|
|
753
|
+
}).strict();
|
|
754
|
+
/** Runtime predicate for the StepSchema union. */
|
|
755
|
+
function isIncludeStep(step) {
|
|
756
|
+
return "include" in step;
|
|
757
|
+
}
|
|
758
|
+
/** Returns true if a block param is required (default: true). */
|
|
759
|
+
function isParamRequired(param) {
|
|
760
|
+
return param.required !== false;
|
|
761
|
+
}
|
|
762
|
+
//#endregion
|
|
564
763
|
//#region src/spec/parser.ts
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
const
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
setups: parseSetupRefs(data["setups"]),
|
|
574
|
-
relatedPaths: parseRelatedPaths(data["relatedPaths"]),
|
|
575
|
-
steps
|
|
576
|
-
};
|
|
764
|
+
/** Parse a spec.yaml. Schema rejections are rewritten with actionable messages. */
|
|
765
|
+
function parseTestSpec(content, source = "spec.yaml") {
|
|
766
|
+
const raw = parseYamlOrThrow(content, source);
|
|
767
|
+
try {
|
|
768
|
+
return TestSpecSchema.parse(raw);
|
|
769
|
+
} catch (e) {
|
|
770
|
+
throw enrichZodError(e, source, false);
|
|
771
|
+
}
|
|
577
772
|
}
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
773
|
+
/**
|
|
774
|
+
* Parse a block's spec.yaml. Block-specific errors include the targeted
|
|
775
|
+
* nested-block message (the underlying zod failure on an `include` key
|
|
776
|
+
* inside a block step is hard to read).
|
|
777
|
+
*/
|
|
778
|
+
function parseBlockSpec(content, source = "block spec.yaml") {
|
|
779
|
+
const raw = parseYamlOrThrow(content, source);
|
|
780
|
+
try {
|
|
781
|
+
return BlockSpecSchema.parse(raw);
|
|
782
|
+
} catch (e) {
|
|
783
|
+
throw enrichZodError(e, source, true);
|
|
784
|
+
}
|
|
583
785
|
}
|
|
584
|
-
function
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
placeholders: Object.keys(placeholders).length > 0 ? placeholders : void 0,
|
|
591
|
-
steps
|
|
592
|
-
};
|
|
786
|
+
function parseYamlOrThrow(content, source) {
|
|
787
|
+
try {
|
|
788
|
+
return parse(content);
|
|
789
|
+
} catch (e) {
|
|
790
|
+
throw new Error(`Failed to parse YAML (${source}): ${e.message}`);
|
|
791
|
+
}
|
|
593
792
|
}
|
|
594
|
-
function
|
|
595
|
-
if (!
|
|
596
|
-
const
|
|
597
|
-
for (const
|
|
598
|
-
const
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
description: v["description"] ? String(v["description"]) : void 0
|
|
602
|
-
};
|
|
793
|
+
function enrichZodError(error, source, isBlock) {
|
|
794
|
+
if (!(error instanceof ZodError)) return error;
|
|
795
|
+
const lines = [`Invalid ${source}:`];
|
|
796
|
+
for (const issue of error.issues) {
|
|
797
|
+
const path = issue.path.join(".") || "(root)";
|
|
798
|
+
const message = humanizeIssue(issue, isBlock);
|
|
799
|
+
lines.push(` - ${path}: ${message}`);
|
|
603
800
|
}
|
|
604
|
-
return
|
|
605
|
-
}
|
|
606
|
-
function
|
|
607
|
-
if (
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
refs.push({
|
|
612
|
-
name: String(i["name"]),
|
|
613
|
-
params: i["params"] && typeof i["params"] === "object" ? Object.fromEntries(Object.entries(i["params"]).map(([k, v]) => [k, String(v)])) : void 0
|
|
614
|
-
});
|
|
801
|
+
return new Error(lines.join("\n"));
|
|
802
|
+
}
|
|
803
|
+
function humanizeIssue(issue, isBlock) {
|
|
804
|
+
if (issue.code === "unrecognized_keys") {
|
|
805
|
+
const keys = Array.isArray(issue.keys) ? issue.keys : [];
|
|
806
|
+
if (isBlock && keys.includes("include")) return `Nested blocks are not supported — flatten by inlining the included block's steps into this block.`;
|
|
807
|
+
return `Unknown keys: ${keys.join(", ")}`;
|
|
615
808
|
}
|
|
616
|
-
return
|
|
617
|
-
}
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
809
|
+
return issue.message;
|
|
810
|
+
}
|
|
811
|
+
//#endregion
|
|
812
|
+
//#region src/spec/expand.ts
|
|
813
|
+
/**
|
|
814
|
+
* Walk the spec's top-level steps, inlining any `- include: <block>` reference
|
|
815
|
+
* as the block's own steps in order. The result is a flat `step-NN`-numbered
|
|
816
|
+
* sequence — block boundaries survive only as the `source` tag, so trace and
|
|
817
|
+
* codegen never need a separate block code path.
|
|
818
|
+
*/
|
|
819
|
+
function expandSpec(spec, options) {
|
|
820
|
+
const out = [];
|
|
821
|
+
let counter = 0;
|
|
822
|
+
const allocId = () => {
|
|
823
|
+
counter += 1;
|
|
824
|
+
return `step-${String(counter).padStart(2, "0")}`;
|
|
825
|
+
};
|
|
826
|
+
for (const step of spec.steps) if (isIncludeStep(step)) {
|
|
827
|
+
const block = resolveBlock(step.include, step.params ?? {}, options.blocks);
|
|
828
|
+
for (const blockStep of block.steps) out.push({
|
|
829
|
+
id: allocId(),
|
|
830
|
+
source: step.include,
|
|
831
|
+
instruction: substituteVars(blockStep.instruction, block.lookup),
|
|
832
|
+
expected: substituteVars(blockStep.expected, block.lookup)
|
|
638
833
|
});
|
|
639
|
-
}
|
|
640
|
-
|
|
834
|
+
} else out.push({
|
|
835
|
+
id: allocId(),
|
|
836
|
+
source: "spec",
|
|
837
|
+
instruction: step.instruction,
|
|
838
|
+
expected: step.expected
|
|
839
|
+
});
|
|
840
|
+
return out;
|
|
841
|
+
}
|
|
842
|
+
function resolveBlock(blockName, rawParams, blocks) {
|
|
843
|
+
const block = blocks.get(blockName);
|
|
844
|
+
if (!block) throw new Error(`Unknown block: "${blockName}". Define it under .ccqa/blocks/${blockName}/spec.yaml.`);
|
|
845
|
+
const declaredParams = new Map((block.params ?? []).map((p) => [p.name, p]));
|
|
846
|
+
for (const key of Object.keys(rawParams)) if (!declaredParams.has(key)) throw new Error(`Block "${blockName}" received unknown param "${key}". Declared params: ${[...declaredParams.keys()].join(", ") || "(none)"}.`);
|
|
847
|
+
for (const [pname, def] of declaredParams) if (isParamRequired(def) && !(pname in rawParams)) throw new Error(`Block "${blockName}" is missing required param "${pname}".`);
|
|
848
|
+
const lookup = (name) => {
|
|
849
|
+
if (Object.prototype.hasOwnProperty.call(rawParams, name)) return rawParams[name];
|
|
850
|
+
};
|
|
851
|
+
return {
|
|
852
|
+
steps: block.steps,
|
|
853
|
+
lookup
|
|
854
|
+
};
|
|
855
|
+
}
|
|
856
|
+
/**
|
|
857
|
+
* Collect every block name referenced by a spec (top-level only — blocks
|
|
858
|
+
* cannot nest). Used by the store / drift layers to know which blocks to
|
|
859
|
+
* load or invalidate.
|
|
860
|
+
*/
|
|
861
|
+
function collectIncludedBlockNames(spec) {
|
|
862
|
+
const names = /* @__PURE__ */ new Set();
|
|
863
|
+
for (const step of spec.steps) if (isIncludeStep(step)) names.add(step.include);
|
|
864
|
+
return [...names];
|
|
641
865
|
}
|
|
642
866
|
//#endregion
|
|
643
867
|
//#region src/store/index.ts
|
|
644
868
|
const CCQA_DIR = ".ccqa";
|
|
869
|
+
const SPEC_FILE = "spec.yaml";
|
|
645
870
|
function getCcqaDir(cwd = process.cwd()) {
|
|
646
871
|
return join(cwd, CCQA_DIR);
|
|
647
872
|
}
|
|
@@ -669,39 +894,44 @@ function getSpecDir(featureName, specName, cwd) {
|
|
|
669
894
|
}
|
|
670
895
|
async function ensureCcqaDir(cwd) {
|
|
671
896
|
await mkdir(join(getCcqaDir(cwd), "features"), { recursive: true });
|
|
897
|
+
await mkdir(join(getCcqaDir(cwd), "blocks"), { recursive: true });
|
|
672
898
|
}
|
|
673
899
|
async function readSpecFile(featureName, specName, cwd) {
|
|
674
|
-
const specPath = join(getSpecDir(featureName, specName, cwd),
|
|
900
|
+
const specPath = join(getSpecDir(featureName, specName, cwd), SPEC_FILE);
|
|
675
901
|
return readFile(specPath, "utf-8").catch(() => {
|
|
676
902
|
throw new Error(`Spec file not found: ${specPath}`);
|
|
677
903
|
});
|
|
678
904
|
}
|
|
679
905
|
async function tryReadSpecFile(featureName, specName, cwd) {
|
|
680
|
-
return readFile(join(getSpecDir(featureName, specName, cwd),
|
|
906
|
+
return readFile(join(getSpecDir(featureName, specName, cwd), SPEC_FILE), "utf-8").catch(() => null);
|
|
681
907
|
}
|
|
682
908
|
async function saveSpecFile(featureName, specName, content, cwd) {
|
|
683
909
|
const specDir = getSpecDir(featureName, specName, cwd);
|
|
684
910
|
await mkdir(specDir, { recursive: true });
|
|
685
|
-
const specPath = join(specDir,
|
|
911
|
+
const specPath = join(specDir, SPEC_FILE);
|
|
686
912
|
await writeFile(specPath, content.endsWith("\n") ? content : content + "\n", "utf-8");
|
|
687
913
|
return specPath;
|
|
688
914
|
}
|
|
689
915
|
/**
|
|
690
|
-
* Replace (or insert) the `relatedPaths` key in the spec
|
|
691
|
-
*
|
|
692
|
-
*
|
|
916
|
+
* Replace (or insert) the `relatedPaths` key in the spec. Preserves every
|
|
917
|
+
* other top-level field and the entire steps array. Returns the absolute
|
|
918
|
+
* path that was written, or null if the spec file does not exist.
|
|
693
919
|
*/
|
|
694
920
|
async function updateSpecRelatedPaths(featureName, specName, relatedPaths, cwd) {
|
|
695
|
-
const specPath = join(getSpecDir(featureName, specName, cwd),
|
|
921
|
+
const specPath = join(getSpecDir(featureName, specName, cwd), SPEC_FILE);
|
|
696
922
|
const existing = await readFile(specPath, "utf-8").catch(() => null);
|
|
697
923
|
if (existing === null) return null;
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
await writeFile(specPath, matter.stringify(parsed.content, data), "utf-8");
|
|
924
|
+
await writeFile(specPath, stringify(stripUndefined({
|
|
925
|
+
...parseTestSpec(existing, specPath),
|
|
926
|
+
relatedPaths: relatedPaths.length > 0 ? relatedPaths : void 0
|
|
927
|
+
}), { lineWidth: 0 }), "utf-8");
|
|
703
928
|
return specPath;
|
|
704
929
|
}
|
|
930
|
+
function stripUndefined(obj) {
|
|
931
|
+
const out = {};
|
|
932
|
+
for (const [k, v] of Object.entries(obj)) if (v !== void 0) out[k] = v;
|
|
933
|
+
return out;
|
|
934
|
+
}
|
|
705
935
|
async function saveRoute(featureName, specName, route, cwd) {
|
|
706
936
|
const specDir = getSpecDir(featureName, specName, cwd);
|
|
707
937
|
await mkdir(specDir, { recursive: true });
|
|
@@ -716,38 +946,72 @@ async function saveTraceActions(featureName, specName, actions, cwd) {
|
|
|
716
946
|
await writeFile(actionsPath, JSON.stringify(actions, null, 2), "utf-8");
|
|
717
947
|
return actionsPath;
|
|
718
948
|
}
|
|
719
|
-
function
|
|
720
|
-
return join(getCcqaDir(cwd), "
|
|
949
|
+
function getBlocksDir(cwd) {
|
|
950
|
+
return join(getCcqaDir(cwd), "blocks");
|
|
721
951
|
}
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
952
|
+
/**
|
|
953
|
+
* Inverse of `getBlockDir`. Given a file path that appears in a git diff,
|
|
954
|
+
* return the block name if the path points at the block's spec.yaml, else
|
|
955
|
+
* null. Used by `drift --changed` to invalidate specs whose included blocks
|
|
956
|
+
* were edited. (v0.4 inlines blocks into every spec's own trace, so the
|
|
957
|
+
* block directory holds only spec.yaml — no per-block actions.json / route
|
|
958
|
+
* lives here anymore.)
|
|
959
|
+
*/
|
|
960
|
+
function parseBlockPath(path) {
|
|
961
|
+
return path.match(/(?:^|\/)\.ccqa\/blocks\/([^/]+)\/spec\.yaml$/)?.[1] ?? null;
|
|
727
962
|
}
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
963
|
+
/**
|
|
964
|
+
* Load every block under `.ccqa/blocks/<name>/spec.yaml`. Used by the trace /
|
|
965
|
+
* generate / drift entry points to validate include references at parse time.
|
|
966
|
+
*
|
|
967
|
+
* A malformed block is fatal — surfaces as a thrown Error with the path that
|
|
968
|
+
* failed. Missing block directories (no `spec.yaml`) are silently skipped so
|
|
969
|
+
* stray files don't break the loader.
|
|
970
|
+
*/
|
|
971
|
+
async function loadAllBlocks(cwd) {
|
|
972
|
+
const dir = getBlocksDir(cwd);
|
|
973
|
+
const names = await readdir(dir).catch(() => []);
|
|
974
|
+
const entries = await Promise.all(names.map(async (name) => {
|
|
975
|
+
const path = join(dir, name, SPEC_FILE);
|
|
976
|
+
const content = await readFile(path, "utf-8").catch(() => null);
|
|
977
|
+
return content === null ? null : [name, parseBlockSpec(content, path)];
|
|
978
|
+
}));
|
|
979
|
+
return new Map(entries.filter((e) => e !== null));
|
|
734
980
|
}
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
981
|
+
/**
|
|
982
|
+
* Project the parsed blocks into the shape the draft / drift prompts consume.
|
|
983
|
+
* Co-located with `loadAllBlocks` so callers don't have to remember the
|
|
984
|
+
* isParamRequired / secret-default mapping.
|
|
985
|
+
*/
|
|
986
|
+
async function loadAvailableBlocks(cwd) {
|
|
987
|
+
return [...(await loadAllBlocks(cwd)).entries()].map(([name, block]) => ({
|
|
988
|
+
name,
|
|
989
|
+
title: block.title,
|
|
990
|
+
params: (block.params ?? []).map((p) => ({
|
|
991
|
+
name: p.name,
|
|
992
|
+
required: isParamRequired(p),
|
|
993
|
+
secret: p.secret === true
|
|
994
|
+
}))
|
|
995
|
+
}));
|
|
744
996
|
}
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
997
|
+
/**
|
|
998
|
+
* Probe for orphaned files left over from earlier ccqa versions inside
|
|
999
|
+
* `.ccqa/blocks/<name>/`. Both pre-v0.4 `test.spec.ts` (function-export
|
|
1000
|
+
* blocks) and the short-lived `actions.json` / `route.md` (recorded-block
|
|
1001
|
+
* variant) are dead in the new "blocks are pure spec templates" model and
|
|
1002
|
+
* should be deleted manually. Returns the absolute paths.
|
|
1003
|
+
*/
|
|
1004
|
+
async function findStaleBlockArtifacts(cwd) {
|
|
1005
|
+
const dir = getBlocksDir(cwd);
|
|
1006
|
+
const names = await readdir(dir).catch(() => []);
|
|
1007
|
+
return (await Promise.all(names.flatMap((name) => [
|
|
1008
|
+
"test.spec.ts",
|
|
1009
|
+
"actions.json",
|
|
1010
|
+
"route.md"
|
|
1011
|
+
].map(async (f) => {
|
|
1012
|
+
const path = join(dir, name, f);
|
|
1013
|
+
return await stat(path).then(() => true).catch(() => false) ? path : null;
|
|
1014
|
+
})))).filter((p) => p !== null);
|
|
751
1015
|
}
|
|
752
1016
|
async function getTraceActions(featureName, specName, cwd) {
|
|
753
1017
|
const path = join(getSpecDir(featureName, specName, cwd), "actions.json");
|
|
@@ -789,8 +1053,7 @@ async function listSpecsForFeature(featureName, cwd) {
|
|
|
789
1053
|
}
|
|
790
1054
|
/**
|
|
791
1055
|
* Lists every feature/spec dir under .ccqa/features/, regardless of whether
|
|
792
|
-
* the spec is fully drafted yet. Each spec file is read at most once
|
|
793
|
-
* and relatedPaths are both extracted from the same parse.
|
|
1056
|
+
* the spec is fully drafted yet. Each spec file is read at most once.
|
|
794
1057
|
*/
|
|
795
1058
|
async function listFeatureTree(cwd) {
|
|
796
1059
|
const featuresDir = join(getCcqaDir(cwd), "features");
|
|
@@ -801,18 +1064,19 @@ async function listFeatureTree(cwd) {
|
|
|
801
1064
|
return {
|
|
802
1065
|
featureName,
|
|
803
1066
|
specs: await Promise.all(specDirs.map(async (specName) => {
|
|
804
|
-
const
|
|
1067
|
+
const specFile = join(testCasesDir, specName, SPEC_FILE);
|
|
1068
|
+
const content = await readFile(specFile, "utf-8").catch(() => null);
|
|
805
1069
|
if (content === null) return {
|
|
806
1070
|
specName,
|
|
807
1071
|
hasSpecFile: false
|
|
808
1072
|
};
|
|
809
1073
|
try {
|
|
810
|
-
const spec = parseTestSpec(content);
|
|
1074
|
+
const spec = parseTestSpec(content, specFile);
|
|
811
1075
|
const entry = {
|
|
812
1076
|
specName,
|
|
813
|
-
hasSpecFile: true
|
|
1077
|
+
hasSpecFile: true,
|
|
1078
|
+
includedBlocks: collectIncludedBlockNames(spec)
|
|
814
1079
|
};
|
|
815
|
-
if (spec.title && spec.title !== "Untitled") entry.title = spec.title;
|
|
816
1080
|
if (spec.relatedPaths) entry.relatedPaths = spec.relatedPaths;
|
|
817
1081
|
return entry;
|
|
818
1082
|
} catch {
|
|
@@ -845,6 +1109,20 @@ function routeToMarkdown(route) {
|
|
|
845
1109
|
return lines.join("\n");
|
|
846
1110
|
}
|
|
847
1111
|
//#endregion
|
|
1112
|
+
//#region src/cli/stale-blocks.ts
|
|
1113
|
+
/**
|
|
1114
|
+
* Hint when stale per-block artifacts (`test.spec.ts`, `actions.json`,
|
|
1115
|
+
* `route.md`) from earlier ccqa versions are still present. v0.4 treats
|
|
1116
|
+
* blocks as pure spec templates — they no longer have their own executable
|
|
1117
|
+
* or recorded artifacts, so these files are dead code and should be deleted
|
|
1118
|
+
* manually. Shared by `trace` and `generate`.
|
|
1119
|
+
*/
|
|
1120
|
+
async function warnStaleBlockArtifacts() {
|
|
1121
|
+
const stale = await findStaleBlockArtifacts();
|
|
1122
|
+
if (stale.length === 0) return;
|
|
1123
|
+
for (const p of stale) hint(`stale block artifact detected: ${p} — v0.4 no longer uses these; delete it manually.`);
|
|
1124
|
+
}
|
|
1125
|
+
//#endregion
|
|
848
1126
|
//#region src/drift/parse-related-paths.ts
|
|
849
1127
|
/**
|
|
850
1128
|
* Pull a `RELATED_PATHS_BEGIN ... RELATED_PATHS_END` block out of the trace
|
|
@@ -869,103 +1147,8 @@ function parseRelatedPathsBlock(text) {
|
|
|
869
1147
|
return out;
|
|
870
1148
|
}
|
|
871
1149
|
//#endregion
|
|
872
|
-
//#region src/runtime/bundled-config.ts
|
|
873
|
-
const CANDIDATES = [
|
|
874
|
-
"../runtime/vitest.config.mjs",
|
|
875
|
-
"./vitest.config.mjs",
|
|
876
|
-
"./vitest.config.ts"
|
|
877
|
-
];
|
|
878
|
-
function bundledVitestConfigPath() {
|
|
879
|
-
for (const rel of CANDIDATES) {
|
|
880
|
-
const candidate = fileURLToPath(new URL(rel, import.meta.url));
|
|
881
|
-
try {
|
|
882
|
-
accessSync(candidate);
|
|
883
|
-
return candidate;
|
|
884
|
-
} catch {}
|
|
885
|
-
}
|
|
886
|
-
return fileURLToPath(new URL("./vitest.config.ts", import.meta.url));
|
|
887
|
-
}
|
|
888
|
-
//#endregion
|
|
889
|
-
//#region src/runtime/spawn-vitest.ts
|
|
890
|
-
const require$2 = createRequire(import.meta.url);
|
|
891
|
-
function resolveVitestBin() {
|
|
892
|
-
const pkgPath = require$2.resolve("vitest/package.json");
|
|
893
|
-
const pkg = require$2(pkgPath);
|
|
894
|
-
const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.vitest;
|
|
895
|
-
if (!binRel) throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
|
|
896
|
-
return resolve(dirname(pkgPath), binRel);
|
|
897
|
-
}
|
|
898
|
-
async function spawnVitestCaptured(args, opts = {}) {
|
|
899
|
-
const child = spawnVitestChild(args, opts, "pipe");
|
|
900
|
-
const [stdout, stderr, exitCode] = await Promise.all([
|
|
901
|
-
drain(child.stdout),
|
|
902
|
-
drain(child.stderr),
|
|
903
|
-
waitExit(child)
|
|
904
|
-
]);
|
|
905
|
-
return {
|
|
906
|
-
exitCode,
|
|
907
|
-
stdout,
|
|
908
|
-
stderr
|
|
909
|
-
};
|
|
910
|
-
}
|
|
911
|
-
async function spawnVitestTeed(args, opts = {}) {
|
|
912
|
-
const child = spawnVitestChild(args, opts, "pipe");
|
|
913
|
-
const [stdout, stderr, exitCode] = await Promise.all([
|
|
914
|
-
teeDrain(child.stdout, process.stdout),
|
|
915
|
-
teeDrain(child.stderr, process.stderr),
|
|
916
|
-
waitExit(child)
|
|
917
|
-
]);
|
|
918
|
-
return {
|
|
919
|
-
exitCode,
|
|
920
|
-
stdout,
|
|
921
|
-
stderr
|
|
922
|
-
};
|
|
923
|
-
}
|
|
924
|
-
function spawnVitestStreaming(args, opts = {}) {
|
|
925
|
-
const child = spawnVitestChild(args, opts, "pipe");
|
|
926
|
-
return {
|
|
927
|
-
child,
|
|
928
|
-
stdout: child.stdout,
|
|
929
|
-
stderr: child.stderr,
|
|
930
|
-
exited: waitExit(child)
|
|
931
|
-
};
|
|
932
|
-
}
|
|
933
|
-
function spawnVitestChild(args, opts, stdio) {
|
|
934
|
-
const vitestBin = resolveVitestBin();
|
|
935
|
-
return spawn(process.execPath, [vitestBin, ...args], {
|
|
936
|
-
cwd: opts.cwd,
|
|
937
|
-
env: opts.env ?? process.env,
|
|
938
|
-
stdio: [
|
|
939
|
-
"ignore",
|
|
940
|
-
stdio,
|
|
941
|
-
stdio
|
|
942
|
-
]
|
|
943
|
-
});
|
|
944
|
-
}
|
|
945
|
-
async function drain(stream) {
|
|
946
|
-
stream.setEncoding("utf8");
|
|
947
|
-
let buf = "";
|
|
948
|
-
for await (const chunk of stream) buf += chunk;
|
|
949
|
-
return buf;
|
|
950
|
-
}
|
|
951
|
-
async function teeDrain(stream, sink) {
|
|
952
|
-
stream.setEncoding("utf8");
|
|
953
|
-
let buf = "";
|
|
954
|
-
for await (const chunk of stream) {
|
|
955
|
-
buf += chunk;
|
|
956
|
-
sink.write(chunk);
|
|
957
|
-
}
|
|
958
|
-
return buf;
|
|
959
|
-
}
|
|
960
|
-
function waitExit(child) {
|
|
961
|
-
return new Promise((resolvePromise, rejectPromise) => {
|
|
962
|
-
child.once("exit", (code) => resolvePromise(code ?? 0));
|
|
963
|
-
child.once("error", rejectPromise);
|
|
964
|
-
});
|
|
965
|
-
}
|
|
966
|
-
//#endregion
|
|
967
1150
|
//#region src/runtime/agent-browser-bin.ts
|
|
968
|
-
const require$
|
|
1151
|
+
const require$2 = createRequire(import.meta.url);
|
|
969
1152
|
function hasAgentBrowserShim(dir) {
|
|
970
1153
|
try {
|
|
971
1154
|
statSync(join(dir, "agent-browser"));
|
|
@@ -999,10 +1182,10 @@ function findNodeModulesBin(start) {
|
|
|
999
1182
|
function resolveAgentBrowserBinDir() {
|
|
1000
1183
|
const fromCwd = findNodeModulesBin(process.cwd());
|
|
1001
1184
|
if (fromCwd) return fromCwd;
|
|
1002
|
-
const fromSelf = findNodeModulesBin(dirname(require$
|
|
1185
|
+
const fromSelf = findNodeModulesBin(dirname(require$2.resolve("agent-browser/package.json")));
|
|
1003
1186
|
if (fromSelf) return fromSelf;
|
|
1004
1187
|
try {
|
|
1005
|
-
const candidate = join(dirname(require$
|
|
1188
|
+
const candidate = join(dirname(require$2.resolve("agent-browser/package.json")), "node_modules", ".bin");
|
|
1006
1189
|
if (hasAgentBrowserShim(candidate)) return candidate;
|
|
1007
1190
|
} catch {}
|
|
1008
1191
|
return null;
|
|
@@ -1062,54 +1245,197 @@ function formatAgentBrowserUnavailableMessage() {
|
|
|
1062
1245
|
].join("\n");
|
|
1063
1246
|
}
|
|
1064
1247
|
//#endregion
|
|
1065
|
-
//#region src/runtime/
|
|
1066
|
-
const
|
|
1248
|
+
//#region src/runtime/replay-validate.ts
|
|
1249
|
+
const SHORT_TIMEOUT_MS = 5e3;
|
|
1250
|
+
const ASSERT_TIMEOUT_MS = 1e4;
|
|
1067
1251
|
/**
|
|
1068
|
-
*
|
|
1252
|
+
* Convert one recorded action into the `agent-browser` arg list that would
|
|
1253
|
+
* exercise it. Returns `null` for actions that should not be validated
|
|
1254
|
+
* (snapshot has no side effect; assert types whose codegen forms aren't
|
|
1255
|
+
* directly verifiable here fall through to the caller's `unverifiable`
|
|
1256
|
+
* fallback).
|
|
1069
1257
|
*/
|
|
1070
|
-
function
|
|
1071
|
-
|
|
1072
|
-
|
|
1258
|
+
function actionToAbArgs(action, sessionName) {
|
|
1259
|
+
const base = ["--session", sessionName];
|
|
1260
|
+
const sub = (s) => s === void 0 ? "" : resolveEnvRefs(s);
|
|
1261
|
+
switch (action.command) {
|
|
1262
|
+
case "cookies_clear": return [
|
|
1263
|
+
...base,
|
|
1264
|
+
"cookies",
|
|
1265
|
+
"clear"
|
|
1266
|
+
];
|
|
1267
|
+
case "open": return [
|
|
1268
|
+
...base,
|
|
1269
|
+
"open",
|
|
1270
|
+
sub(action.value).replace(/^["']|["']$/g, "")
|
|
1271
|
+
];
|
|
1272
|
+
case "click": return [
|
|
1273
|
+
...base,
|
|
1274
|
+
"click",
|
|
1275
|
+
sub(action.selector)
|
|
1276
|
+
];
|
|
1277
|
+
case "dblclick": return [
|
|
1278
|
+
...base,
|
|
1279
|
+
"dblclick",
|
|
1280
|
+
sub(action.selector)
|
|
1281
|
+
];
|
|
1282
|
+
case "fill":
|
|
1283
|
+
case "type": return [
|
|
1284
|
+
...base,
|
|
1285
|
+
"fill",
|
|
1286
|
+
sub(action.selector),
|
|
1287
|
+
sub(action.value)
|
|
1288
|
+
];
|
|
1289
|
+
case "check": return [
|
|
1290
|
+
...base,
|
|
1291
|
+
"check",
|
|
1292
|
+
sub(action.selector)
|
|
1293
|
+
];
|
|
1294
|
+
case "uncheck": return [
|
|
1295
|
+
...base,
|
|
1296
|
+
"uncheck",
|
|
1297
|
+
sub(action.selector)
|
|
1298
|
+
];
|
|
1299
|
+
case "press": return [
|
|
1300
|
+
...base,
|
|
1301
|
+
"press",
|
|
1302
|
+
sub(action.value)
|
|
1303
|
+
];
|
|
1304
|
+
case "select": return [
|
|
1305
|
+
...base,
|
|
1306
|
+
"select",
|
|
1307
|
+
sub(action.selector),
|
|
1308
|
+
sub(action.value)
|
|
1309
|
+
];
|
|
1310
|
+
case "hover": return [
|
|
1311
|
+
...base,
|
|
1312
|
+
"hover",
|
|
1313
|
+
sub(action.selector)
|
|
1314
|
+
];
|
|
1315
|
+
case "scroll": {
|
|
1316
|
+
const args = [action.direction ?? "down", ...action.pixels ? [action.pixels] : []];
|
|
1317
|
+
return [
|
|
1318
|
+
...base,
|
|
1319
|
+
"scroll",
|
|
1320
|
+
...args
|
|
1321
|
+
];
|
|
1322
|
+
}
|
|
1323
|
+
case "drag": return [
|
|
1324
|
+
...base,
|
|
1325
|
+
"drag",
|
|
1326
|
+
sub(action.selector),
|
|
1327
|
+
sub(action.target)
|
|
1328
|
+
];
|
|
1329
|
+
case "wait": {
|
|
1330
|
+
const raw = sub(action.selector);
|
|
1331
|
+
if (!raw) return null;
|
|
1332
|
+
if (/^\d+$/.test(raw)) return null;
|
|
1333
|
+
if (raw.startsWith("text=")) return [
|
|
1334
|
+
...base,
|
|
1335
|
+
"wait",
|
|
1336
|
+
"--text",
|
|
1337
|
+
raw.slice(5),
|
|
1338
|
+
"--timeout",
|
|
1339
|
+
String(SHORT_TIMEOUT_MS)
|
|
1340
|
+
];
|
|
1341
|
+
return [
|
|
1342
|
+
...base,
|
|
1343
|
+
"wait",
|
|
1344
|
+
raw,
|
|
1345
|
+
"--timeout",
|
|
1346
|
+
String(SHORT_TIMEOUT_MS)
|
|
1347
|
+
];
|
|
1348
|
+
}
|
|
1349
|
+
case "snapshot": return null;
|
|
1350
|
+
case "assert": return assertToAbArgs(action, sub, sessionName);
|
|
1351
|
+
}
|
|
1073
1352
|
}
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1353
|
+
function assertToAbArgs(action, sub, sessionName) {
|
|
1354
|
+
const base = ["--session", sessionName];
|
|
1355
|
+
const val = sub(action.value ?? action.observation);
|
|
1356
|
+
const sel = sub(action.selector ?? action.observation);
|
|
1357
|
+
switch (action.assertType) {
|
|
1358
|
+
case "text_visible":
|
|
1359
|
+
if (!val) return null;
|
|
1360
|
+
return [
|
|
1361
|
+
...base,
|
|
1362
|
+
"wait",
|
|
1363
|
+
"--text",
|
|
1364
|
+
val,
|
|
1365
|
+
"--timeout",
|
|
1366
|
+
String(ASSERT_TIMEOUT_MS)
|
|
1367
|
+
];
|
|
1368
|
+
case "text_not_visible": return null;
|
|
1369
|
+
case "element_visible":
|
|
1370
|
+
if (!sel) return null;
|
|
1371
|
+
return [
|
|
1372
|
+
...base,
|
|
1373
|
+
"wait",
|
|
1374
|
+
sel,
|
|
1375
|
+
"--timeout",
|
|
1376
|
+
String(ASSERT_TIMEOUT_MS)
|
|
1377
|
+
];
|
|
1378
|
+
case "element_not_visible": return null;
|
|
1379
|
+
case "url_contains": return null;
|
|
1380
|
+
case "element_enabled":
|
|
1381
|
+
case "element_disabled":
|
|
1382
|
+
case "element_checked":
|
|
1383
|
+
case "element_unchecked":
|
|
1384
|
+
if (!sel || sel.startsWith("text=") || sel.startsWith("[aria-label=")) return null;
|
|
1385
|
+
return [
|
|
1386
|
+
...base,
|
|
1387
|
+
"wait",
|
|
1388
|
+
sel,
|
|
1389
|
+
"--timeout",
|
|
1390
|
+
String(ASSERT_TIMEOUT_MS)
|
|
1391
|
+
];
|
|
1392
|
+
default: return null;
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
function validateActions(actions, opts) {
|
|
1396
|
+
const kept = [];
|
|
1397
|
+
const dropped = [];
|
|
1398
|
+
let skipUntilSideEffect = false;
|
|
1399
|
+
for (let i = 0; i < actions.length; i++) {
|
|
1400
|
+
const action = actions[i];
|
|
1401
|
+
if (skipUntilSideEffect && isPassiveCommand(action.command)) {
|
|
1402
|
+
dropped.push({
|
|
1403
|
+
index: i,
|
|
1404
|
+
action,
|
|
1405
|
+
reason: "skipped after a preceding action failed"
|
|
1406
|
+
});
|
|
1407
|
+
continue;
|
|
1408
|
+
}
|
|
1409
|
+
skipUntilSideEffect = false;
|
|
1410
|
+
const args = actionToAbArgs(action, opts.sessionName);
|
|
1411
|
+
if (args === null) {
|
|
1412
|
+
kept.push(action);
|
|
1413
|
+
continue;
|
|
1414
|
+
}
|
|
1415
|
+
const result = spawnAB(args);
|
|
1416
|
+
if (result.status === 0) {
|
|
1417
|
+
kept.push(action);
|
|
1418
|
+
continue;
|
|
1419
|
+
}
|
|
1420
|
+
dropped.push({
|
|
1421
|
+
index: i,
|
|
1422
|
+
action,
|
|
1423
|
+
reason: (result.stderr.trim() || result.stdout.trim() || `agent-browser exit ${result.status ?? "?"}`).slice(0, 200)
|
|
1424
|
+
});
|
|
1425
|
+
skipUntilSideEffect = true;
|
|
1426
|
+
}
|
|
1427
|
+
return {
|
|
1428
|
+
kept,
|
|
1429
|
+
dropped
|
|
1430
|
+
};
|
|
1087
1431
|
}
|
|
1088
1432
|
/**
|
|
1089
|
-
*
|
|
1090
|
-
*
|
|
1091
|
-
*
|
|
1092
|
-
*
|
|
1093
|
-
* Returns a JavaScript string-literal expression (template literal when env
|
|
1094
|
-
* refs are present, plain string literal otherwise).
|
|
1095
|
-
*
|
|
1096
|
-
* Examples:
|
|
1097
|
-
* "${PASSWORD}" -> '`${process.env.PASSWORD ?? ""}`'
|
|
1098
|
-
* "user-${SUFFIX}@x.com" -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
|
|
1099
|
-
* "literal value" -> '"literal value"'
|
|
1433
|
+
* Passive (read-only) commands whose only effect is observation. When a
|
|
1434
|
+
* preceding action fails, dropping these too is the right move because
|
|
1435
|
+
* they were trying to observe state the failed action would have set up.
|
|
1100
1436
|
*/
|
|
1101
|
-
function
|
|
1102
|
-
|
|
1103
|
-
const escaped = value.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, (match, offset, source) => {
|
|
1104
|
-
ENV_VAR_RE.lastIndex = 0;
|
|
1105
|
-
let m;
|
|
1106
|
-
while ((m = ENV_VAR_RE.exec(source)) !== null) if (m.index === offset) return "${";
|
|
1107
|
-
return "\\${";
|
|
1108
|
-
});
|
|
1109
|
-
ENV_VAR_RE.lastIndex = 0;
|
|
1110
|
-
return `\`${escaped.replace(ENV_VAR_RE, (_, braced, plain) => {
|
|
1111
|
-
return `\${process.env.${braced ?? plain ?? ""} ?? ""}`;
|
|
1112
|
-
})}\``;
|
|
1437
|
+
function isPassiveCommand(cmd) {
|
|
1438
|
+
return cmd === "snapshot" || cmd === "wait" || cmd === "assert";
|
|
1113
1439
|
}
|
|
1114
1440
|
//#endregion
|
|
1115
1441
|
//#region src/cli/trace.ts
|
|
@@ -1129,30 +1455,35 @@ async function runTrace(featureName, specName, model) {
|
|
|
1129
1455
|
throw e;
|
|
1130
1456
|
}
|
|
1131
1457
|
await ensureCcqaDir();
|
|
1458
|
+
await warnStaleBlockArtifacts();
|
|
1132
1459
|
const spec = parseTestSpec(await readSpecFile(featureName, specName));
|
|
1133
|
-
const
|
|
1460
|
+
const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
|
|
1134
1461
|
meta("spec", spec.title);
|
|
1135
|
-
meta("
|
|
1136
|
-
|
|
1137
|
-
meta("
|
|
1462
|
+
meta("steps", expanded.length);
|
|
1463
|
+
const includes = collectIncludedBlockNames(spec);
|
|
1464
|
+
if (includes.length > 0) meta("blocks", includes.join(", "));
|
|
1138
1465
|
blank();
|
|
1139
1466
|
const sessionName = generateSessionName();
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
}
|
|
1145
|
-
const systemPrompt = buildTraceSystemPrompt(spec, {
|
|
1146
|
-
sessionName,
|
|
1147
|
-
skipCookiesClear: hasSetups
|
|
1467
|
+
const systemPrompt = buildTraceSystemPrompt({
|
|
1468
|
+
title: spec.title,
|
|
1469
|
+
steps: expanded,
|
|
1470
|
+
sessionName
|
|
1148
1471
|
});
|
|
1149
|
-
const prompt = buildTracePrompt(spec);
|
|
1472
|
+
const prompt = buildTracePrompt(spec.title);
|
|
1150
1473
|
info("Running agent-browser session...");
|
|
1151
1474
|
blank();
|
|
1152
1475
|
const routeSteps = [];
|
|
1153
1476
|
let overallStatus = "passed";
|
|
1154
1477
|
const traceActions = [];
|
|
1478
|
+
let currentStepId;
|
|
1155
1479
|
let relatedPathsBuffer = null;
|
|
1480
|
+
const withStepId = (action) => {
|
|
1481
|
+
if (!action) return null;
|
|
1482
|
+
return currentStepId ? {
|
|
1483
|
+
...action,
|
|
1484
|
+
stepId: currentStepId
|
|
1485
|
+
} : action;
|
|
1486
|
+
};
|
|
1156
1487
|
const { isError } = await invokeClaudeStreaming({
|
|
1157
1488
|
prompt,
|
|
1158
1489
|
systemPrompt,
|
|
@@ -1168,7 +1499,7 @@ async function runTrace(featureName, specName, model) {
|
|
|
1168
1499
|
},
|
|
1169
1500
|
model,
|
|
1170
1501
|
onAbAction: (abAction) => {
|
|
1171
|
-
const action = parseAbAction(abAction);
|
|
1502
|
+
const action = withStepId(parseAbAction(abAction));
|
|
1172
1503
|
if (action) traceActions.push(action);
|
|
1173
1504
|
},
|
|
1174
1505
|
onAbActionFailed: () => {
|
|
@@ -1184,10 +1515,14 @@ async function runTrace(featureName, specName, model) {
|
|
|
1184
1515
|
const idx = text.indexOf("RELATED_PATHS_BEGIN");
|
|
1185
1516
|
if (idx !== -1) relatedPathsBuffer = text.slice(idx) + "\n";
|
|
1186
1517
|
}
|
|
1187
|
-
const statusLine = parseStatusLine(text);
|
|
1188
|
-
if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
|
|
1189
1518
|
for (const line of text.split("\n")) {
|
|
1190
1519
|
const trimmed = line.trim();
|
|
1520
|
+
const status = parseStatusLine(line);
|
|
1521
|
+
if (status) {
|
|
1522
|
+
if (status.type === "STEP_START" && status.stepId) currentStepId = status.stepId;
|
|
1523
|
+
step(status.type, status.stepId, status.detail);
|
|
1524
|
+
continue;
|
|
1525
|
+
}
|
|
1191
1526
|
if (trimmed.startsWith("ROUTE_STEP|")) {
|
|
1192
1527
|
const routeStep = parseRouteStep(trimmed);
|
|
1193
1528
|
if (routeStep) {
|
|
@@ -1195,24 +1530,25 @@ async function runTrace(featureName, specName, model) {
|
|
|
1195
1530
|
if (routeStep.status === "FAILED") overallStatus = "failed";
|
|
1196
1531
|
}
|
|
1197
1532
|
} else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
|
|
1198
|
-
const action = parseAbAction(trimmed);
|
|
1533
|
+
const action = withStepId(parseAbAction(trimmed));
|
|
1199
1534
|
if (action) traceActions.push(action);
|
|
1200
1535
|
}
|
|
1201
1536
|
}
|
|
1202
1537
|
}
|
|
1203
1538
|
});
|
|
1204
1539
|
if (isError) overallStatus = "failed";
|
|
1540
|
+
const validatedActions = validateAndReport(traceActions);
|
|
1205
1541
|
const route = {
|
|
1206
1542
|
specName,
|
|
1207
1543
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1208
1544
|
status: overallStatus,
|
|
1209
1545
|
steps: routeSteps
|
|
1210
1546
|
};
|
|
1211
|
-
const [routePath, actionsPath] = await Promise.all([saveRoute(featureName, specName, route), saveTraceActions(featureName, specName,
|
|
1547
|
+
const [routePath, actionsPath] = await Promise.all([saveRoute(featureName, specName, route), saveTraceActions(featureName, specName, validatedActions)]);
|
|
1212
1548
|
blank();
|
|
1213
1549
|
meta("route", routePath);
|
|
1214
1550
|
meta("saved", actionsPath);
|
|
1215
|
-
meta("actions",
|
|
1551
|
+
meta("actions", validatedActions.length);
|
|
1216
1552
|
meta("status", overallStatus.toUpperCase());
|
|
1217
1553
|
const relatedPaths = relatedPathsBuffer !== null ? parseRelatedPathsBlock(relatedPathsBuffer) : null;
|
|
1218
1554
|
if (relatedPaths !== null) {
|
|
@@ -1222,34 +1558,23 @@ async function runTrace(featureName, specName, model) {
|
|
|
1222
1558
|
hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
|
|
1223
1559
|
}
|
|
1224
1560
|
/**
|
|
1225
|
-
*
|
|
1226
|
-
*
|
|
1561
|
+
* Run the post-trace replay validation and emit user-visible drop reports.
|
|
1562
|
+
* Splitting this out keeps `runTrace` readable; the function is pure aside
|
|
1563
|
+
* from `log.*` and the agent-browser invocations inside `validateActions`.
|
|
1227
1564
|
*/
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
const tmpPath = join(getSetupDir(ref.name), `_run.spec.ts`);
|
|
1238
|
-
await writeFile(tmpPath, script, "utf-8");
|
|
1239
|
-
try {
|
|
1240
|
-
const { exitCode, stdout, stderr } = await spawnVitestCaptured([
|
|
1241
|
-
"run",
|
|
1242
|
-
"--config",
|
|
1243
|
-
bundledVitestConfigPath(),
|
|
1244
|
-
tmpPath
|
|
1245
|
-
]);
|
|
1246
|
-
process.stdout.write(stdout);
|
|
1247
|
-
if (stderr) process.stderr.write(stderr);
|
|
1248
|
-
if (exitCode !== 0) throw new Error(`Setup '${ref.name}' failed (exit ${exitCode})`);
|
|
1249
|
-
} finally {
|
|
1250
|
-
await unlink(tmpPath).catch(() => {});
|
|
1251
|
-
}
|
|
1565
|
+
function validateAndReport(actions) {
|
|
1566
|
+
if (actions.length === 0) return actions;
|
|
1567
|
+
const sessionName = `${generateSessionName()}-validate`;
|
|
1568
|
+
blank();
|
|
1569
|
+
info("post-trace validation (replaying recorded actions)...");
|
|
1570
|
+
const { kept, dropped } = validateActions(actions, { sessionName });
|
|
1571
|
+
if (dropped.length === 0) {
|
|
1572
|
+
meta("validated", `${kept.length}/${actions.length} kept`);
|
|
1573
|
+
return kept;
|
|
1252
1574
|
}
|
|
1575
|
+
for (const d of dropped) warn(`dropped action #${d.index + 1} (${d.action.command}${d.action.selector ? " " + d.action.selector : ""}): ${d.reason}`);
|
|
1576
|
+
meta("validated", `${kept.length}/${actions.length} kept (${dropped.length} dropped)`);
|
|
1577
|
+
return kept;
|
|
1253
1578
|
}
|
|
1254
1579
|
function parseStatusLine(text) {
|
|
1255
1580
|
for (const line of text.split("\n")) {
|
|
@@ -1346,21 +1671,32 @@ function parseAbAction(line) {
|
|
|
1346
1671
|
}
|
|
1347
1672
|
//#endregion
|
|
1348
1673
|
//#region src/codegen/actions-to-script.ts
|
|
1349
|
-
function actionsToScript(
|
|
1674
|
+
function actionsToScript(input) {
|
|
1675
|
+
const { actions, testName, stepMarkers = [] } = input;
|
|
1350
1676
|
const parts = [...[
|
|
1351
1677
|
`import { test } from "vitest";`,
|
|
1352
1678
|
`import { spawnSync } from "node:child_process";`,
|
|
1353
|
-
`import {
|
|
1679
|
+
`import { ${[
|
|
1680
|
+
"ab",
|
|
1681
|
+
"abWait",
|
|
1682
|
+
"abAssertTextVisible",
|
|
1683
|
+
"abAssertVisible",
|
|
1684
|
+
"abAssertNotVisible",
|
|
1685
|
+
"abAssertUrl",
|
|
1686
|
+
"abAssertEnabled",
|
|
1687
|
+
"abAssertDisabled",
|
|
1688
|
+
"abAssertChecked",
|
|
1689
|
+
"abAssertUnchecked"
|
|
1690
|
+
].join(", ")} } from "ccqa/test-helpers";`,
|
|
1354
1691
|
"",
|
|
1355
|
-
`// Single session shared across
|
|
1356
|
-
`//
|
|
1357
|
-
`//
|
|
1692
|
+
`// Single session shared across the run. Use ||= so an outer harness`,
|
|
1693
|
+
`// (e.g. ccqa generate's auto-fix loop) can pre-set the session name`,
|
|
1694
|
+
`// and inspect the same session after the run finishes.`,
|
|
1358
1695
|
`process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
|
|
1359
1696
|
""
|
|
1360
1697
|
]];
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
parts.push(`test(${JSON.stringify(title)}, () => {`, body, "}, 5 * 60 * 1000);", "");
|
|
1698
|
+
const body = actionsToLines(actions, stepMarkers).map((l) => ` ${l}`).join("\n");
|
|
1699
|
+
parts.push(`test(${JSON.stringify(testName)}, () => {`, body, "}, 5 * 60 * 1000);", "");
|
|
1364
1700
|
return parts.join("\n");
|
|
1365
1701
|
}
|
|
1366
1702
|
/** Commands that interact with page elements and need the page to be loaded */
|
|
@@ -1375,11 +1711,18 @@ const ELEMENT_COMMANDS = new Set([
|
|
|
1375
1711
|
"hover",
|
|
1376
1712
|
"drag"
|
|
1377
1713
|
]);
|
|
1378
|
-
function actionsToLines(actions) {
|
|
1714
|
+
function actionsToLines(actions, stepMarkers) {
|
|
1379
1715
|
const lines = [];
|
|
1380
1716
|
let prevLine = null;
|
|
1381
1717
|
let prevCommand = null;
|
|
1382
|
-
|
|
1718
|
+
const markerByIndex = new Map(stepMarkers.map((m) => [m.actionIndex, m]));
|
|
1719
|
+
for (let i = 0; i < actions.length; i++) {
|
|
1720
|
+
const marker = markerByIndex.get(i);
|
|
1721
|
+
if (marker) {
|
|
1722
|
+
if (lines.length > 0) lines.push("");
|
|
1723
|
+
lines.push(`// step: ${marker.stepId} [${marker.source}]`);
|
|
1724
|
+
}
|
|
1725
|
+
const action = actions[i];
|
|
1383
1726
|
const line = actionToLine(action);
|
|
1384
1727
|
if (line === null) continue;
|
|
1385
1728
|
if (line === prevLine) continue;
|
|
@@ -1398,16 +1741,16 @@ function actionToLine(action) {
|
|
|
1398
1741
|
if ("selector" in action && isRefSelector(action.selector)) return null;
|
|
1399
1742
|
switch (action.command) {
|
|
1400
1743
|
case "cookies_clear": return `ab("cookies", "clear");`;
|
|
1401
|
-
case "open": return `ab("open", ${
|
|
1744
|
+
case "open": return `ab("open", ${jExpr((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
|
|
1402
1745
|
case "snapshot": return action.observation ? `// ${action.observation}` : null;
|
|
1403
1746
|
case "click": return `ab("click", ${j(action.selector)});`;
|
|
1404
1747
|
case "dblclick": return `ab("dblclick", ${j(action.selector)});`;
|
|
1405
|
-
case "fill": return `ab("fill", ${j(action.selector)}, ${
|
|
1406
|
-
case "type": return `ab("fill", ${j(action.selector)}, ${
|
|
1748
|
+
case "fill": return `ab("fill", ${j(action.selector)}, ${jExpr(action.value)});`;
|
|
1749
|
+
case "type": return `ab("fill", ${j(action.selector)}, ${jExpr(action.value)});`;
|
|
1407
1750
|
case "check": return `ab("check", ${j(action.selector)});`;
|
|
1408
1751
|
case "uncheck": return `ab("uncheck", ${j(action.selector)});`;
|
|
1409
|
-
case "press": return `ab("press", ${
|
|
1410
|
-
case "select": return `ab("select", ${j(action.selector)}, ${
|
|
1752
|
+
case "press": return `ab("press", ${jExpr(action.value)});`;
|
|
1753
|
+
case "select": return `ab("select", ${j(action.selector)}, ${jExpr(action.value)});`;
|
|
1411
1754
|
case "hover": return `ab("hover", ${j(action.selector)});`;
|
|
1412
1755
|
case "scroll": return `ab("scroll", ${[action.direction ?? "down", ...action.pixels ? [action.pixels] : []].map(j).join(", ")});`;
|
|
1413
1756
|
case "drag": return `ab("drag", ${j(action.selector)}, ${j(action.target)});`;
|
|
@@ -1423,10 +1766,10 @@ function actionToLine(action) {
|
|
|
1423
1766
|
let assertLine = null;
|
|
1424
1767
|
switch (action.assertType) {
|
|
1425
1768
|
case "text_visible":
|
|
1426
|
-
if (val) assertLine = `abAssertTextVisible(${
|
|
1769
|
+
if (val) assertLine = `abAssertTextVisible(${jExpr(val)});`;
|
|
1427
1770
|
break;
|
|
1428
1771
|
case "text_not_visible":
|
|
1429
|
-
if (val) assertLine = `abAssertNotVisible(${
|
|
1772
|
+
if (val) assertLine = `abAssertNotVisible(${jExpr("text=" + val)}, 180_000);`;
|
|
1430
1773
|
break;
|
|
1431
1774
|
case "element_visible":
|
|
1432
1775
|
if (sel) assertLine = `abAssertVisible(${j(sel)});`;
|
|
@@ -1435,7 +1778,7 @@ function actionToLine(action) {
|
|
|
1435
1778
|
if (sel) assertLine = `abAssertNotVisible(${j(sel)});`;
|
|
1436
1779
|
break;
|
|
1437
1780
|
case "url_contains":
|
|
1438
|
-
if (val) assertLine = `abAssertUrl(${
|
|
1781
|
+
if (val) assertLine = `abAssertUrl(${jExpr(val)});`;
|
|
1439
1782
|
break;
|
|
1440
1783
|
case "element_enabled":
|
|
1441
1784
|
if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertEnabled(${j(sel)});`;
|
|
@@ -1458,6 +1801,14 @@ function actionToLine(action) {
|
|
|
1458
1801
|
}
|
|
1459
1802
|
/** JSON.stringify — produces a quoted string literal safe for embedding in TS source. */
|
|
1460
1803
|
const j = (s) => JSON.stringify(s);
|
|
1804
|
+
/**
|
|
1805
|
+
* Like `j`, but recognises `$VAR` / `${VAR}` env-ref forms in the value and
|
|
1806
|
+
* emits them as `${process.env.VAR ?? ""}` template-literal substitutions
|
|
1807
|
+
* instead of baking the literal `$VAR` string into the script. Used for
|
|
1808
|
+
* values that came from a spec or block param: form fills, opened URLs,
|
|
1809
|
+
* assertion texts/URLs.
|
|
1810
|
+
*/
|
|
1811
|
+
const jExpr = (s) => envRefsToJsExpression(s);
|
|
1461
1812
|
//#endregion
|
|
1462
1813
|
//#region src/prompts/codegen.ts
|
|
1463
1814
|
function buildCleanupPrompt(actions) {
|
|
@@ -1490,6 +1841,109 @@ ${actions.map((a, i) => {
|
|
|
1490
1841
|
}).join("\n")}`;
|
|
1491
1842
|
}
|
|
1492
1843
|
//#endregion
|
|
1844
|
+
//#region src/codegen/cleanup.ts
|
|
1845
|
+
/**
|
|
1846
|
+
* Best-effort cleanup of a recorded action list. Hands the actions to
|
|
1847
|
+
* Claude with the cleanup prompt and parses the returned JSON array; on
|
|
1848
|
+
* any failure (Claude error, malformed JSON, empty array) falls back to
|
|
1849
|
+
* the original input so the caller can always proceed.
|
|
1850
|
+
*
|
|
1851
|
+
* Note: the prompt deliberately does not surface the `stepId` field.
|
|
1852
|
+
* Callers that need to preserve stepIds across cleanup (only `ccqa generate`
|
|
1853
|
+
* today) must re-attach them after this returns.
|
|
1854
|
+
*/
|
|
1855
|
+
async function cleanupActions$1(actions, model) {
|
|
1856
|
+
try {
|
|
1857
|
+
const { result, isError } = await invokeClaudeStreaming({
|
|
1858
|
+
prompt: buildCleanupPrompt(actions),
|
|
1859
|
+
disableBuiltinTools: true,
|
|
1860
|
+
maxTurns: 1,
|
|
1861
|
+
model
|
|
1862
|
+
}, () => {});
|
|
1863
|
+
if (isError || !result) return actions;
|
|
1864
|
+
const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
|
|
1865
|
+
const parsed = JSON.parse(json);
|
|
1866
|
+
if (Array.isArray(parsed) && parsed.length > 0) return parsed;
|
|
1867
|
+
} catch {}
|
|
1868
|
+
return actions;
|
|
1869
|
+
}
|
|
1870
|
+
//#endregion
|
|
1871
|
+
//#region src/runtime/bundled-config.ts
|
|
1872
|
+
const CANDIDATES = [
|
|
1873
|
+
"../runtime/vitest.config.mjs",
|
|
1874
|
+
"./vitest.config.mjs",
|
|
1875
|
+
"./vitest.config.ts"
|
|
1876
|
+
];
|
|
1877
|
+
function bundledVitestConfigPath() {
|
|
1878
|
+
for (const rel of CANDIDATES) {
|
|
1879
|
+
const candidate = fileURLToPath(new URL(rel, import.meta.url));
|
|
1880
|
+
try {
|
|
1881
|
+
accessSync(candidate);
|
|
1882
|
+
return candidate;
|
|
1883
|
+
} catch {}
|
|
1884
|
+
}
|
|
1885
|
+
return fileURLToPath(new URL("./vitest.config.ts", import.meta.url));
|
|
1886
|
+
}
|
|
1887
|
+
//#endregion
|
|
1888
|
+
//#region src/runtime/spawn-vitest.ts
|
|
1889
|
+
const require$1 = createRequire(import.meta.url);
|
|
1890
|
+
function resolveVitestBin() {
|
|
1891
|
+
const pkgPath = require$1.resolve("vitest/package.json");
|
|
1892
|
+
const pkg = require$1(pkgPath);
|
|
1893
|
+
const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.vitest;
|
|
1894
|
+
if (!binRel) throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
|
|
1895
|
+
return resolve(dirname(pkgPath), binRel);
|
|
1896
|
+
}
|
|
1897
|
+
async function spawnVitestTeed(args, opts = {}) {
|
|
1898
|
+
const child = spawnVitestChild(args, opts, "pipe");
|
|
1899
|
+
const [stdout, stderr, exitCode] = await Promise.all([
|
|
1900
|
+
teeDrain(child.stdout, process.stdout),
|
|
1901
|
+
teeDrain(child.stderr, process.stderr),
|
|
1902
|
+
waitExit(child)
|
|
1903
|
+
]);
|
|
1904
|
+
return {
|
|
1905
|
+
exitCode,
|
|
1906
|
+
stdout,
|
|
1907
|
+
stderr
|
|
1908
|
+
};
|
|
1909
|
+
}
|
|
1910
|
+
function spawnVitestStreaming(args, opts = {}) {
|
|
1911
|
+
const child = spawnVitestChild(args, opts, "pipe");
|
|
1912
|
+
return {
|
|
1913
|
+
child,
|
|
1914
|
+
stdout: child.stdout,
|
|
1915
|
+
stderr: child.stderr,
|
|
1916
|
+
exited: waitExit(child)
|
|
1917
|
+
};
|
|
1918
|
+
}
|
|
1919
|
+
function spawnVitestChild(args, opts, stdio) {
|
|
1920
|
+
const vitestBin = resolveVitestBin();
|
|
1921
|
+
return spawn(process.execPath, [vitestBin, ...args], {
|
|
1922
|
+
cwd: opts.cwd,
|
|
1923
|
+
env: opts.env ?? process.env,
|
|
1924
|
+
stdio: [
|
|
1925
|
+
"ignore",
|
|
1926
|
+
stdio,
|
|
1927
|
+
stdio
|
|
1928
|
+
]
|
|
1929
|
+
});
|
|
1930
|
+
}
|
|
1931
|
+
async function teeDrain(stream, sink) {
|
|
1932
|
+
stream.setEncoding("utf8");
|
|
1933
|
+
let buf = "";
|
|
1934
|
+
for await (const chunk of stream) {
|
|
1935
|
+
buf += chunk;
|
|
1936
|
+
sink.write(chunk);
|
|
1937
|
+
}
|
|
1938
|
+
return buf;
|
|
1939
|
+
}
|
|
1940
|
+
function waitExit(child) {
|
|
1941
|
+
return new Promise((resolvePromise, rejectPromise) => {
|
|
1942
|
+
child.once("exit", (code) => resolvePromise(code ?? 0));
|
|
1943
|
+
child.once("error", rejectPromise);
|
|
1944
|
+
});
|
|
1945
|
+
}
|
|
1946
|
+
//#endregion
|
|
1493
1947
|
//#region src/diagnose/apply.ts
|
|
1494
1948
|
function applyDiagnosis(script, diagnosis) {
|
|
1495
1949
|
switch (diagnosis.type) {
|
|
@@ -1540,6 +1994,7 @@ function applyTiming(script, fixes) {
|
|
|
1540
1994
|
summary: summary.join("; ")
|
|
1541
1995
|
};
|
|
1542
1996
|
}
|
|
1997
|
+
const REMOVABLE_ASSERT_RE = /\b(?:abAssert\w*|abWait)\b/;
|
|
1543
1998
|
function applyOverAssertion(script, lineNumbers) {
|
|
1544
1999
|
if (lineNumbers.length === 0) return {
|
|
1545
2000
|
applied: false,
|
|
@@ -1552,13 +2007,13 @@ function applyOverAssertion(script, lineNumbers) {
|
|
|
1552
2007
|
const idx = line - 1;
|
|
1553
2008
|
if (idx < 0 || idx >= lines.length) continue;
|
|
1554
2009
|
const content = lines[idx];
|
|
1555
|
-
if (
|
|
2010
|
+
if (!REMOVABLE_ASSERT_RE.test(content)) continue;
|
|
1556
2011
|
removed.push(`line ${line}: ${content.trim()}`);
|
|
1557
2012
|
lines.splice(idx, 1);
|
|
1558
2013
|
}
|
|
1559
2014
|
if (removed.length === 0) return {
|
|
1560
2015
|
applied: false,
|
|
1561
|
-
reason: "no abAssert lines matched the proposed line numbers"
|
|
2016
|
+
reason: "no abAssert/abWait lines matched the proposed line numbers"
|
|
1562
2017
|
};
|
|
1563
2018
|
return {
|
|
1564
2019
|
applied: true,
|
|
@@ -1604,7 +2059,7 @@ function previewDiff(before, after) {
|
|
|
1604
2059
|
//#endregion
|
|
1605
2060
|
//#region src/diagnose/prompt.ts
|
|
1606
2061
|
function buildDiagnosePrompt(input) {
|
|
1607
|
-
const { script,
|
|
2062
|
+
const { script, specYaml, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
|
|
1608
2063
|
const numbered = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
|
|
1609
2064
|
return `You are diagnosing a failing E2E test. The test was generated from a recorded trace of the original interaction. Compare the failing run against the original spec and recorded actions to determine WHY the test failed and what the right fix is.
|
|
1610
2065
|
|
|
@@ -1695,11 +2150,11 @@ Pick exactly ONE category. The output JSON must follow the shape for that catego
|
|
|
1695
2150
|
- Your **final** assistant message must start with \`{\` and end with \`}\` — a single JSON object, nothing before or after. No prose preamble like "Confirmed: ...", no markdown fences, no commentary, no tool calls in the same turn. If you have an analysis sentence, put it in the \`reasoning\` field.
|
|
1696
2151
|
- Line numbers refer to the numbered test script below (1-based).
|
|
1697
2152
|
- For SELECTOR_DRIFT, \`oldSelector\` must match a substring of the script at that line; \`newSelector\` must be backed by a concrete file:line you read with Grep/Read (do not invent). Cite the evidence in \`reasoning\`.
|
|
1698
|
-
- For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`).
|
|
1699
|
-
- Cross-check assertions against the spec
|
|
2153
|
+
- For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`) or existence-checking waits (\`abWait\`); a recorded \`abWait("[selector]")\` is an implicit existence assertion and a valid removal candidate when the spec never required that element to be present.
|
|
2154
|
+
- Cross-check assertions against the spec YAML. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.
|
|
1700
2155
|
|
|
1701
|
-
## Test Spec (
|
|
1702
|
-
${
|
|
2156
|
+
## Test Spec (spec.yaml)
|
|
2157
|
+
${specYaml}
|
|
1703
2158
|
|
|
1704
2159
|
## Recorded Actions (actions.json summary)
|
|
1705
2160
|
${actions.map((a, i) => {
|
|
@@ -1910,8 +2365,7 @@ function normaliseSleepFixes(raw) {
|
|
|
1910
2365
|
const line = typeof item["line"] === "number" ? item["line"] : null;
|
|
1911
2366
|
if (line === null) continue;
|
|
1912
2367
|
const reason = typeof item["reason"] === "string" ? item["reason"] : "";
|
|
1913
|
-
|
|
1914
|
-
if (kind === "insert" || typeof item["seconds"] === "number" && item["increase_to"] === void 0) {
|
|
2368
|
+
if (item["kind"] === "insert") {
|
|
1915
2369
|
const seconds = typeof item["seconds"] === "number" ? item["seconds"] : null;
|
|
1916
2370
|
if (seconds === null) continue;
|
|
1917
2371
|
out.push({
|
|
@@ -1920,9 +2374,7 @@ function normaliseSleepFixes(raw) {
|
|
|
1920
2374
|
seconds,
|
|
1921
2375
|
reason
|
|
1922
2376
|
});
|
|
1923
|
-
|
|
1924
|
-
}
|
|
1925
|
-
if (kind === "increase" || typeof item["increase_to"] === "number") {
|
|
2377
|
+
} else if (item["kind"] === "increase") {
|
|
1926
2378
|
const increaseTo = typeof item["increase_to"] === "number" ? item["increase_to"] : null;
|
|
1927
2379
|
if (increaseTo === null) continue;
|
|
1928
2380
|
out.push({
|
|
@@ -1931,7 +2383,6 @@ function normaliseSleepFixes(raw) {
|
|
|
1931
2383
|
increase_to: increaseTo,
|
|
1932
2384
|
reason
|
|
1933
2385
|
});
|
|
1934
|
-
continue;
|
|
1935
2386
|
}
|
|
1936
2387
|
}
|
|
1937
2388
|
return out;
|
|
@@ -2116,7 +2567,7 @@ const DEFAULT_CONFIDENCE_THRESHOLD = .8;
|
|
|
2116
2567
|
* or the diagnose loop chose to bail out early.
|
|
2117
2568
|
*/
|
|
2118
2569
|
async function runAutoFixLoop(input) {
|
|
2119
|
-
const { scriptPath, initialRun,
|
|
2570
|
+
const { scriptPath, initialRun, specYaml, actions, maxRetries, mode, runVitest, agentBrowserSession, outputLanguage, model } = input;
|
|
2120
2571
|
let { exitCode, output, currentScript } = initialRun;
|
|
2121
2572
|
if (exitCode === 0) return true;
|
|
2122
2573
|
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
@@ -2127,7 +2578,7 @@ async function runAutoFixLoop(input) {
|
|
|
2127
2578
|
else fix("page snapshot unavailable; continuing without it");
|
|
2128
2579
|
const fixed = await diagnoseAndFix({
|
|
2129
2580
|
script: currentScript,
|
|
2130
|
-
|
|
2581
|
+
specYaml,
|
|
2131
2582
|
actions,
|
|
2132
2583
|
failureLog: output,
|
|
2133
2584
|
pageSnapshot: pageSnapshot ?? void 0,
|
|
@@ -2148,10 +2599,10 @@ async function runAutoFixLoop(input) {
|
|
|
2148
2599
|
return false;
|
|
2149
2600
|
}
|
|
2150
2601
|
async function diagnoseAndFix(input) {
|
|
2151
|
-
const { script,
|
|
2602
|
+
const { script, specYaml, actions, failureLog, pageSnapshot, mode, outputLanguage, model } = input;
|
|
2152
2603
|
const outcome = await timedPhase("diagnose", () => diagnose({
|
|
2153
2604
|
script,
|
|
2154
|
-
|
|
2605
|
+
specYaml,
|
|
2155
2606
|
actions,
|
|
2156
2607
|
failureLog,
|
|
2157
2608
|
pageSnapshot,
|
|
@@ -2186,7 +2637,7 @@ async function diagnoseAndFix(input) {
|
|
|
2186
2637
|
return apply.script;
|
|
2187
2638
|
}
|
|
2188
2639
|
if (decision === "skip-low-confidence") {
|
|
2189
|
-
fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (
|
|
2640
|
+
fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (mode: ${mode})`);
|
|
2190
2641
|
handoffToUser(result, outcome.raw, outputLanguage);
|
|
2191
2642
|
return null;
|
|
2192
2643
|
}
|
|
@@ -2210,10 +2661,15 @@ async function diagnoseAndFix(input) {
|
|
|
2210
2661
|
process.exit(1);
|
|
2211
2662
|
}
|
|
2212
2663
|
}
|
|
2664
|
+
/**
|
|
2665
|
+
* Map a diagnosis to one of three actions. `auto` previously bypassed the
|
|
2666
|
+
* confidence threshold; it no longer does — a low-confidence guess can
|
|
2667
|
+
* corrupt working code, and CI wants "apply obvious fixes, fail loudly on
|
|
2668
|
+
* the rest" rather than "apply every guess".
|
|
2669
|
+
*/
|
|
2213
2670
|
function decide(result, mode) {
|
|
2214
|
-
if (mode === "auto") return "apply-auto";
|
|
2215
2671
|
const highConfidence = result.confidence >= DEFAULT_CONFIDENCE_THRESHOLD;
|
|
2216
|
-
if (mode === "non-interactive") return highConfidence ? "apply-auto" : "skip-low-confidence";
|
|
2672
|
+
if (mode === "auto" || mode === "non-interactive") return highConfidence ? "apply-auto" : "skip-low-confidence";
|
|
2217
2673
|
return highConfidence ? "apply-auto" : "interactive";
|
|
2218
2674
|
}
|
|
2219
2675
|
function reportDiagnosis(result) {
|
|
@@ -2250,27 +2706,27 @@ function handoffMessage(diagnosis, language) {
|
|
|
2250
2706
|
}
|
|
2251
2707
|
function handoffEn(diagnosis) {
|
|
2252
2708
|
switch (diagnosis.type) {
|
|
2253
|
-
case "DATA_MISSING": return [`application-side issue: required data is missing. ${diagnosis.reason}`, "next step: seed the data (or update
|
|
2709
|
+
case "DATA_MISSING": return [`application-side issue: required data is missing. ${diagnosis.reason}`, "next step: seed the data (or update spec.yaml prerequisites), then re-run trace + generate."];
|
|
2254
2710
|
case "UNKNOWN": return [`could not classify the failure. ${diagnosis.reason}`, "next step: read the failure log above, decide whether the test or the app is wrong, and fix manually. consider re-running ccqa trace if the recorded flow no longer matches the live app."];
|
|
2255
2711
|
case "SELECTOR_DRIFT": return [
|
|
2256
2712
|
`selector likely drifted but auto-apply was not safe.`,
|
|
2257
2713
|
`proposed: line ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason}).`,
|
|
2258
2714
|
"next step: confirm in the live app and either accept the proposal manually, or re-run ccqa trace to recapture the new selector."
|
|
2259
2715
|
];
|
|
2260
|
-
case "OVER_ASSERTION": return [`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`, "next step: cross-check
|
|
2716
|
+
case "OVER_ASSERTION": return [`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`, "next step: cross-check spec.yaml. either delete the assertion from the test, or tighten the spec to require it."];
|
|
2261
2717
|
case "TIMING_ISSUE": return [`timing fix proposed but couldn't be applied automatically.`, "next step: insert a sleep manually before the failing line, or re-run with a higher confidence trace."];
|
|
2262
2718
|
}
|
|
2263
2719
|
}
|
|
2264
2720
|
function handoffJa(diagnosis) {
|
|
2265
2721
|
switch (diagnosis.type) {
|
|
2266
|
-
case "DATA_MISSING": return [`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`, "次のステップ: データを seed する(または
|
|
2722
|
+
case "DATA_MISSING": return [`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`, "次のステップ: データを seed する(または spec.yaml の prerequisites を更新)してから ccqa trace + generate をやり直してください。"];
|
|
2267
2723
|
case "UNKNOWN": return [`失敗を分類できませんでした。${diagnosis.reason}`, "次のステップ: 上の失敗ログを確認し、テストとアプリのどちらが原因か判断して手動で修正してください。記録した手順がアプリの現状と合わない場合は ccqa trace の再実行を検討してください。"];
|
|
2268
2724
|
case "SELECTOR_DRIFT": return [
|
|
2269
2725
|
"selector が変わった可能性が高いですが、自動適用は安全でないと判断しました。",
|
|
2270
2726
|
`提案: 行 ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason})`,
|
|
2271
2727
|
"次のステップ: アプリで新 selector を確認し、手動で適用するか ccqa trace をやり直して新しい selector を取り直してください。"
|
|
2272
2728
|
];
|
|
2273
|
-
case "OVER_ASSERTION": return [`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`, "次のステップ:
|
|
2729
|
+
case "OVER_ASSERTION": return [`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`, "次のステップ: spec.yaml と照合して、テスト側の assertion を削るか、spec 側を更新してください。"];
|
|
2274
2730
|
case "TIMING_ISSUE": return ["timing 関連の修正案は出ましたが、自動適用できませんでした。", "次のステップ: 失敗行の前に手動で sleep を入れるか、より信頼度の高い trace を取り直してください。"];
|
|
2275
2731
|
}
|
|
2276
2732
|
}
|
|
@@ -2306,18 +2762,24 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
|
|
|
2306
2762
|
meta("actions", actions.length);
|
|
2307
2763
|
const specContent = await readSpecFile(featureName, specName);
|
|
2308
2764
|
const spec = parseTestSpec(specContent);
|
|
2309
|
-
const
|
|
2310
|
-
|
|
2765
|
+
const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
|
|
2766
|
+
await warnStaleBlockArtifacts();
|
|
2767
|
+
meta("steps", expanded.length);
|
|
2311
2768
|
meta("fix-mode", mode);
|
|
2312
2769
|
meta("language", outputLanguage);
|
|
2313
2770
|
blank();
|
|
2314
|
-
const cleanedActions = await cleanupActions
|
|
2771
|
+
const cleanedActions = await cleanupActions(actions, model);
|
|
2315
2772
|
if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
|
|
2316
|
-
const
|
|
2773
|
+
const markers = buildStepMarkers(expanded, cleanedActions);
|
|
2774
|
+
const scriptPath = await saveTestScript(featureName, specName, actionsToScript({
|
|
2775
|
+
actions: cleanedActions,
|
|
2776
|
+
testName: spec.title,
|
|
2777
|
+
stepMarkers: markers
|
|
2778
|
+
}));
|
|
2317
2779
|
meta("saved", scriptPath);
|
|
2318
2780
|
blank();
|
|
2319
2781
|
const agentBrowserSession = useSnapshot ? `ccqa-generate-${Date.now()}` : void 0;
|
|
2320
|
-
const runVitestForSession = (path) => runVitest
|
|
2782
|
+
const runVitestForSession = (path) => runVitest(path, agentBrowserSession);
|
|
2321
2783
|
let signalHandler = null;
|
|
2322
2784
|
if (agentBrowserSession) {
|
|
2323
2785
|
await closeSession(agentBrowserSession);
|
|
@@ -2336,7 +2798,7 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
|
|
|
2336
2798
|
if (await runAutoFixLoop({
|
|
2337
2799
|
scriptPath,
|
|
2338
2800
|
initialRun,
|
|
2339
|
-
|
|
2801
|
+
specYaml: specContent,
|
|
2340
2802
|
actions: cleanedActions,
|
|
2341
2803
|
maxRetries,
|
|
2342
2804
|
mode,
|
|
@@ -2358,6 +2820,30 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
|
|
|
2358
2820
|
if (agentBrowserSession) await closeSession(agentBrowserSession);
|
|
2359
2821
|
}
|
|
2360
2822
|
}
|
|
2823
|
+
/**
|
|
2824
|
+
* Build the per-step markers consumed by `actionsToScript`. Each action's
|
|
2825
|
+
* `stepId` (assigned at trace time from the last `STEP_START|...` line)
|
|
2826
|
+
* groups contiguous actions; we emit one marker at the first action of
|
|
2827
|
+
* each contiguous run. Unknown step ids are skipped rather than mis-labelled.
|
|
2828
|
+
*/
|
|
2829
|
+
function buildStepMarkers(steps, actions) {
|
|
2830
|
+
const stepById = new Map(steps.map((s) => [s.id, s]));
|
|
2831
|
+
const markers = [];
|
|
2832
|
+
let lastEmittedStepId = null;
|
|
2833
|
+
for (let i = 0; i < actions.length; i++) {
|
|
2834
|
+
const id = actions[i].stepId;
|
|
2835
|
+
if (!id || id === lastEmittedStepId) continue;
|
|
2836
|
+
const step = stepById.get(id);
|
|
2837
|
+
if (!step) continue;
|
|
2838
|
+
markers.push({
|
|
2839
|
+
actionIndex: i,
|
|
2840
|
+
stepId: step.id,
|
|
2841
|
+
source: step.source
|
|
2842
|
+
});
|
|
2843
|
+
lastEmittedStepId = id;
|
|
2844
|
+
}
|
|
2845
|
+
return markers;
|
|
2846
|
+
}
|
|
2361
2847
|
async function confirmOverwrite(path) {
|
|
2362
2848
|
if (!process.stdin.isTTY) {
|
|
2363
2849
|
warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
|
|
@@ -2377,67 +2863,7 @@ async function confirmOverwrite(path) {
|
|
|
2377
2863
|
rl.close();
|
|
2378
2864
|
}
|
|
2379
2865
|
}
|
|
2380
|
-
async function
|
|
2381
|
-
if (!setups?.length) return [];
|
|
2382
|
-
const result = [];
|
|
2383
|
-
for (const ref of setups) {
|
|
2384
|
-
const scriptPath = join(getSetupDir(ref.name), "test.spec.ts");
|
|
2385
|
-
const resolved = replacePlaceholders(extractTestBody(await readFile(scriptPath, "utf-8").catch(() => {
|
|
2386
|
-
throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
|
|
2387
|
-
})), ref.params ?? {});
|
|
2388
|
-
result.push({
|
|
2389
|
-
name: ref.name,
|
|
2390
|
-
body: resolved
|
|
2391
|
-
});
|
|
2392
|
-
}
|
|
2393
|
-
return result;
|
|
2394
|
-
}
|
|
2395
|
-
/**
|
|
2396
|
-
* Extract the test body (statements inside the test callback) from a setup
|
|
2397
|
-
* test script.
|
|
2398
|
-
*
|
|
2399
|
-
* Locates the first arrow callback (`=> {`) after a top-level `test(` call
|
|
2400
|
-
* and returns the text between the matching `{` and `}`. Handles both
|
|
2401
|
-
* single-line and multi-line `test(...)` formatting (the latter is what
|
|
2402
|
-
* prettier produces).
|
|
2403
|
-
*
|
|
2404
|
-
* Brace tracking is naive (string/regex/comment literals are not parsed
|
|
2405
|
-
* specially), but setup test scripts are themselves generated by ccqa and
|
|
2406
|
-
* follow a fixed shape, so this is sufficient in practice.
|
|
2407
|
-
*/
|
|
2408
|
-
function extractTestBody(script) {
|
|
2409
|
-
const testCallMatch = /\btest\s*\(/.exec(script);
|
|
2410
|
-
if (!testCallMatch) return "";
|
|
2411
|
-
const arrowIdx = script.indexOf("=> {", testCallMatch.index);
|
|
2412
|
-
if (arrowIdx === -1) return "";
|
|
2413
|
-
const bodyStart = arrowIdx + 4;
|
|
2414
|
-
let depth = 1;
|
|
2415
|
-
let i = bodyStart;
|
|
2416
|
-
for (; i < script.length; i++) {
|
|
2417
|
-
const ch = script[i];
|
|
2418
|
-
if (ch === "{") depth++;
|
|
2419
|
-
else if (ch === "}") {
|
|
2420
|
-
depth--;
|
|
2421
|
-
if (depth === 0) break;
|
|
2422
|
-
}
|
|
2423
|
-
}
|
|
2424
|
-
if (depth !== 0) return "";
|
|
2425
|
-
return script.slice(bodyStart, i).replace(/^\n/, "").replace(/\n\s*$/, "");
|
|
2426
|
-
}
|
|
2427
|
-
function replacePlaceholders(body, params) {
|
|
2428
|
-
let result = body;
|
|
2429
|
-
for (const [key, value] of Object.entries(params)) if (hasEnvRef(value)) {
|
|
2430
|
-
const expr = envRefsToJsExpression(value);
|
|
2431
|
-
const re = new RegExp(`(["'])\\{\\{${escapeRegExp(key)}\\}\\}\\1`, "g");
|
|
2432
|
-
result = result.replace(re, expr);
|
|
2433
|
-
result = result.replaceAll(`{{${key}}}`, value);
|
|
2434
|
-
} else result = result.replaceAll(`{{${key}}}`, value);
|
|
2435
|
-
return result;
|
|
2436
|
-
}
|
|
2437
|
-
function escapeRegExp(s) {
|
|
2438
|
-
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
2439
|
-
}
|
|
2440
|
-
async function runVitest$1(scriptPath, agentBrowserSession) {
|
|
2866
|
+
async function runVitest(scriptPath, agentBrowserSession) {
|
|
2441
2867
|
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
2442
2868
|
"run",
|
|
2443
2869
|
"--config",
|
|
@@ -2454,513 +2880,89 @@ async function runVitest$1(scriptPath, agentBrowserSession) {
|
|
|
2454
2880
|
currentScript
|
|
2455
2881
|
};
|
|
2456
2882
|
}
|
|
2457
|
-
async function cleanupActions
|
|
2458
|
-
|
|
2459
|
-
|
|
2460
|
-
prompt: buildCleanupPrompt(actions),
|
|
2461
|
-
disableBuiltinTools: true,
|
|
2462
|
-
maxTurns: 1,
|
|
2463
|
-
model
|
|
2464
|
-
}, () => {});
|
|
2465
|
-
if (isError || !result) return actions;
|
|
2466
|
-
const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
|
|
2467
|
-
const parsed = JSON.parse(json);
|
|
2468
|
-
if (Array.isArray(parsed) && parsed.length > 0) return parsed;
|
|
2469
|
-
} catch {}
|
|
2470
|
-
return actions;
|
|
2883
|
+
async function cleanupActions(actions, model) {
|
|
2884
|
+
const cleaned = await cleanupActions$1(actions, model);
|
|
2885
|
+
return cleaned === actions ? actions : reattachStepIds(cleaned, actions);
|
|
2471
2886
|
}
|
|
2472
|
-
|
|
2473
|
-
|
|
2474
|
-
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
2480
|
-
|
|
2887
|
+
/**
|
|
2888
|
+
* The Claude cleanup pass returns a pruned array without the `stepId` field
|
|
2889
|
+
* (the prompt deliberately doesn't expose it — that would make the prompt
|
|
2890
|
+
* easier to misformat). Re-attach stepIds here by replaying the cleaned
|
|
2891
|
+
* stream against the original and matching the next compatible action.
|
|
2892
|
+
*
|
|
2893
|
+
* Algorithm: walk both arrays in lockstep. For each cleaned action, scan
|
|
2894
|
+
* forward in `original` (from the last-matched cursor) for the next entry
|
|
2895
|
+
* with the same `command` + `selector` + `value` + `assertType` shape, and
|
|
2896
|
+
* borrow its `stepId`. Cleaned actions Claude invented from thin air (rare,
|
|
2897
|
+
* and explicitly forbidden by the prompt) end up with no stepId — codegen
|
|
2898
|
+
* just won't emit a step marker for that index, which is the same outcome
|
|
2899
|
+
* as a wholly stepId-less actions.json.
|
|
2900
|
+
*
|
|
2901
|
+
* The matching is forward-only so that if cleanup keeps two identical fills
|
|
2902
|
+
* (e.g. typing the same value twice intentionally), they're paired to the
|
|
2903
|
+
* first and second occurrence in the original — not both to the first.
|
|
2904
|
+
*/
|
|
2905
|
+
function reattachStepIds(cleaned, original) {
|
|
2906
|
+
let cursor = 0;
|
|
2907
|
+
const out = [];
|
|
2908
|
+
for (const c of cleaned) {
|
|
2909
|
+
let matched = null;
|
|
2910
|
+
for (let i = cursor; i < original.length; i++) if (sameShape(c, original[i])) {
|
|
2911
|
+
matched = original[i];
|
|
2912
|
+
cursor = i + 1;
|
|
2913
|
+
break;
|
|
2914
|
+
}
|
|
2915
|
+
if (matched?.stepId) out.push({
|
|
2916
|
+
...c,
|
|
2917
|
+
stepId: matched.stepId
|
|
2918
|
+
});
|
|
2919
|
+
else out.push(c);
|
|
2481
2920
|
}
|
|
2921
|
+
return out;
|
|
2482
2922
|
}
|
|
2483
|
-
|
|
2484
|
-
|
|
2485
|
-
});
|
|
2486
|
-
async function runTests(target) {
|
|
2487
|
-
header("run", target);
|
|
2488
|
-
const specs = await resolveSpecs(target);
|
|
2489
|
-
if (specs.length === 0) {
|
|
2490
|
-
error("no test scripts found");
|
|
2491
|
-
hint("run 'ccqa generate <feature>/<spec>' first to generate tests");
|
|
2492
|
-
process.exit(1);
|
|
2493
|
-
}
|
|
2494
|
-
const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
|
|
2495
|
-
const summaries = [];
|
|
2496
|
-
let overallExitCode = 0;
|
|
2497
|
-
const vitestConfig = await resolveVitestConfig();
|
|
2498
|
-
try {
|
|
2499
|
-
for (let i = 0; i < specs.length; i++) {
|
|
2500
|
-
const { featureName, specName } = specs[i];
|
|
2501
|
-
const scriptFile = await getTestScript(featureName, specName);
|
|
2502
|
-
if (!scriptFile) {
|
|
2503
|
-
warn(`${featureName}/${specName}: no test.spec.ts found`);
|
|
2504
|
-
continue;
|
|
2505
|
-
}
|
|
2506
|
-
run(`${featureName}/${specName}`);
|
|
2507
|
-
meta("test", scriptFile);
|
|
2508
|
-
blank();
|
|
2509
|
-
const reportFile = join(tmpDir, `report-${i}.json`);
|
|
2510
|
-
const proc = spawnVitestStreaming([
|
|
2511
|
-
"run",
|
|
2512
|
-
"--config",
|
|
2513
|
-
vitestConfig,
|
|
2514
|
-
scriptFile,
|
|
2515
|
-
"--reporter=json",
|
|
2516
|
-
`--outputFile.json=${reportFile}`
|
|
2517
|
-
]);
|
|
2518
|
-
await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]);
|
|
2519
|
-
const exitCode = await proc.exited;
|
|
2520
|
-
if (exitCode !== 0) overallExitCode = exitCode;
|
|
2521
|
-
const report = await readReport(reportFile);
|
|
2522
|
-
summaries.push({
|
|
2523
|
-
featureName,
|
|
2524
|
-
specName,
|
|
2525
|
-
scriptFile,
|
|
2526
|
-
report,
|
|
2527
|
-
exitCode
|
|
2528
|
-
});
|
|
2529
|
-
blank();
|
|
2530
|
-
}
|
|
2531
|
-
printSummary(summaries);
|
|
2532
|
-
} finally {
|
|
2533
|
-
await rm(tmpDir, {
|
|
2534
|
-
recursive: true,
|
|
2535
|
-
force: true
|
|
2536
|
-
});
|
|
2537
|
-
}
|
|
2538
|
-
process.exit(overallExitCode);
|
|
2539
|
-
}
|
|
2540
|
-
async function readReport(path) {
|
|
2541
|
-
try {
|
|
2542
|
-
const raw = await readFile(path, "utf8");
|
|
2543
|
-
return JSON.parse(raw);
|
|
2544
|
-
} catch {
|
|
2545
|
-
return null;
|
|
2546
|
-
}
|
|
2547
|
-
}
|
|
2548
|
-
const useColor = process.stdout.isTTY && process.env.NO_COLOR == null;
|
|
2549
|
-
const C = {
|
|
2550
|
-
reset: useColor ? "\x1B[0m" : "",
|
|
2551
|
-
bold: useColor ? "\x1B[1m" : "",
|
|
2552
|
-
dim: useColor ? "\x1B[2m" : "",
|
|
2553
|
-
green: useColor ? "\x1B[32m" : "",
|
|
2554
|
-
red: useColor ? "\x1B[31m" : "",
|
|
2555
|
-
yellow: useColor ? "\x1B[33m" : "",
|
|
2556
|
-
cyan: useColor ? "\x1B[36m" : "",
|
|
2557
|
-
gray: useColor ? "\x1B[90m" : ""
|
|
2558
|
-
};
|
|
2559
|
-
function printSummary(summaries) {
|
|
2560
|
-
process.stdout.write(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
|
|
2561
|
-
let totalTests = 0;
|
|
2562
|
-
let totalPassed = 0;
|
|
2563
|
-
let totalFailed = 0;
|
|
2564
|
-
let totalSkipped = 0;
|
|
2565
|
-
for (const s of summaries) {
|
|
2566
|
-
const header = `${C.bold}${s.featureName}/${s.specName}${C.reset}`;
|
|
2567
|
-
if (!s.report) {
|
|
2568
|
-
const icon = s.exitCode === 0 ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
|
|
2569
|
-
process.stdout.write(`${icon} ${header} ${C.dim}(no report)${C.reset}\n`);
|
|
2570
|
-
continue;
|
|
2571
|
-
}
|
|
2572
|
-
totalTests += s.report.numTotalTests;
|
|
2573
|
-
totalPassed += s.report.numPassedTests;
|
|
2574
|
-
totalFailed += s.report.numFailedTests;
|
|
2575
|
-
totalSkipped += s.report.numPendingTests;
|
|
2576
|
-
const ok = s.report.success;
|
|
2577
|
-
const icon = ok ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
|
|
2578
|
-
const countColor = ok ? C.green : C.red;
|
|
2579
|
-
process.stdout.write(`${icon} ${header} ${countColor}${s.report.numPassedTests}/${s.report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
|
|
2580
|
-
for (const file of s.report.testResults) for (const a of file.assertionResults) {
|
|
2581
|
-
const aIcon = assertionIcon(a.status);
|
|
2582
|
-
const dur = a.duration != null ? ` ${C.gray}${formatDuration(a.duration)}${C.reset}` : "";
|
|
2583
|
-
process.stdout.write(` ${aIcon} ${a.fullName}${dur}\n`);
|
|
2584
|
-
if (a.status === "failed" && a.failureMessages?.length) for (const msg of a.failureMessages) {
|
|
2585
|
-
const firstLine = msg.split("\n")[0] ?? msg;
|
|
2586
|
-
process.stdout.write(` ${C.red}${firstLine}${C.reset}\n`);
|
|
2587
|
-
}
|
|
2588
|
-
}
|
|
2589
|
-
}
|
|
2590
|
-
const specsPassed = summaries.filter((s) => s.exitCode === 0).length;
|
|
2591
|
-
const specsFailed = summaries.filter((s) => s.exitCode !== 0).length;
|
|
2592
|
-
process.stdout.write("\n");
|
|
2593
|
-
process.stdout.write(` ${C.bold}Specs${C.reset} ${summaries.length} (${C.green}${specsPassed} passed${C.reset}, ${specsFailed > 0 ? C.red : C.dim}${specsFailed} failed${C.reset})\n`);
|
|
2594
|
-
process.stdout.write(` ${C.bold}Tests${C.reset} ${totalTests} (${C.green}${totalPassed} passed${C.reset}, ${totalFailed > 0 ? C.red : C.dim}${totalFailed} failed${C.reset}, ${C.yellow}${totalSkipped} skipped${C.reset})\n`);
|
|
2595
|
-
process.stdout.write("\n");
|
|
2596
|
-
}
|
|
2597
|
-
function assertionIcon(status) {
|
|
2598
|
-
switch (status) {
|
|
2599
|
-
case "passed": return `${C.green}✔${C.reset}`;
|
|
2600
|
-
case "failed": return `${C.red}✖${C.reset}`;
|
|
2601
|
-
case "skipped":
|
|
2602
|
-
case "pending":
|
|
2603
|
-
case "todo": return `${C.yellow}◌${C.reset}`;
|
|
2604
|
-
}
|
|
2605
|
-
}
|
|
2606
|
-
function formatDuration(ms) {
|
|
2607
|
-
if (ms < 1e3) return `${Math.round(ms)}ms`;
|
|
2608
|
-
return `${(ms / 1e3).toFixed(2)}s`;
|
|
2609
|
-
}
|
|
2610
|
-
const NOISE_LINE_PATTERNS = [/^JSON report written to /];
|
|
2611
|
-
async function streamFiltered(source, sink) {
|
|
2612
|
-
source.setEncoding("utf8");
|
|
2613
|
-
let buffer = "";
|
|
2614
|
-
for await (const chunk of source) {
|
|
2615
|
-
buffer += chunk;
|
|
2616
|
-
let nl = buffer.indexOf("\n");
|
|
2617
|
-
while (nl !== -1) {
|
|
2618
|
-
const line = buffer.slice(0, nl);
|
|
2619
|
-
buffer = buffer.slice(nl + 1);
|
|
2620
|
-
if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) sink.write(line + "\n");
|
|
2621
|
-
nl = buffer.indexOf("\n");
|
|
2622
|
-
}
|
|
2623
|
-
}
|
|
2624
|
-
if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) sink.write(buffer);
|
|
2625
|
-
}
|
|
2626
|
-
async function resolveSpecs(target) {
|
|
2627
|
-
if (!target) return listAllSpecs();
|
|
2628
|
-
if (target.includes("/")) {
|
|
2629
|
-
const { featureName, specName } = parseSpecPath(target);
|
|
2630
|
-
return [{
|
|
2631
|
-
featureName,
|
|
2632
|
-
specName
|
|
2633
|
-
}];
|
|
2634
|
-
}
|
|
2635
|
-
return (await listSpecsForFeature(target)).map((specName) => ({
|
|
2636
|
-
featureName: target,
|
|
2637
|
-
specName
|
|
2638
|
-
}));
|
|
2923
|
+
function sameShape(a, b) {
|
|
2924
|
+
return a.command === b.command && (a.selector ?? "") === (b.selector ?? "") && (a.value ?? "") === (b.value ?? "") && (a.assertType ?? "") === (b.assertType ?? "");
|
|
2639
2925
|
}
|
|
2640
2926
|
//#endregion
|
|
2641
|
-
//#region src/
|
|
2642
|
-
const traceSetupCommand = new Command("trace-setup").argument("<name>", "Setup name to trace (e.g. login)").description("Trace a setup procedure using dummy placeholder values").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (name, opts) => {
|
|
2643
|
-
await runTraceSetup(name, opts.model);
|
|
2644
|
-
});
|
|
2645
|
-
async function runTraceSetup(name, model) {
|
|
2646
|
-
header("trace-setup", name);
|
|
2647
|
-
try {
|
|
2648
|
-
meta("agent-browser", assertAgentBrowserAvailable());
|
|
2649
|
-
} catch (e) {
|
|
2650
|
-
if (e instanceof AgentBrowserUnavailableError) {
|
|
2651
|
-
error(formatAgentBrowserUnavailableMessage());
|
|
2652
|
-
process.exit(1);
|
|
2653
|
-
}
|
|
2654
|
-
throw e;
|
|
2655
|
-
}
|
|
2656
|
-
await ensureCcqaDir();
|
|
2657
|
-
const spec = parseSetupSpec(await readSetupSpecFile(name));
|
|
2658
|
-
const resolvedSpec = replacePlaceholdersWithDummies(spec);
|
|
2659
|
-
const secretsToScrub = buildSecretsToScrub(spec);
|
|
2660
|
-
meta("setup", spec.title);
|
|
2661
|
-
meta("steps", spec.steps.length);
|
|
2662
|
-
if (spec.placeholders) meta("placeholders", Object.keys(spec.placeholders).join(", "));
|
|
2663
|
-
blank();
|
|
2664
|
-
const systemPrompt = buildSetupTraceSystemPrompt(resolvedSpec);
|
|
2665
|
-
const prompt = buildSetupTracePrompt(resolvedSpec);
|
|
2666
|
-
info("Running agent-browser session...");
|
|
2667
|
-
blank();
|
|
2668
|
-
const routeSteps = [];
|
|
2669
|
-
let overallStatus = "passed";
|
|
2670
|
-
const traceActions = [];
|
|
2671
|
-
const { isError } = await invokeClaudeStreaming({
|
|
2672
|
-
prompt,
|
|
2673
|
-
systemPrompt,
|
|
2674
|
-
allowedTools: [
|
|
2675
|
-
"Bash(*)",
|
|
2676
|
-
"Read",
|
|
2677
|
-
"Grep",
|
|
2678
|
-
"Glob"
|
|
2679
|
-
],
|
|
2680
|
-
env: {
|
|
2681
|
-
PATH: pathWithAgentBrowserShim(process.env["PATH"]),
|
|
2682
|
-
ANTHROPIC_API_KEY: ""
|
|
2683
|
-
},
|
|
2684
|
-
model,
|
|
2685
|
-
onAbAction: (abAction) => {
|
|
2686
|
-
const action = parseAbAction(scrubSecrets(abAction, secretsToScrub));
|
|
2687
|
-
if (action) traceActions.push(action);
|
|
2688
|
-
},
|
|
2689
|
-
onAbActionFailed: () => {
|
|
2690
|
-
traceActions.pop();
|
|
2691
|
-
}
|
|
2692
|
-
}, (msg) => {
|
|
2693
|
-
if (msg.type !== "assistant") return;
|
|
2694
|
-
for (const block of msg.message.content ?? []) {
|
|
2695
|
-
if (block.type !== "text" || !block.text) continue;
|
|
2696
|
-
const text = block.text;
|
|
2697
|
-
const statusLine = parseStatusLine(text);
|
|
2698
|
-
if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
|
|
2699
|
-
for (const line of text.split("\n")) {
|
|
2700
|
-
const trimmed = line.trim();
|
|
2701
|
-
if (trimmed.startsWith("ROUTE_STEP|")) {
|
|
2702
|
-
const routeStep = parseRouteStep(trimmed);
|
|
2703
|
-
if (routeStep) {
|
|
2704
|
-
routeSteps.push(routeStep);
|
|
2705
|
-
if (routeStep.status === "FAILED") overallStatus = "failed";
|
|
2706
|
-
}
|
|
2707
|
-
} else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
|
|
2708
|
-
const action = parseAbAction(scrubSecrets(trimmed, secretsToScrub));
|
|
2709
|
-
if (action) traceActions.push(action);
|
|
2710
|
-
}
|
|
2711
|
-
}
|
|
2712
|
-
}
|
|
2713
|
-
});
|
|
2714
|
-
if (isError) overallStatus = "failed";
|
|
2715
|
-
const route = {
|
|
2716
|
-
specName: name,
|
|
2717
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2718
|
-
status: overallStatus,
|
|
2719
|
-
steps: routeSteps
|
|
2720
|
-
};
|
|
2721
|
-
const [routePath, actionsPath] = await Promise.all([saveSetupRoute(name, route), saveSetupActions(name, traceActions)]);
|
|
2722
|
-
blank();
|
|
2723
|
-
meta("route", routePath);
|
|
2724
|
-
meta("saved", actionsPath);
|
|
2725
|
-
meta("actions", traceActions.length);
|
|
2726
|
-
meta("status", overallStatus.toUpperCase());
|
|
2727
|
-
hint(`run 'ccqa generate-setup ${name}' to generate and validate the setup`);
|
|
2728
|
-
}
|
|
2729
|
-
function replacePlaceholdersWithDummies(spec) {
|
|
2730
|
-
if (!spec.placeholders) return spec;
|
|
2731
|
-
const dummies = spec.placeholders;
|
|
2732
|
-
const resolve = (text) => {
|
|
2733
|
-
let result = text;
|
|
2734
|
-
for (const [key, def] of Object.entries(dummies)) result = result.replaceAll(`{{${key}}}`, resolveEnvRefs(def.dummy));
|
|
2735
|
-
return result;
|
|
2736
|
-
};
|
|
2737
|
-
return {
|
|
2738
|
-
...spec,
|
|
2739
|
-
steps: spec.steps.map((step) => ({
|
|
2740
|
-
...step,
|
|
2741
|
-
instruction: resolve(step.instruction),
|
|
2742
|
-
expected: resolve(step.expected)
|
|
2743
|
-
}))
|
|
2744
|
-
};
|
|
2745
|
-
}
|
|
2927
|
+
//#region src/claude/extract-json.ts
|
|
2746
2928
|
/**
|
|
2747
|
-
*
|
|
2748
|
-
*
|
|
2749
|
-
*
|
|
2750
|
-
* For each placeholder whose dummy contains env refs, store
|
|
2751
|
-
* <resolved-value> -> <original ${VAR} string>
|
|
2752
|
-
* so that an `ab fill ... <secret>` line records the placeholder string
|
|
2753
|
-
* instead of the secret. Empty resolved values are skipped — they would
|
|
2754
|
-
* otherwise replace incidental empty strings in the recorded actions.
|
|
2929
|
+
* Pulls a JSON object out of a Claude completion. Accepts either a fenced
|
|
2930
|
+
* ```json block or a bare `{...}` payload that constitutes the whole reply.
|
|
2931
|
+
* Returns null when neither shape is present.
|
|
2755
2932
|
*/
|
|
2756
|
-
function
|
|
2757
|
-
const
|
|
2758
|
-
if (
|
|
2759
|
-
const
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
const resolved = resolveEnvRefs(def.dummy);
|
|
2763
|
-
if (!resolved) continue;
|
|
2764
|
-
map.set(resolved, def.dummy);
|
|
2765
|
-
}
|
|
2766
|
-
return map;
|
|
2767
|
-
}
|
|
2768
|
-
/** Replace every occurrence of a recorded secret with its `${VAR}` placeholder. */
|
|
2769
|
-
function scrubSecrets(line, secrets) {
|
|
2770
|
-
if (secrets.size === 0) return line;
|
|
2771
|
-
let result = line;
|
|
2772
|
-
for (const [secret, placeholder] of secrets) {
|
|
2773
|
-
if (!result.includes(secret)) continue;
|
|
2774
|
-
result = result.split(secret).join(placeholder);
|
|
2775
|
-
}
|
|
2776
|
-
return result;
|
|
2933
|
+
function extractJsonBlock(text) {
|
|
2934
|
+
const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
|
|
2935
|
+
if (fenced && fenced[1]) return fenced[1].trim();
|
|
2936
|
+
const trimmed = text.trim();
|
|
2937
|
+
if (trimmed.startsWith("{") && trimmed.endsWith("}")) return trimmed;
|
|
2938
|
+
return null;
|
|
2777
2939
|
}
|
|
2778
2940
|
//#endregion
|
|
2779
|
-
//#region src/
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
2793
|
-
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
2802
|
-
meta("fix-mode", mode);
|
|
2803
|
-
meta("language", outputLanguage);
|
|
2804
|
-
blank();
|
|
2805
|
-
cleanedActions = await cleanupActions(actions, model);
|
|
2806
|
-
if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
|
|
2807
|
-
await writeFile(dummyPath, actionsToScript(cleanedActions, spec.title), "utf-8");
|
|
2808
|
-
meta("saved", dummyPath);
|
|
2809
|
-
}
|
|
2810
|
-
blank();
|
|
2811
|
-
const agentBrowserSession = `ccqa-generate-setup-${name}-${Date.now()}`;
|
|
2812
|
-
const runVitestForSession = (path) => runVitestResolved(path, agentBrowserSession);
|
|
2813
|
-
await closeSession(agentBrowserSession);
|
|
2814
|
-
const signalHandler = () => {
|
|
2815
|
-
closeSession(agentBrowserSession).finally(() => process.exit(130));
|
|
2816
|
-
};
|
|
2817
|
-
process.once("SIGINT", signalHandler);
|
|
2818
|
-
process.once("SIGTERM", signalHandler);
|
|
2819
|
-
try {
|
|
2820
|
-
const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(dummyPath), "run");
|
|
2821
|
-
let passed = initialRun.exitCode === 0;
|
|
2822
|
-
if (!passed) passed = await runAutoFixLoop({
|
|
2823
|
-
scriptPath: dummyPath,
|
|
2824
|
-
initialRun,
|
|
2825
|
-
specMarkdown: specContent,
|
|
2826
|
-
actions: cleanedActions,
|
|
2827
|
-
maxRetries,
|
|
2828
|
-
mode,
|
|
2829
|
-
runVitest: runVitestForSession,
|
|
2830
|
-
agentBrowserSession,
|
|
2831
|
-
outputLanguage,
|
|
2832
|
-
model
|
|
2833
|
-
});
|
|
2834
|
-
if (!passed) {
|
|
2835
|
-
warn("auto-fix exhausted; setup test still failing");
|
|
2836
|
-
hint(`edit ${dummyPath} manually, then run: ccqa generate-setup ${name} --from-dummy`);
|
|
2837
|
-
process.exit(1);
|
|
2838
|
-
}
|
|
2839
|
-
await writeFile(finalPath, reversePlaceholdersInScript(await readFile(dummyPath, "utf8"), spec.placeholders), "utf-8");
|
|
2840
|
-
await unlink(dummyPath).catch(() => {});
|
|
2841
|
-
blank();
|
|
2842
|
-
meta("saved", finalPath);
|
|
2843
|
-
hint(`setup '${name}' is ready; reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
|
|
2844
|
-
} finally {
|
|
2845
|
-
process.off("SIGINT", signalHandler);
|
|
2846
|
-
process.off("SIGTERM", signalHandler);
|
|
2847
|
-
await closeSession(agentBrowserSession);
|
|
2848
|
-
}
|
|
2941
|
+
//#region src/prompts/draft.ts
|
|
2942
|
+
function buildNamingSystemPrompt() {
|
|
2943
|
+
return `You name a new ccqa test case based on the user's intent and the existing feature tree.
|
|
2944
|
+
|
|
2945
|
+
ccqa test cases live under \`.ccqa/features/<featureName>/test-cases/<specName>/spec.yaml\`.
|
|
2946
|
+
|
|
2947
|
+
## Naming rules
|
|
2948
|
+
|
|
2949
|
+
- featureName and specName are kebab-case ASCII (lowercase, words separated by '-').
|
|
2950
|
+
- featureName: a broad area (e.g. "tasks", "auth", "billing", "search").
|
|
2951
|
+
- specName: a short scenario name (e.g. "create-and-complete", "login-with-email", "search-by-tag").
|
|
2952
|
+
- Reuse existing featureName when the user's intent fits an existing area. Only invent a new featureName when the existing tree clearly does not cover the area.
|
|
2953
|
+
- specName must NOT collide with an existing spec under the chosen feature. If the natural name collides, pick a different one that distinguishes the new scenario from the existing ones.
|
|
2954
|
+
- Use the codebase (Read/Grep/Glob) sparingly to confirm domain vocabulary if helpful. Do not over-explore.
|
|
2955
|
+
|
|
2956
|
+
## Output (STRICT)
|
|
2957
|
+
|
|
2958
|
+
Output ONE fenced \`\`\`json block, nothing else outside it:
|
|
2959
|
+
|
|
2960
|
+
{
|
|
2961
|
+
"featureName": "<kebab-case>",
|
|
2962
|
+
"specName": "<kebab-case>",
|
|
2963
|
+
"reason": "<one short sentence: why this name and how it relates to existing specs>"
|
|
2849
2964
|
}
|
|
2850
|
-
|
|
2851
|
-
* Replace dummy values with {{placeholder}} directly in the test script text.
|
|
2852
|
-
* Longer dummy values are replaced first to avoid partial matches.
|
|
2853
|
-
*/
|
|
2854
|
-
function reversePlaceholdersInScript(script, placeholders) {
|
|
2855
|
-
if (!placeholders) return script;
|
|
2856
|
-
const entries = Object.entries(placeholders).sort((a, b) => b[1].dummy.length - a[1].dummy.length);
|
|
2857
|
-
let result = script;
|
|
2858
|
-
for (const [key, def] of entries) result = result.replaceAll(def.dummy, `{{${key}}}`);
|
|
2859
|
-
return result;
|
|
2860
|
-
}
|
|
2861
|
-
async function runVitest(scriptPath, agentBrowserSession) {
|
|
2862
|
-
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
2863
|
-
"run",
|
|
2864
|
-
"--config",
|
|
2865
|
-
bundledVitestConfigPath(),
|
|
2866
|
-
scriptPath
|
|
2867
|
-
], agentBrowserSession ? { env: {
|
|
2868
|
-
...process.env,
|
|
2869
|
-
AGENT_BROWSER_SESSION: agentBrowserSession
|
|
2870
|
-
} } : {});
|
|
2871
|
-
const currentScript = await readFile(scriptPath, "utf8");
|
|
2872
|
-
return {
|
|
2873
|
-
exitCode,
|
|
2874
|
-
output: stdout + stderr,
|
|
2875
|
-
currentScript
|
|
2876
|
-
};
|
|
2877
|
-
}
|
|
2878
|
-
/**
|
|
2879
|
-
* Run vitest on `test.dummy.spec.ts`, but transparently expand any `${VAR}`
|
|
2880
|
-
* env refs to real values for the duration of the run. The original file is
|
|
2881
|
-
* preserved unchanged so subsequent reverse-replace still sees the env-ref
|
|
2882
|
-
* literals. Auto-fix edits the original file (via writeFile in callers), so
|
|
2883
|
-
* we always re-read it before each invocation.
|
|
2884
|
-
*/
|
|
2885
|
-
async function runVitestResolved(scriptPath, agentBrowserSession) {
|
|
2886
|
-
const original = await readFile(scriptPath, "utf8");
|
|
2887
|
-
if (!hasEnvRef(original)) return runVitest(scriptPath, agentBrowserSession);
|
|
2888
|
-
const tmpPath = scriptPath.replace(/\.ts$/, ".__resolved.spec.ts");
|
|
2889
|
-
await writeFile(tmpPath, resolveEnvRefs(original), "utf-8");
|
|
2890
|
-
try {
|
|
2891
|
-
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
2892
|
-
"run",
|
|
2893
|
-
"--config",
|
|
2894
|
-
bundledVitestConfigPath(),
|
|
2895
|
-
tmpPath
|
|
2896
|
-
], agentBrowserSession ? { env: {
|
|
2897
|
-
...process.env,
|
|
2898
|
-
AGENT_BROWSER_SESSION: agentBrowserSession
|
|
2899
|
-
} } : {});
|
|
2900
|
-
return {
|
|
2901
|
-
exitCode,
|
|
2902
|
-
output: stdout + stderr,
|
|
2903
|
-
currentScript: original
|
|
2904
|
-
};
|
|
2905
|
-
} finally {
|
|
2906
|
-
await unlink(tmpPath).catch(() => {});
|
|
2907
|
-
}
|
|
2908
|
-
}
|
|
2909
|
-
async function cleanupActions(actions, model) {
|
|
2910
|
-
try {
|
|
2911
|
-
const { result, isError } = await invokeClaudeStreaming({
|
|
2912
|
-
prompt: buildCleanupPrompt(actions),
|
|
2913
|
-
disableBuiltinTools: true,
|
|
2914
|
-
maxTurns: 1,
|
|
2915
|
-
model
|
|
2916
|
-
}, () => {});
|
|
2917
|
-
if (isError || !result) return actions;
|
|
2918
|
-
const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
|
|
2919
|
-
const parsed = JSON.parse(json);
|
|
2920
|
-
if (Array.isArray(parsed) && parsed.length > 0) return parsed;
|
|
2921
|
-
} catch {}
|
|
2922
|
-
return actions;
|
|
2923
|
-
}
|
|
2924
|
-
//#endregion
|
|
2925
|
-
//#region src/claude/extract-json.ts
|
|
2926
|
-
/**
|
|
2927
|
-
* Pulls a JSON object out of a Claude completion. Accepts either a fenced
|
|
2928
|
-
* ```json block or a bare `{...}` payload that constitutes the whole reply.
|
|
2929
|
-
* Returns null when neither shape is present.
|
|
2930
|
-
*/
|
|
2931
|
-
function extractJsonBlock(text) {
|
|
2932
|
-
const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
|
|
2933
|
-
if (fenced && fenced[1]) return fenced[1].trim();
|
|
2934
|
-
const trimmed = text.trim();
|
|
2935
|
-
if (trimmed.startsWith("{") && trimmed.endsWith("}")) return trimmed;
|
|
2936
|
-
return null;
|
|
2937
|
-
}
|
|
2938
|
-
//#endregion
|
|
2939
|
-
//#region src/prompts/draft.ts
|
|
2940
|
-
function buildNamingSystemPrompt() {
|
|
2941
|
-
return `You name a new ccqa test case based on the user's intent and the existing feature tree.
|
|
2942
|
-
|
|
2943
|
-
ccqa test cases live under \`.ccqa/features/<featureName>/test-cases/<specName>/test-spec.md\`.
|
|
2944
|
-
|
|
2945
|
-
## Naming rules
|
|
2946
|
-
|
|
2947
|
-
- featureName and specName are kebab-case ASCII (lowercase, words separated by '-').
|
|
2948
|
-
- featureName: a broad area (e.g. "tasks", "auth", "billing", "search").
|
|
2949
|
-
- specName: a short scenario name (e.g. "create-and-complete", "login-with-email", "search-by-tag").
|
|
2950
|
-
- Reuse existing featureName when the user's intent fits an existing area. Only invent a new featureName when the existing tree clearly does not cover the area.
|
|
2951
|
-
- specName must NOT collide with an existing spec under the chosen feature. If the natural name collides, pick a different one that distinguishes the new scenario from the existing ones.
|
|
2952
|
-
- Use the codebase (Read/Grep/Glob) sparingly to confirm domain vocabulary if helpful. Do not over-explore.
|
|
2953
|
-
|
|
2954
|
-
## Output (STRICT)
|
|
2955
|
-
|
|
2956
|
-
Output ONE fenced \`\`\`json block, nothing else outside it:
|
|
2957
|
-
|
|
2958
|
-
{
|
|
2959
|
-
"featureName": "<kebab-case>",
|
|
2960
|
-
"specName": "<kebab-case>",
|
|
2961
|
-
"reason": "<one short sentence: why this name and how it relates to existing specs>"
|
|
2962
|
-
}
|
|
2963
|
-
`;
|
|
2965
|
+
`;
|
|
2964
2966
|
}
|
|
2965
2967
|
function buildNamingPrompt(intent, tree) {
|
|
2966
2968
|
return `## User intent
|
|
@@ -2970,7 +2972,7 @@ ${intent}
|
|
|
2970
2972
|
## Existing feature tree
|
|
2971
2973
|
|
|
2972
2974
|
${tree.length === 0 ? "(no existing features yet)" : tree.map((f) => {
|
|
2973
|
-
const specLines = f.specs.length === 0 ? " (no specs yet)" : f.specs.map((s) => ` - ${s.specName}
|
|
2975
|
+
const specLines = f.specs.length === 0 ? " (no specs yet)" : f.specs.map((s) => ` - ${s.specName}`).join("\n");
|
|
2974
2976
|
return `- ${f.featureName}/\n${specLines}`;
|
|
2975
2977
|
}).join("\n")}
|
|
2976
2978
|
|
|
@@ -2979,48 +2981,58 @@ ${tree.length === 0 ? "(no existing features yet)" : tree.map((f) => {
|
|
|
2979
2981
|
Pick featureName and specName for the new test case. Follow the naming rules. Avoid colliding with any existing specName under the chosen feature.
|
|
2980
2982
|
`;
|
|
2981
2983
|
}
|
|
2982
|
-
function buildDraftSystemPrompt() {
|
|
2983
|
-
return `You are a QA engineer drafting and refining a ccqa
|
|
2984
|
+
function buildDraftSystemPrompt(blocks) {
|
|
2985
|
+
return `You are a QA engineer drafting and refining a ccqa spec.yaml.
|
|
2984
2986
|
|
|
2985
2987
|
The CLI runs you in a loop: each turn the user gives an intent (first run) or a refinement instruction (later runs). You read the codebase, validate the spec, and return a single JSON report. The CLI displays a diff and asks the user whether to apply.
|
|
2986
2988
|
|
|
2987
|
-
##
|
|
2989
|
+
## spec.yaml format (STRICT)
|
|
2988
2990
|
|
|
2989
|
-
YAML
|
|
2991
|
+
Pure YAML — no markdown body, no frontmatter dashes.
|
|
2990
2992
|
|
|
2991
|
-
|
|
2992
|
-
- title
|
|
2993
|
-
-
|
|
2994
|
-
-
|
|
2995
|
-
- setups: array of { name: string, params?: Record<string,string> } (optional)
|
|
2996
|
-
- relatedPaths: array of string (optional) — glob patterns identifying source files this spec depends on. Used by \`ccqa drift --changed\` in CI to skip drift checks for unrelated changes.
|
|
2993
|
+
Top-level fields:
|
|
2994
|
+
- \`title\`: string (required) — short human-readable name for the test
|
|
2995
|
+
- \`relatedPaths\`: array of glob string (optional) — source files this spec depends on, used by \`ccqa drift --changed\`
|
|
2996
|
+
- \`steps\`: array (required, at least one)
|
|
2997
2997
|
|
|
2998
|
-
|
|
2998
|
+
A step is one of two shapes:
|
|
2999
2999
|
|
|
3000
|
+
**Action step** — a user-facing browser interaction:
|
|
3001
|
+
\`\`\`yaml
|
|
3002
|
+
- instruction: <imperative; include the URL directly or via \${ENV_VAR}>
|
|
3003
|
+
expected: <observable outcome — visible text, URL pattern, element state>
|
|
3000
3004
|
\`\`\`
|
|
3001
|
-
### Step 1: <short title>
|
|
3002
|
-
- **Instruction**: <imperative, one sentence>
|
|
3003
|
-
- **Expected**: <observable outcome>
|
|
3004
3005
|
|
|
3005
|
-
|
|
3006
|
-
|
|
3006
|
+
**Include step** — invoke a reusable block from \`.ccqa/blocks/<name>/spec.yaml\`:
|
|
3007
|
+
\`\`\`yaml
|
|
3008
|
+
- include: <block-name>
|
|
3009
|
+
params:
|
|
3010
|
+
<param-name>: <string value, can use \${ENV_VAR}>
|
|
3007
3011
|
\`\`\`
|
|
3008
3012
|
|
|
3013
|
+
## URLs
|
|
3014
|
+
|
|
3015
|
+
Each step writes the URL it opens directly inside \`instruction\` (e.g. \`"\${APP_URL}/articles を開く"\`). Use \`\${ENV_VAR}\` references for environment-specific values.
|
|
3016
|
+
|
|
3017
|
+
## Available blocks
|
|
3018
|
+
|
|
3019
|
+
${formatBlockList(blocks)}
|
|
3020
|
+
|
|
3009
3021
|
## Quality rules
|
|
3010
3022
|
|
|
3011
3023
|
- One user-facing action per step (login, click, fill, navigate, ...).
|
|
3012
|
-
-
|
|
3013
|
-
- Forbidden in
|
|
3024
|
+
- \`expected\` must be assertion-friendly: visible text, URL pattern, element state.
|
|
3025
|
+
- Forbidden in \`expected\`: timestamps, exact counts, session IDs, internal state.
|
|
3014
3026
|
- 3–8 steps is typical. Fewer means too coarse; more means too fine.
|
|
3015
3027
|
|
|
3016
3028
|
## Workflow (use Read / Grep / Glob extensively)
|
|
3017
3029
|
|
|
3018
|
-
1. Read the codebase under cwd to find concrete strings: routes, button labels, aria-labels, page titles, placeholders. Use those exact strings in
|
|
3019
|
-
2. If
|
|
3020
|
-
3. Populate \`relatedPaths\`
|
|
3030
|
+
1. Read the codebase under cwd to find concrete strings: routes, button labels, aria-labels, page titles, placeholders. Use those exact strings in \`expected\`.
|
|
3031
|
+
2. If you use \`include:\` steps, verify each \`params\` key matches a declared param of the block (see the Available blocks list above).
|
|
3032
|
+
3. Populate \`relatedPaths\` with **provisional** glob patterns pointing at the source files this spec touches: the route/page file for each URL the spec visits, plus the component files (or their parent feature directory) that render the aria-labels, placeholders, or visible texts the spec asserts on. Prefer directory globs (e.g. \`src/features/tasks/**\`) when several files in one area are involved. Be conservative — include a path if you're unsure rather than omit it. \`ccqa trace\` will refine this list later from real browser observations.
|
|
3021
3033
|
4. Validate the (current or proposed) spec on four axes — emit one issue per finding:
|
|
3022
|
-
- **assertable**: each
|
|
3023
|
-
- **
|
|
3034
|
+
- **assertable**: each \`expected\` can be verified against a string/URL/state that exists in code.
|
|
3035
|
+
- **blocks**: every \`include\` resolves to a real block; every \`params\` key is declared on that block; every required param is provided.
|
|
3024
3036
|
- **granularity**: not too coarse (multiple actions per step) nor too fine (snapshot-only steps); order is logical.
|
|
3025
3037
|
- **unimplemented**: any feature mentioned in the spec that you cannot find in code.
|
|
3026
3038
|
|
|
@@ -3035,13 +3047,13 @@ Schema:
|
|
|
3035
3047
|
"issues": [
|
|
3036
3048
|
{
|
|
3037
3049
|
"severity": "OK" | "WARN" | "ERROR",
|
|
3038
|
-
"category": "assertable" | "
|
|
3050
|
+
"category": "assertable" | "blocks" | "granularity" | "unimplemented",
|
|
3039
3051
|
"stepId": "step-01" | null,
|
|
3040
3052
|
"message": "<one-line summary>",
|
|
3041
3053
|
"detail": "<optional, multiline explanation>"
|
|
3042
3054
|
}
|
|
3043
3055
|
],
|
|
3044
|
-
"patch": "<COMPLETE rewritten
|
|
3056
|
+
"patch": "<COMPLETE rewritten spec.yaml, or empty string if no changes>"
|
|
3045
3057
|
}
|
|
3046
3058
|
\`\`\`
|
|
3047
3059
|
|
|
@@ -3049,17 +3061,25 @@ Schema:
|
|
|
3049
3061
|
|
|
3050
3062
|
- \`patch\` must be the COMPLETE file content if non-empty (never a diff fragment).
|
|
3051
3063
|
- The CLI replaces the file atomically with \`patch\`.
|
|
3064
|
+
- The patch must be valid YAML matching the schema above. The CLI re-parses it before applying; if it fails validation, the patch is rejected.
|
|
3052
3065
|
- For **create** mode: produce a fresh spec from the user intent.
|
|
3053
3066
|
- For **refine** mode with a non-empty user instruction: apply the user's request, plus fix any issues it introduces. Preserve the user's wording elsewhere.
|
|
3054
3067
|
- For **refine** mode with an empty user instruction: only fix issues you find against the current spec; if everything is fine, return \`patch: ""\`.
|
|
3055
3068
|
- If \`patch\` is the same as the current spec, return \`patch: ""\` instead.
|
|
3056
3069
|
`;
|
|
3057
3070
|
}
|
|
3071
|
+
function formatBlockList(blocks) {
|
|
3072
|
+
if (blocks.length === 0) return "(no blocks defined yet — only action steps are available.)";
|
|
3073
|
+
return blocks.map((b) => {
|
|
3074
|
+
const paramLines = b.params.length === 0 ? " params: (none)" : b.params.map((p) => ` - ${p.name}${p.required ? "" : " (optional)"}${p.secret ? " [secret]" : ""}`).join("\n");
|
|
3075
|
+
return `- \`${b.name}\` — ${b.title}\n${paramLines}`;
|
|
3076
|
+
}).join("\n");
|
|
3077
|
+
}
|
|
3058
3078
|
function buildDraftPrompt(input) {
|
|
3059
3079
|
const { mode, existing, userInput } = input;
|
|
3060
3080
|
if (mode === "create") return `## Mode
|
|
3061
3081
|
|
|
3062
|
-
create — no spec exists yet at the target path. Produce a fresh
|
|
3082
|
+
create — no spec exists yet at the target path. Produce a fresh spec.yaml.
|
|
3063
3083
|
|
|
3064
3084
|
## User intent
|
|
3065
3085
|
|
|
@@ -3067,7 +3087,7 @@ ${userInput}
|
|
|
3067
3087
|
|
|
3068
3088
|
## Task
|
|
3069
3089
|
|
|
3070
|
-
Read the codebase under cwd. Discover concrete strings (routes, labels, titles). Produce a complete
|
|
3090
|
+
Read the codebase under cwd. Discover concrete strings (routes, labels, titles). Produce a complete spec.yaml as the \`patch\` field, plus any issues you'd flag about your own draft.
|
|
3071
3091
|
`;
|
|
3072
3092
|
return `## Mode
|
|
3073
3093
|
|
|
@@ -3075,47 +3095,76 @@ refine — a spec already exists. Apply the user's instruction (if any) and vali
|
|
|
3075
3095
|
|
|
3076
3096
|
## Current spec
|
|
3077
3097
|
|
|
3078
|
-
\`\`\`
|
|
3098
|
+
\`\`\`yaml
|
|
3079
3099
|
${existing}\`\`\`
|
|
3080
3100
|
|
|
3081
|
-
${userInput ? `## User refinement instruction\n\n${userInput}\n` : `## User refinement instruction\n\n(empty — re-validate the current spec against the codebase; only emit a non-empty patch if something is actually wrong)\n`}
|
|
3082
|
-
## Task
|
|
3101
|
+
${userInput ? `## User refinement instruction\n\n${userInput}\n` : `## User refinement instruction\n\n(empty — re-validate the current spec against the codebase; only emit a non-empty patch if something is actually wrong)\n`}## Task
|
|
3083
3102
|
|
|
3084
|
-
1. Read the codebase under cwd and any referenced
|
|
3103
|
+
1. Read the codebase under cwd and any referenced blocks (\`.ccqa/blocks/<name>/spec.yaml\`).
|
|
3085
3104
|
2. If the user's instruction is non-empty, apply it to the spec.
|
|
3086
3105
|
3. Validate the resulting spec on the four axes. Emit issues.
|
|
3087
3106
|
4. Return the complete updated spec as \`patch\`. If no changes are needed, return \`patch: ""\`.
|
|
3088
3107
|
`;
|
|
3089
3108
|
}
|
|
3090
3109
|
//#endregion
|
|
3110
|
+
//#region src/prompts/drift.ts
|
|
3111
|
+
/**
 * Build the system prompt for drift checks: the regular draft system prompt
 * followed by the drift-mode rules and the strict severity policy.
 *
 * @param blocks Forwarded unchanged to `buildDraftSystemPrompt`.
 * @returns {string} The combined system prompt text.
 */
function buildDriftSystemPrompt(blocks) {
	const draftPrompt = buildDraftSystemPrompt(blocks);
	const driftModeSection = `## Drift mode

You are running non-interactively in CI. The user will not see or apply the patch — only the \`issues\` array.

- Always set \`patch\` to "" in your response.
- Focus issue messages on what is **out of sync** between the spec and the current codebase: missing aria-labels, renamed routes, removed buttons, placeholders that no longer exist, include references that point to non-existent blocks.
- Do NOT raise issues about stylistic preferences in the spec wording.
- Treat \`category: unimplemented\` as the primary signal for drift: anything the spec asserts that you cannot find in code is a drift finding.`;
	const severitySection = `## Drift severity policy (STRICT)

The CLI exits non-zero when any issue has \`severity: "ERROR"\` (default) or — with \`--severity warn\` — when any \`WARN\` is present. Pick severity by **whether a deterministic replay of this spec would fail today**, not by how confident you are in your own analysis.

### CRITICAL: spec ↔ source mismatch is ERROR, not "vague phrasing" WARN

The most common false negative is treating a concrete spec/source mismatch as a WARN about "expected phrasing." It is not. Apply this decision rule **before** picking severity:

1. **Pick the concrete strings the spec asserts** in each step's \`expected\` (visible text, aria-labels, button labels, route paths). For \`expected\` like "the Dashboard page is visible", the spec is asserting that the literal string "Dashboard" — or the page conceptually identified by that label — is rendered.
2. **Search the source** for those exact strings (\`Grep\` / \`Read\`) at the location the step references (the relevant page/component/route).
3. Classify:
- **ERROR** — the source instead renders a *different* string in that location (e.g. spec says "Dashboard", the breadcrumb in \`DashboardPage.tsx\` now renders "Overview"). A replay against the current source would fail; a replay against a stale staging environment would pass and *hide* the drift — exactly the case drift CI exists to catch. Cite both sides in \`detail\`: the spec line and the file:line of the source mismatch.
- **WARN (vague phrasing)** — the source's actual string IS present somewhere relevant; the \`expected\` just paraphrases it more loosely (e.g. spec says "the Save button is visible" and the source has both visible "Save" text and \`aria-label="Save"\`). Replay still passes; the spec could just be tightened.
- **OK** — the spec's exact string appears in source at the relevant location.

Use **ERROR** when the spec would break on replay:
- A selector the spec relies on (\`aria-label\`, \`placeholder\`, \`data-testid\`, button text) **does not exist anywhere in the source**.
- A URL / route the spec navigates to is no longer defined.
- An \`expected\` asserts a string or visible text that is no longer rendered by the relevant component.
- The source renders a *different* string in the place the spec describes (per the decision rule above).
- An \`include\` step references a block that does not exist under \`.ccqa/blocks/<name>/spec.yaml\`, or a \`params\` key is not declared on that block.
- The spec references a feature/page that has been removed from the codebase.

Use **WARN** when the spec is still likely to work, but quality could improve:
- The \`expected\` paraphrases a string that **still exists** in source (the literal target is findable, just imprecisely worded).
- A step bundles multiple actions, or a needed intermediate verification step is missing.
- Stable signals exist that the spec could leverage but currently doesn't.
- You are unsure whether a referenced string exists (give the user the benefit of the doubt; do not hard-fail CI on uncertainty).

Use **OK** for axes you actively verified and found no issue.

If you cannot decide between ERROR and WARN, choose WARN. Reserve ERROR for findings you can back up with a specific file path or grep result that proves the drift.

Conversely: when you DO have a citation showing a concrete spec/source mismatch (per the decision rule above), you MUST use ERROR — "vague phrasing" WARN is not a safe fallback for an actual drift.
`;
	return `${draftPrompt}\n\n${driftModeSection}\n\n${severitySection}`;
}
|
|
3159
|
+
/**
 * Build the user prompt for a drift check by asking `buildDraftPrompt` for a
 * "refine" turn with an empty user instruction.
 *
 * @param existing Current spec text, passed through as `existing`.
 * @returns Whatever `buildDraftPrompt` returns for that request.
 */
function buildDriftUserPrompt(existing) {
	const refineRequest = {
		mode: "refine",
		existing,
		userInput: ""
	};
	return buildDraftPrompt(refineRequest);
}
|
|
3166
|
+
//#endregion
|
|
3091
3167
|
//#region src/types.ts
|
|
3092
|
-
const TestStepSchema = z.object({
|
|
3093
|
-
id: z.string(),
|
|
3094
|
-
title: z.string(),
|
|
3095
|
-
instruction: z.string(),
|
|
3096
|
-
expected: z.string()
|
|
3097
|
-
});
|
|
3098
|
-
const SetupRefSchema = z.object({
|
|
3099
|
-
name: z.string(),
|
|
3100
|
-
params: z.record(z.string(), z.string()).optional()
|
|
3101
|
-
});
|
|
3102
|
-
z.object({
|
|
3103
|
-
title: z.string(),
|
|
3104
|
-
baseUrl: z.string(),
|
|
3105
|
-
prerequisites: z.string().optional(),
|
|
3106
|
-
setups: z.array(SetupRefSchema).optional(),
|
|
3107
|
-
relatedPaths: z.array(z.string()).optional(),
|
|
3108
|
-
steps: z.array(TestStepSchema)
|
|
3109
|
-
});
|
|
3110
|
-
const PlaceholderDefSchema = z.object({
|
|
3111
|
-
dummy: z.string(),
|
|
3112
|
-
description: z.string().optional()
|
|
3113
|
-
});
|
|
3114
|
-
z.object({
|
|
3115
|
-
title: z.string(),
|
|
3116
|
-
placeholders: z.record(z.string(), PlaceholderDefSchema).optional(),
|
|
3117
|
-
steps: z.array(TestStepSchema)
|
|
3118
|
-
});
|
|
3119
3168
|
const RouteStepSchema = z.object({
|
|
3120
3169
|
title: z.string(),
|
|
3121
3170
|
action: z.string(),
|
|
@@ -3141,7 +3190,7 @@ const DraftIssueSchema = z.object({
|
|
|
3141
3190
|
]),
|
|
3142
3191
|
category: z.enum([
|
|
3143
3192
|
"assertable",
|
|
3144
|
-
"
|
|
3193
|
+
"blocks",
|
|
3145
3194
|
"granularity",
|
|
3146
3195
|
"unimplemented"
|
|
3147
3196
|
]),
|
|
@@ -3153,20 +3202,499 @@ const DraftReportSchema = z.object({
|
|
|
3153
3202
|
issues: z.array(DraftIssueSchema),
|
|
3154
3203
|
patch: z.string()
|
|
3155
3204
|
});
|
|
3205
|
+
/** Human-readable label for each issue `category` value. */
const DRAFT_CATEGORY_LABEL = {
	"assertable": "Assertability",
	"blocks": "Block references",
	"granularity": "Step granularity",
	"unimplemented": "Unimplemented checks"
};
|
|
3156
3211
|
/**
 * Shape of a naming proposal: a non-empty feature name, a non-empty spec
 * name, and an optional free-text reason.
 */
const DraftNamingSchema = z.object({
	featureName: z.string().min(1),
	specName: z.string().min(1),
	reason: z.string().optional()
});
|
|
3161
3216
|
//#endregion
|
|
3217
|
+
//#region src/drift/analyze.ts
|
|
3218
|
+
const DEFAULT_CONCURRENCY$1 = 3;
/**
 * Run drift checks against a list of pre-collected targets. Pure library
 * function: no commander, no process.exit, no stdout writes. Callers handle
 * presentation. `cli/drift` does the full sweep with `--changed` scoping;
 * `cli/run` calls this with just the failing specs after vitest.
 *
 * @param input `{ targets, cwd, blocks, concurrency?, model?, onSpecStart? }`.
 *   `concurrency` defaults to 3; values below 1 (or non-numeric values) are
 *   treated as 1 so that every target is always processed.
 * @returns {Promise<Array>} One `checkSpec` result per target, in the same
 *   order as `targets`.
 */
async function analyzeDrift(input) {
	const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, onSpecStart } = input;
	const results = new Array(targets.length);
	// Without this guard a concurrency of 0 / negative / NaN / null starts no
	// workers at all and the function returns an array of empty slots.
	const requested = concurrency >= 1 ? concurrency : 1;
	const workerCount = Math.min(requested, targets.length);
	// Shared cursor: each worker claims the next unprocessed index.
	let cursor = 0;
	const worker = async () => {
		while (true) {
			const idx = cursor++;
			if (idx >= targets.length) return;
			const target = targets[idx];
			onSpecStart?.(target);
			results[idx] = await checkSpec(target, {
				cwd,
				blocks,
				model
			});
		}
	};
	await Promise.all(Array.from({ length: workerCount }, () => worker()));
	return results;
}
|
|
3246
|
+
/**
 * Drift-check a single spec: read its spec file, ask Claude (restricted to
 * the Read / Grep / Glob tools) for a drift report, and validate the JSON
 * report against `DraftReportSchema`.
 *
 * Every failure mode — including a rejected Claude invocation — is reported
 * as `{ ok: false, issues: [], error }` rather than thrown, so one bad spec
 * cannot abort the rest of a concurrent sweep.
 *
 * @param target `{ featureName, specName }` of the spec to check.
 * @param opts `{ cwd, blocks, model? }`.
 * @returns `{ target, ok: true, issues }` on success, or
 *   `{ target, ok: false, issues: [], error }` on failure.
 */
async function checkSpec(target, opts) {
	const { featureName, specName } = target;
	const describe = (e) => e instanceof Error ? e.message : String(e);
	const failure = (error) => ({
		target,
		ok: false,
		issues: [],
		error
	});
	const existing = await tryReadSpecFile(featureName, specName, opts.cwd);
	if (existing === null) return failure(`spec file disappeared after enumeration: ${featureName}/${specName}`);
	let response;
	try {
		response = await invokeClaudeStreaming({
			prompt: buildDriftUserPrompt(existing),
			systemPrompt: buildDriftSystemPrompt(opts.blocks),
			allowedTools: [
				"Read",
				"Grep",
				"Glob"
			],
			silenceBashLog: true,
			cwd: opts.cwd,
			...opts.model ? { model: opts.model } : {}
		}, (_msg) => {});
	} catch (e) {
		// A transport/SDK failure is a per-spec error, same as an error result.
		return failure(`Claude invocation failed: ${describe(e)}`);
	}
	const { result, isError } = response;
	if (isError) return failure("Claude returned an error result");
	const json = extractJsonBlock(result);
	if (!json) return failure("Claude did not return a json block");
	let report;
	try {
		report = DraftReportSchema.parse(JSON.parse(json));
	} catch (e) {
		return failure(`failed to parse drift report: ${describe(e)}`);
	}
	return {
		target,
		ok: true,
		issues: report.issues
	};
}
|
|
3297
|
+
//#endregion
|
|
3298
|
+
//#region src/drift/format.ts
|
|
3299
|
+
/**
 * Format drift results as a string and return it (nothing is printed here),
 * so callers can prefix, interleave, or pipe the output as needed.
 *
 * @param results Drift results to render.
 * @param format "json" or "github"; any other value renders plain text.
 * @param cwd Passed to the GitHub renderer only.
 * @returns {string} The rendered output.
 */
function renderDrift(results, format, cwd) {
	switch (format) {
		case "json": return renderJson(results);
		case "github": return renderGithub(results, cwd);
		default: return renderText(results);
	}
}
|
|
3309
|
+
const HEAVY_RULE = "═".repeat(72);
/**
 * Render drift results as a plain-text report: one headed section per spec
 * (spec-level error, an all-passed line, or the ERROR/WARN findings followed
 * by the passed categories), then a rule and the overall totals.
 *
 * @param results Drift results to render.
 * @returns {string} The report, lines joined with "\n".
 */
function renderText(results) {
	const lines = [];
	for (const entry of results) {
		const title = `══ ${entry.target.featureName}/${entry.target.specName} `;
		// Pad the heading towards 72 columns, always with at least 3 rule chars.
		const padding = "═".repeat(Math.max(3, 72 - title.length));
		lines.push("", `${title}${padding}`);
		if (entry.error) {
			lines.push(`  ERROR  ${entry.error}`);
			continue;
		}
		const withSeverity = (level) => entry.issues.filter((issue) => issue.severity === level);
		const errors = withSeverity("ERROR");
		const warnings = withSeverity("WARN");
		const passed = withSeverity("OK");
		if (errors.length === 0 && warnings.length === 0) {
			const noun = passed.length === 1 ? "check" : "checks";
			lines.push(`  ✓ ${passed.length > 0 ? `all ${passed.length} ${noun} passed` : "no issues"}`);
			continue;
		}
		errors.forEach((issue) => appendFinding(lines, "ERROR", issue));
		warnings.forEach((issue) => appendFinding(lines, "WARN", issue));
		if (passed.length > 0) {
			const labels = passed.map((issue) => DRAFT_CATEGORY_LABEL[issue.category]).join(", ");
			lines.push("", `  ✓ passed (${passed.length}): ${labels}`);
		}
	}
	const totals = summarize(results);
	lines.push(
		"",
		HEAVY_RULE,
		`  specs     ${results.length} (${totals.errored} errored)`,
		`  findings  ${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`,
		""
	);
	return lines.join("\n");
}
|
|
3346
|
+
/**
 * Append one finding to `out`: a blank line, a header with the level, the
 * category label and optional step id, the message, and — when present — the
 * detail with its continuation lines indented.
 *
 * @param {string[]} out Line buffer, mutated in place.
 * @param {string} level Label printed at the start of the header line.
 * @param issue `{ category, stepId?, message, detail? }`.
 */
function appendFinding(out, level, issue) {
	const stepSuffix = issue.stepId ? ` ${issue.stepId}` : "";
	out.push(
		"",
		`  ${level}  ${DRAFT_CATEGORY_LABEL[issue.category]}${stepSuffix}`,
		`    ${issue.message}`
	);
	if (!issue.detail) return;
	const indentedDetail = issue.detail.split("\n").join("\n        ");
	out.push(`      └ ${indentedDetail}`);
}
|
|
3353
|
+
/**
 * Render drift results as pretty-printed JSON (`{ specs: [...] }`) with a
 * trailing newline. `error` and `detail` are only included when truthy.
 *
 * @param results Drift results to render.
 * @returns {string} The JSON document.
 */
function renderJson(results) {
	const toIssue = (issue) => {
		const { severity, category, stepId, message } = issue;
		const entry = { severity, category, stepId, message };
		if (issue.detail) entry.detail = issue.detail;
		return entry;
	};
	const toSpec = (r) => {
		const entry = {
			feature: r.target.featureName,
			spec: r.target.specName,
			ok: r.ok
		};
		if (r.error) entry.error = r.error;
		entry.issues = r.issues.map(toIssue);
		return entry;
	};
	const payload = { specs: results.map(toSpec) };
	return `${JSON.stringify(payload, null, 2)}\n`;
}
|
|
3369
|
+
/**
 * Render drift results as GitHub Actions workflow commands: one `::error`
 * line per spec-level error and one `::error` / `::warning` line per
 * non-OK issue.
 *
 * @param results Drift results to render.
 * @param cwd Directory containing `.ccqa`; spec paths are resolved from it.
 * @returns {string} Newline-terminated commands, or "" when there are none.
 */
function renderGithub(results, cwd) {
	const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
	const lines = [];
	for (const r of results) {
		// `file` is a command property, so `,` and `:` in the path must be
		// escaped just like the title, or the command is parsed incorrectly.
		const file = escapeGhProp(githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName));
		if (r.error) {
			lines.push(`::error file=${file}::${escapeGhMessage(r.error)}`);
			continue;
		}
		for (const issue of r.issues) {
			if (issue.severity === "OK") continue;
			const level = issue.severity === "ERROR" ? "error" : "warning";
			const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
			const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
			lines.push(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}`);
		}
	}
	return lines.length === 0 ? "" : `${lines.join("\n")}\n`;
}
/**
 * Path of a spec's `spec.yaml` relative to `repoRoot`, or the absolute path
 * when the file lies outside `repoRoot`.
 */
function githubRelPath(cwd, repoRoot, featureName, specName) {
	const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "spec.yaml");
	const rel = relative(repoRoot, abs);
	// Only a leading ".." path *segment* means "outside the root"; a directory
	// whose name merely starts with ".." (e.g. "..work") is still inside it.
	const outsideRoot = rel === ".." || rel.startsWith("../") || rel.startsWith("..\\");
	return outsideRoot ? abs : rel;
}
/** Escape a workflow-command message: `%`, CR and LF are percent-encoded. */
function escapeGhMessage(s) {
	return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A");
}
/** Escape a workflow-command property value: message escapes plus `,` and `:`. */
function escapeGhProp(s) {
	return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A").replace(/,/g, "%2C").replace(/:/g, "%3A");
}
|
|
3399
|
+
/**
 * Tally drift results: issues by severity ("ERROR", "WARN", everything else
 * counted as ok) and the number of results that carry a spec-level `error`.
 *
 * @param results Drift results to tally.
 * @returns {{error: number, warn: number, ok: number, errored: number}}
 */
function summarize(results) {
	const totals = {
		error: 0,
		warn: 0,
		ok: 0,
		errored: 0
	};
	for (const entry of results) {
		if (entry.error) totals.errored += 1;
		for (const { severity } of entry.issues) {
			if (severity === "ERROR") totals.error += 1;
			else if (severity === "WARN") totals.warn += 1;
			else totals.ok += 1;
		}
	}
	return totals;
}
|
|
3417
|
+
//#endregion
|
|
3418
|
+
//#region src/drift/exit-code.ts
|
|
3419
|
+
/**
 * Map drift results to a process exit code.
 *
 * Returns 1 when any result has a spec-level `error`, when any issue has
 * severity "ERROR", or — only if `threshold` is "warn" — when any issue has
 * severity "WARN". Returns 0 otherwise.
 *
 * @param results Drift results to inspect.
 * @param {string} threshold "warn" makes WARN findings fail as well.
 * @returns {0 | 1}
 */
function determineExitCode(results, threshold) {
	const failsOnWarn = threshold === "warn";
	const issueFails = ({ severity }) => severity === "ERROR" || failsOnWarn && severity === "WARN";
	const anyFailure = results.some((r) => Boolean(r.error) || r.issues.some(issueFails));
	return anyFailure ? 1 : 0;
}
|
|
3434
|
+
//#endregion
|
|
3435
|
+
//#region src/drift/auth.ts
|
|
3436
|
+
/**
 * Probe for a credential drift analysis can use, checking in order:
 *   1. a non-empty ANTHROPIC_API_KEY environment variable
 *   2. the file ~/.claude/.credentials.json
 *
 * Never throws: a missing credential is reported through the return value so
 * the caller can print "drift analysis skipped".
 *
 * @returns {{ok: true} | {ok: false, reason: string}}
 */
function driftAuthAvailable() {
	const apiKey = process.env["ANTHROPIC_API_KEY"];
	const hasApiKey = typeof apiKey === "string" && apiKey.length > 0;
	if (hasApiKey || existsSync(join(homedir(), ".claude", ".credentials.json"))) return { ok: true };
	return {
		ok: false,
		reason: "no ANTHROPIC_API_KEY / claude login"
	};
}
|
|
3454
|
+
//#endregion
|
|
3455
|
+
//#region src/cli/run.ts
|
|
3456
|
+
// Resolved once at module load against the working directory at that moment.
const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
/**
 * Pick the vitest config to run with: the user's `.ccqa/vitest.config.ts`
 * when it is accessible, otherwise the path from `bundledVitestConfigPath()`.
 *
 * @returns {Promise<string>} Path of the config file to pass to vitest.
 */
async function resolveVitestConfig() {
	const userConfigAccessible = await access(USER_VITEST_CONFIG).then(() => true, () => false);
	if (userConfigAccessible) return USER_VITEST_CONFIG;
	return bundledVitestConfigPath();
}
|
|
3465
|
+
/**
 * `ccqa run [target]` command definition. The action simply delegates to
 * `runTests` with the positional target and the parsed options.
 */
const runCommand = new Command("run")
	.argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all")
	.description("Run generated agent-browser test scripts. Pass --drift to invoke a Claude-driven drift analysis on each failing spec (skipped silently when no test fails). Requires ANTHROPIC_API_KEY or a local Claude login.")
	.option("--drift", "On vitest failure, run drift analysis on the failing specs")
	.option("--drift-strict", "Treat drift ERROR findings as a run failure (exit 1 even if vitest passed). Implies --drift.")
	.option("--format <fmt>", "Output format for the drift block: text | json | github", "text")
	.option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Used by --drift only. Overrides CCQA_MODEL.")
	.action(async (specTarget, cliOptions) => {
		await runTests(specTarget, cliOptions);
	});
|
|
3468
|
+
/**
 * Run the generated test script of every resolved spec through vitest, one
 * spec at a time, print the summary, optionally run the drift hook, and exit
 * the process with the resulting code.
 *
 * Exits with 1 immediately when no specs resolve. Specs without a test script
 * are warned about and left out of the summary. The temporary report
 * directory is removed in a `finally` once the loop and the drift hook finish.
 *
 * @param target Optional '<feature>/<spec>' or '<feature>' selector.
 * @param opts Parsed CLI options, forwarded to `maybeRunDrift`.
 */
async function runTests(target, opts) {
	header("run", target);
	const specs = await resolveSpecs(target);
	if (specs.length === 0) {
		error("no test scripts found");
		hint("run 'ccqa generate <feature>/<spec>' first to generate tests");
		process.exit(1);
	}
	const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
	const summaries = [];
	let overallExitCode = 0;
	const vitestConfig = await resolveVitestConfig();
	try {
		for (const [index, { featureName, specName }] of specs.entries()) {
			const specLabel = `${featureName}/${specName}`;
			const scriptFile = await getTestScript(featureName, specName);
			if (!scriptFile) {
				warn(`${specLabel}: no test.spec.ts found`);
				continue;
			}
			run(specLabel);
			meta("test", scriptFile);
			blank();
			// Each spec gets its own JSON report so results can be summarised later.
			const reportFile = join(tmpDir, `report-${index}.json`);
			const vitestArgs = [
				"run",
				"--config",
				vitestConfig,
				scriptFile,
				"--reporter=json",
				`--outputFile.json=${reportFile}`
			];
			const proc = spawnVitestStreaming(vitestArgs);
			await Promise.all([
				streamFiltered(proc.stdout, process.stdout),
				streamFiltered(proc.stderr, process.stderr)
			]);
			const exitCode = await proc.exited;
			// The most recent non-zero exit code wins.
			if (exitCode !== 0) overallExitCode = exitCode;
			const report = await readReport(reportFile);
			summaries.push({
				featureName,
				specName,
				scriptFile,
				report,
				exitCode
			});
			blank();
		}
		printSummary(summaries);
		overallExitCode = await maybeRunDrift(summaries, opts, overallExitCode);
	} finally {
		await rm(tmpDir, {
			recursive: true,
			force: true
		});
	}
	process.exit(overallExitCode);
}
|
|
3523
|
+
/**
 * Whether a spec summary counts as failed: a non-zero exit code, or a report
 * that lists at least one failed test.
 *
 * @param s `{ exitCode, report? }` summary entry.
 * @returns {boolean}
 */
function failedSpec(s) {
	const failedTests = s.report?.numFailedTests ?? 0;
	return s.exitCode !== 0 || failedTests > 0;
}
|
|
3527
|
+
/**
 * Validate the `--format` value. A missing value means "text"; anything
 * other than text / json / github is reported and exits the process with 2.
 *
 * @param raw Raw option value, possibly null/undefined.
 * @returns {"text" | "json" | "github"}
 */
function parseDriftFormat(raw) {
	const requested = raw ?? "text";
	switch (requested) {
		case "text":
		case "json":
		case "github": return requested;
	}
	error(`invalid --format: ${requested} (expected text|json|github)`);
	process.exit(2);
}
|
|
3533
|
+
/**
 * Decide which spec summaries get a drift check.
 *
 * - `--drift-strict`: every summary, passing or not (an audit: the spec can
 *   lag behind the source while vitest is still green against a stale
 *   staging environment).
 * - `--drift`: only the summaries for which `failedSpec` is true (the goal is
 *   to explain a vitest failure).
 * - neither: none.
 *
 * @param summaries Per-spec run summaries.
 * @param opts `{ drift?, driftStrict? }`.
 * @returns {Array} The summaries to analyze.
 */
function selectDriftTargets(summaries, opts) {
	if (opts.driftStrict) return summaries;
	return opts.drift ? summaries.filter(failedSpec) : [];
}
|
|
3546
|
+
/**
 * Opt-in drift hook that runs after vitest.
 *
 * Does nothing (returns `currentExitCode`) when no spec is selected by
 * `selectDriftTargets`, when no credential is available, or when none of the
 * selected specs is found in the feature tree. Otherwise it analyzes the
 * selected specs and writes the rendered report to stdout.
 *
 * Only `--drift-strict` can change the exit code: when the drift results fail
 * at the "error" threshold, the result is `currentExitCode || 1`.
 *
 * @param summaries Per-spec run summaries.
 * @param opts Parsed CLI options (`drift`, `driftStrict`, `format`, `model`).
 * @param {number} currentExitCode Exit code determined by vitest so far.
 * @returns {Promise<number>} The exit code to use for the run.
 */
async function maybeRunDrift(summaries, opts, currentExitCode) {
	const candidates = selectDriftTargets(summaries, opts);
	if (candidates.length === 0) return currentExitCode;
	const auth = driftAuthAvailable();
	if (!auth.ok) {
		info(`drift analysis skipped (${auth.reason})`);
		return currentExitCode;
	}
	const format = parseDriftFormat(opts.format);
	const cwd = process.cwd();
	const tree = await listFeatureTree(cwd);
	const targets = [];
	for (const summary of candidates) {
		const feature = tree.find((f) => f.featureName === summary.featureName);
		const spec = feature?.specs.find((sp) => sp.specName === summary.specName);
		// Summaries without a matching entry in the feature tree are dropped.
		if (!spec) continue;
		const driftTarget = {
			featureName: summary.featureName,
			specName: summary.specName
		};
		if (spec.relatedPaths) driftTarget.relatedPaths = spec.relatedPaths;
		if (spec.includedBlocks) driftTarget.includedBlocks = spec.includedBlocks;
		targets.push(driftTarget);
	}
	if (targets.length === 0) {
		info("drift analysis skipped (no spec.yaml found for failing specs)");
		return currentExitCode;
	}
	const blocks = await loadAvailableBlocks(cwd);
	const textMode = format === "text";
	const results = await analyzeDrift({
		targets,
		cwd,
		blocks,
		concurrency: Math.min(3, targets.length),
		...opts.model ? { model: opts.model } : {},
		onSpecStart: (t) => {
			// Progress lines are only printed in text mode.
			if (textMode) info(`drift: checking ${t.featureName}/${t.specName}`);
		}
	});
	if (textMode) process.stdout.write(`\n${C.cyan}${C.bold}──────── drift analysis ────────${C.reset}\n`);
	process.stdout.write(renderDrift(results, format, cwd));
	const strictFailure = opts.driftStrict && determineExitCode(results, "error") !== 0;
	return strictFailure ? currentExitCode || 1 : currentExitCode;
}
|
|
3594
|
+
/**
 * Read and parse a JSON report file.
 *
 * @param {string} path Location of the report.
 * @returns {Promise<any | null>} The parsed JSON, or null when the file
 *   cannot be read or does not contain valid JSON.
 */
async function readReport(path) {
	try {
		return JSON.parse(await readFile(path, "utf8"));
	} catch {
		// Best-effort: a missing or malformed report is reported as "no report".
		return null;
	}
}
|
|
3602
|
+
// Colors are emitted only when stdout is a TTY and NO_COLOR is not set.
const useColor = process.stdout.isTTY && process.env.NO_COLOR == null;
/** ANSI escape sequences by name; every entry is "" when color is disabled. */
const C = (() => {
	const whenColored = (sequence) => useColor ? sequence : "";
	return {
		reset: whenColored("\x1B[0m"),
		bold: whenColored("\x1B[1m"),
		dim: whenColored("\x1B[2m"),
		green: whenColored("\x1B[32m"),
		red: whenColored("\x1B[31m"),
		yellow: whenColored("\x1B[33m"),
		cyan: whenColored("\x1B[36m"),
		gray: whenColored("\x1B[90m")
	};
})();
|
|
3613
|
+
/**
 * Write the run summary to stdout: a banner, one line per spec (with its
 * assertions and the first line of each failure message when a report is
 * available), then the spec and test totals.
 *
 * Spec pass/fail totals are based on `exitCode`; the per-spec icon uses
 * `report.success` when a report exists and `exitCode` otherwise.
 *
 * @param summaries Per-spec `{ featureName, specName, report, exitCode }`.
 */
function printSummary(summaries) {
	const emit = (text) => process.stdout.write(text);
	const mark = (good) => good ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
	const tests = {
		total: 0,
		passed: 0,
		failed: 0,
		skipped: 0
	};
	emit(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
	for (const s of summaries) {
		const specTitle = `${C.bold}${s.featureName}/${s.specName}${C.reset}`;
		const { report } = s;
		if (!report) {
			emit(`${mark(s.exitCode === 0)} ${specTitle} ${C.dim}(no report)${C.reset}\n`);
			continue;
		}
		tests.total += report.numTotalTests;
		tests.passed += report.numPassedTests;
		tests.failed += report.numFailedTests;
		tests.skipped += report.numPendingTests;
		const succeeded = report.success;
		const countColor = succeeded ? C.green : C.red;
		emit(`${mark(succeeded)} ${specTitle}  ${countColor}${report.numPassedTests}/${report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
		for (const file of report.testResults) {
			for (const a of file.assertionResults) {
				const elapsed = a.duration != null ? ` ${C.gray}${formatDuration(a.duration)}${C.reset}` : "";
				emit(`    ${assertionIcon(a.status)} ${a.fullName}${elapsed}\n`);
				if (a.status !== "failed" || !a.failureMessages?.length) continue;
				for (const msg of a.failureMessages) {
					// Only the first line of each failure message is shown.
					const firstLine = msg.split("\n")[0] ?? msg;
					emit(`        ${C.red}${firstLine}${C.reset}\n`);
				}
			}
		}
	}
	let specsPassed = 0;
	let specsFailed = 0;
	for (const s of summaries) {
		if (s.exitCode === 0) specsPassed += 1;
		else specsFailed += 1;
	}
	emit("\n");
	emit(`  ${C.bold}Specs${C.reset}  ${summaries.length} (${C.green}${specsPassed} passed${C.reset}, ${specsFailed > 0 ? C.red : C.dim}${specsFailed} failed${C.reset})\n`);
	emit(`  ${C.bold}Tests${C.reset}  ${tests.total} (${C.green}${tests.passed} passed${C.reset}, ${tests.failed > 0 ? C.red : C.dim}${tests.failed} failed${C.reset}, ${C.yellow}${tests.skipped} skipped${C.reset})\n`);
	emit("\n");
}
|
|
3651
|
+
/**
 * Icon for an assertion status in the run summary.
 *
 * "passed" gets a green check and "failed" a red cross. Every other status —
 * "skipped", "pending", "todo", and any status not listed here — gets the
 * neutral yellow marker, so an unexpected status never renders as the
 * literal text "undefined" in the summary line.
 *
 * @param {string} status Assertion status from the JSON report.
 * @returns {string} The (optionally colored) icon.
 */
function assertionIcon(status) {
	switch (status) {
		case "passed": return `${C.green}✔${C.reset}`;
		case "failed": return `${C.red}✖${C.reset}`;
		case "skipped":
		case "pending":
		case "todo":
		default: return `${C.yellow}◌${C.reset}`;
	}
}
|
|
3660
|
+
/**
 * Format a duration for display: whole milliseconds below one second,
 * seconds with two decimals from one second upwards.
 *
 * @param {number} ms Duration in milliseconds.
 * @returns {string} e.g. "12ms" or "1.50s".
 */
function formatDuration(ms) {
	const subSecond = ms < 1e3;
	return subSecond ? `${Math.round(ms)}ms` : `${(ms / 1e3).toFixed(2)}s`;
}
|
|
3664
|
+
// Lines matching any of these patterns are dropped from the forwarded output.
const NOISE_LINE_PATTERNS = [/^JSON report written to /];
/**
 * Forward `source` to `sink` line by line, dropping lines that match
 * `NOISE_LINE_PATTERNS`. A trailing fragment without a newline is forwarded
 * as-is (no newline added) unless it matches a noise pattern.
 *
 * @param source Readable stream; its encoding is set to utf8.
 * @param sink Object with a `write(string)` method.
 * @returns {Promise<void>} Resolves once `source` has been fully consumed.
 */
async function streamFiltered(source, sink) {
	const isNoise = (text) => NOISE_LINE_PATTERNS.some((pattern) => pattern.test(text));
	source.setEncoding("utf8");
	let pending = "";
	for await (const chunk of source) {
		const pieces = (pending + chunk).split("\n");
		// The last piece has no terminating newline yet; keep it for later.
		pending = pieces.pop();
		for (const line of pieces) {
			if (!isNoise(line)) sink.write(`${line}\n`);
		}
	}
	if (pending.length > 0 && !isNoise(pending)) sink.write(pending);
}
|
|
3680
|
+
/**
 * Turn the CLI target into a list of `{ featureName, specName }` entries.
 *
 * - no target: everything from `listAllSpecs()`
 * - target containing "/": the single spec parsed by `parseSpecPath`
 * - otherwise: every spec `listSpecsForFeature` returns for that feature
 *
 * @param {string} [target] Optional '<feature>/<spec>' or '<feature>'.
 * @returns {Promise<Array<{featureName: string, specName: string}>>}
 */
async function resolveSpecs(target) {
	if (!target) return listAllSpecs();
	if (!target.includes("/")) {
		const specNames = await listSpecsForFeature(target);
		return specNames.map((specName) => ({
			featureName: target,
			specName
		}));
	}
	const parsed = parseSpecPath(target);
	return [{
		featureName: parsed.featureName,
		specName: parsed.specName
	}];
}
|
|
3694
|
+
//#endregion
|
|
3162
3695
|
//#region src/cli/draft.ts
|
|
3163
|
-
const CATEGORY_LABEL
|
|
3164
|
-
|
|
3165
|
-
setups: "Setup references",
|
|
3166
|
-
granularity: "Step granularity",
|
|
3167
|
-
unimplemented: "Unimplemented checks"
|
|
3168
|
-
};
|
|
3169
|
-
const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a test-spec.md with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
|
|
3696
|
+
const CATEGORY_LABEL = DRAFT_CATEGORY_LABEL;
|
|
3697
|
+
const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a spec.yaml with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
|
|
3170
3698
|
await ensureCcqaDir();
|
|
3171
3699
|
let featureName;
|
|
3172
3700
|
let specName;
|
|
@@ -3216,7 +3744,7 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
|
|
|
3216
3744
|
async function runOneTurn(input) {
|
|
3217
3745
|
const { featureName, specName, existing, userInput, autoApply } = input;
|
|
3218
3746
|
const isFirstRun = existing === null;
|
|
3219
|
-
const systemPrompt = buildDraftSystemPrompt();
|
|
3747
|
+
const systemPrompt = buildDraftSystemPrompt(await loadAvailableBlocks());
|
|
3220
3748
|
const userPrompt = buildDraftPrompt({
|
|
3221
3749
|
mode: isFirstRun ? "create" : "refine",
|
|
3222
3750
|
existing: existing ?? "",
|
|
@@ -3349,24 +3877,24 @@ function printReviewBlock(issues) {
|
|
|
3349
3877
|
}
|
|
3350
3878
|
if (errors.length) {
|
|
3351
3879
|
process.stdout.write(` ERRORS (${errors.length})\n`);
|
|
3352
|
-
for (const issue of errors) writeFinding
|
|
3880
|
+
for (const issue of errors) writeFinding(issue);
|
|
3353
3881
|
process.stdout.write("\n");
|
|
3354
3882
|
}
|
|
3355
3883
|
if (warnings.length) {
|
|
3356
3884
|
process.stdout.write(` WARNINGS (${warnings.length})\n`);
|
|
3357
|
-
for (const issue of warnings) writeFinding
|
|
3885
|
+
for (const issue of warnings) writeFinding(issue);
|
|
3358
3886
|
process.stdout.write("\n");
|
|
3359
3887
|
}
|
|
3360
3888
|
if (passed.length) {
|
|
3361
|
-
const names = passed.map((i) => CATEGORY_LABEL
|
|
3889
|
+
const names = passed.map((i) => CATEGORY_LABEL[i.category]).join(", ");
|
|
3362
3890
|
process.stdout.write(` PASSED (${passed.length})\n ${names}\n`);
|
|
3363
3891
|
}
|
|
3364
3892
|
process.stdout.write(`\n${RULE}\n\n`);
|
|
3365
3893
|
return errors.length > 0;
|
|
3366
3894
|
}
|
|
3367
|
-
function writeFinding
|
|
3895
|
+
function writeFinding(issue) {
|
|
3368
3896
|
const stepPart = issue.stepId ? ` ${issue.stepId}` : "";
|
|
3369
|
-
process.stdout.write(` ${CATEGORY_LABEL
|
|
3897
|
+
process.stdout.write(` ${CATEGORY_LABEL[issue.category]}${stepPart}\n`);
|
|
3370
3898
|
process.stdout.write(` ${issue.message}\n`);
|
|
3371
3899
|
if (issue.detail) process.stdout.write(` └ ${issue.detail.replace(/\n/g, "\n ")}\n`);
|
|
3372
3900
|
}
|
|
@@ -3380,10 +3908,7 @@ async function proposeNaming(opts) {
|
|
|
3380
3908
|
const tree = await listFeatureTree();
|
|
3381
3909
|
const treeForPrompt = tree.map((f) => ({
|
|
3382
3910
|
featureName: f.featureName,
|
|
3383
|
-
specs: f.specs.map((s) => ({
|
|
3384
|
-
specName: s.specName,
|
|
3385
|
-
...s.title ? { title: s.title } : {}
|
|
3386
|
-
}))
|
|
3911
|
+
specs: f.specs.map((s) => ({ specName: s.specName }))
|
|
3387
3912
|
}));
|
|
3388
3913
|
info("Proposing a feature/spec name based on your intent...");
|
|
3389
3914
|
const { result, isError } = await invokeClaudeStreaming({
|
|
@@ -3533,49 +4058,6 @@ function truncate(s, n) {
|
|
|
3533
4058
|
return s.slice(s.length - n);
|
|
3534
4059
|
}
|
|
3535
4060
|
//#endregion
|
|
3536
|
-
//#region src/prompts/drift.ts
|
|
3537
|
-
function buildDriftSystemPrompt() {
|
|
3538
|
-
return `${buildDraftSystemPrompt()}
|
|
3539
|
-
|
|
3540
|
-
## Drift mode
|
|
3541
|
-
|
|
3542
|
-
You are running non-interactively in CI. The user will not see or apply the patch — only the \`issues\` array.
|
|
3543
|
-
|
|
3544
|
-
- Always set \`patch\` to "" in your response.
|
|
3545
|
-
- Focus issue messages on what is **out of sync** between the spec and the current codebase: missing aria-labels, renamed routes, removed buttons, placeholders that no longer exist, setup references that point to non-existent files.
|
|
3546
|
-
- Do NOT raise issues about stylistic preferences in the spec wording.
|
|
3547
|
-
- Treat \`category: unimplemented\` as the primary signal for drift: anything the spec asserts that you cannot find in code is a drift finding.
|
|
3548
|
-
|
|
3549
|
-
## Drift severity policy (STRICT)
|
|
3550
|
-
|
|
3551
|
-
The CLI exits non-zero when any issue has \`severity: "ERROR"\` (default) or — with \`--severity warn\` — when any \`WARN\` is present. Pick severity by **whether a deterministic replay of this spec would fail today**, not by how confident you are in your own analysis.
|
|
3552
|
-
|
|
3553
|
-
Use **ERROR** when the spec would break on replay:
|
|
3554
|
-
- A selector the spec relies on (\`aria-label\`, \`placeholder\`, \`data-testid\`, button text) **does not exist anywhere in the source**.
|
|
3555
|
-
- A URL / route the spec navigates to is no longer defined.
|
|
3556
|
-
- An **Expected** asserts a string or visible text that is no longer rendered by the relevant component.
|
|
3557
|
-
- A \`setups[].name\` does not resolve to \`.ccqa/setups/<name>/setup-spec.md\`, or a \`params\` key is not declared in that setup's \`placeholders\`.
|
|
3558
|
-
- The spec references a feature/page that has been removed from the codebase.
|
|
3559
|
-
|
|
3560
|
-
Use **WARN** when the spec is still likely to work, but quality could improve:
|
|
3561
|
-
- The Expected is vague ("a message appears") when a precise string exists in code.
|
|
3562
|
-
- A step bundles multiple actions, or a needed intermediate verification step is missing.
|
|
3563
|
-
- Stable signals exist that the spec could leverage but currently doesn't.
|
|
3564
|
-
- You are unsure whether a referenced string exists (give the user the benefit of the doubt; do not hard-fail CI on uncertainty).
|
|
3565
|
-
|
|
3566
|
-
Use **OK** for axes you actively verified and found no issue.
|
|
3567
|
-
|
|
3568
|
-
If you cannot decide between ERROR and WARN, choose WARN. Reserve ERROR for findings you can back up with a specific file path or grep result that proves the drift.
|
|
3569
|
-
`;
|
|
3570
|
-
}
|
|
3571
|
-
function buildDriftUserPrompt(existing) {
|
|
3572
|
-
return buildDraftPrompt({
|
|
3573
|
-
mode: "refine",
|
|
3574
|
-
existing,
|
|
3575
|
-
userInput: ""
|
|
3576
|
-
});
|
|
3577
|
-
}
|
|
3578
|
-
//#endregion
|
|
3579
4061
|
//#region src/drift/affected.ts
|
|
3580
4062
|
const execFileP = promisify(execFile);
|
|
3581
4063
|
/**
|
|
@@ -3595,10 +4077,10 @@ function resolveBaseRef(explicit) {
|
|
|
3595
4077
|
* post-rename layout.
|
|
3596
4078
|
*
|
|
3597
4079
|
* Paths are re-rooted to be relative to `cwd`, not the git repo root. In a
|
|
3598
|
-
* monorepo where `cwd` is a sub-package (e.g. `
|
|
3599
|
-
*
|
|
3600
|
-
*
|
|
3601
|
-
*
|
|
4080
|
+
* monorepo where `cwd` is a sub-package (e.g. `apps/foo`), git emits paths
|
|
4081
|
+
* relative to the repo root, but specs declare relatedPaths relative to
|
|
4082
|
+
* their own package. Changes outside `cwd` are dropped so an unrelated PR
|
|
4083
|
+
* can never accidentally scope a sub-package's specs in.
|
|
3602
4084
|
*/
|
|
3603
4085
|
async function getChangedFiles(base, cwd) {
|
|
3604
4086
|
const [{ stdout: rootOut }, { stdout: diffOut }] = await Promise.all([execFileP("git", ["rev-parse", "--show-toplevel"], { cwd }), execFileP("git", [
|
|
@@ -3837,9 +4319,8 @@ ${previews.map((p) => {
|
|
|
3837
4319
|
## Existing specs
|
|
3838
4320
|
|
|
3839
4321
|
${specs.map((s) => {
|
|
3840
|
-
const title = s.title ? ` — ${s.title}` : "";
|
|
3841
4322
|
const paths = s.relatedPaths.length === 0 ? " (no relatedPaths declared)" : s.relatedPaths.map((p) => ` - ${p}`).join("\n");
|
|
3842
|
-
return `- ${s.featureName}/${s.specName}
|
|
4323
|
+
return `- ${s.featureName}/${s.specName}\n${paths}`;
|
|
3843
4324
|
}).join("\n")}
|
|
3844
4325
|
|
|
3845
4326
|
## Task
|
|
@@ -3850,7 +4331,7 @@ Return the spec keys that might be affected by any of the new files. Conservativ
|
|
|
3850
4331
|
//#endregion
|
|
3851
4332
|
//#region src/cli/drift.ts
|
|
3852
4333
|
const DEFAULT_CONCURRENCY = 3;
|
|
3853
|
-
const driftCommand = new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each
|
|
4334
|
+
const driftCommand = new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each spec.yaml is still in sync with the current codebase (CI-friendly, no patches applied).").option("--format <fmt>", "Output format: text | json | github", "text").option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error").option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`).option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().").option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.").option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.").action(async (specPath, opts) => {
|
|
3854
4335
|
const format = parseFormat(opts.format);
|
|
3855
4336
|
const threshold = parseSeverity(opts.severity);
|
|
3856
4337
|
const concurrency = parseConcurrency(opts.concurrency);
|
|
@@ -3878,8 +4359,18 @@ const driftCommand = new Command("drift").argument("[feature/spec]", "Optional s
|
|
|
3878
4359
|
if (format === "text") meta("scoped", `${targets.length} of ${total} spec${total > 1 ? "s" : ""}`);
|
|
3879
4360
|
if (targets.length === 0) exitWithNoSpecs(format, "no specs intersect the changed file set; nothing to check");
|
|
3880
4361
|
}
|
|
3881
|
-
const
|
|
3882
|
-
|
|
4362
|
+
const blocks = await loadAvailableBlocks(cwd);
|
|
4363
|
+
const results = await analyzeDrift({
|
|
4364
|
+
targets,
|
|
4365
|
+
cwd,
|
|
4366
|
+
blocks,
|
|
4367
|
+
concurrency,
|
|
4368
|
+
...opts.model ? { model: opts.model } : {},
|
|
4369
|
+
onSpecStart: (t) => {
|
|
4370
|
+
if (format === "text") info(`checking ${t.featureName}/${t.specName}`);
|
|
4371
|
+
}
|
|
4372
|
+
});
|
|
4373
|
+
process.stdout.write(renderDrift(results, format, cwd));
|
|
3883
4374
|
process.exit(determineExitCode(results, threshold));
|
|
3884
4375
|
});
|
|
3885
4376
|
function exitWithNoSpecs(format, message) {
|
|
@@ -3905,12 +4396,21 @@ async function filterByChanged(input) {
|
|
|
3905
4396
|
const newFiles = changed.filter((f) => f.status === "added");
|
|
3906
4397
|
const existingChanges = changed.filter((f) => f.status !== "added");
|
|
3907
4398
|
const affected = /* @__PURE__ */ new Set();
|
|
4399
|
+
const touchedBlockNames = /* @__PURE__ */ new Set();
|
|
4400
|
+
for (const f of changed) {
|
|
4401
|
+
const blockName = parseBlockPath(f.path);
|
|
4402
|
+
if (blockName) touchedBlockNames.add(blockName);
|
|
4403
|
+
}
|
|
3908
4404
|
for (const t of targets) {
|
|
3909
4405
|
if (!t.relatedPaths) {
|
|
3910
4406
|
affected.add(specKey(t));
|
|
3911
4407
|
continue;
|
|
3912
4408
|
}
|
|
3913
|
-
if (existingChanges.some((f) => isPathAffectedBy(f.path, t.relatedPaths)) || newFiles.some((f) => isPathAffectedBy(f.path, t.relatedPaths)))
|
|
4409
|
+
if (existingChanges.some((f) => isPathAffectedBy(f.path, t.relatedPaths)) || newFiles.some((f) => isPathAffectedBy(f.path, t.relatedPaths))) {
|
|
4410
|
+
affected.add(specKey(t));
|
|
4411
|
+
continue;
|
|
4412
|
+
}
|
|
4413
|
+
if (t.includedBlocks?.some((name) => touchedBlockNames.has(name))) affected.add(specKey(t));
|
|
3914
4414
|
}
|
|
3915
4415
|
if (newFiles.length > 0) {
|
|
3916
4416
|
if (format === "text") info(`routing ${newFiles.length} new file(s) to specs via Claude...`);
|
|
@@ -3919,7 +4419,6 @@ async function filterByChanged(input) {
|
|
|
3919
4419
|
specs: targets.filter((t) => t.relatedPaths).map((t) => ({
|
|
3920
4420
|
featureName: t.featureName,
|
|
3921
4421
|
specName: t.specName,
|
|
3922
|
-
title: t.title,
|
|
3923
4422
|
relatedPaths: t.relatedPaths
|
|
3924
4423
|
})),
|
|
3925
4424
|
cwd,
|
|
@@ -3930,18 +4429,20 @@ async function filterByChanged(input) {
|
|
|
3930
4429
|
return targets.filter((t) => affected.has(specKey(t)));
|
|
3931
4430
|
}
|
|
3932
4431
|
async function collectTargets(specPath, cwd) {
|
|
4432
|
+
const tree = await listFeatureTree(cwd);
|
|
3933
4433
|
if (specPath) {
|
|
3934
4434
|
const { featureName, specName } = parseSpecPath(specPath);
|
|
3935
|
-
|
|
4435
|
+
const spec = tree.find((f) => f.featureName === featureName)?.specs.find((s) => s.specName === specName);
|
|
4436
|
+
if (!spec?.hasSpecFile) {
|
|
3936
4437
|
error(`spec not found: ${featureName}/${specName} (under ${cwd})`);
|
|
3937
4438
|
process.exit(1);
|
|
3938
4439
|
}
|
|
3939
4440
|
return [{
|
|
3940
4441
|
featureName,
|
|
3941
|
-
specName
|
|
4442
|
+
specName,
|
|
4443
|
+
includedBlocks: spec.includedBlocks ?? []
|
|
3942
4444
|
}];
|
|
3943
4445
|
}
|
|
3944
|
-
const tree = await listFeatureTree(cwd);
|
|
3945
4446
|
const out = [];
|
|
3946
4447
|
for (const feature of tree) for (const spec of feature.specs) {
|
|
3947
4448
|
if (!spec.hasSpecFile) continue;
|
|
@@ -3950,206 +4451,11 @@ async function collectTargets(specPath, cwd) {
|
|
|
3950
4451
|
specName: spec.specName
|
|
3951
4452
|
};
|
|
3952
4453
|
if (spec.relatedPaths) t.relatedPaths = spec.relatedPaths;
|
|
3953
|
-
if (spec.
|
|
4454
|
+
if (spec.includedBlocks) t.includedBlocks = spec.includedBlocks;
|
|
3954
4455
|
out.push(t);
|
|
3955
4456
|
}
|
|
3956
4457
|
return out;
|
|
3957
4458
|
}
|
|
3958
|
-
async function runChecks(targets, concurrency, model, cwd, format) {
|
|
3959
|
-
const results = new Array(targets.length);
|
|
3960
|
-
let cursor = 0;
|
|
3961
|
-
const worker = async () => {
|
|
3962
|
-
while (true) {
|
|
3963
|
-
const idx = cursor++;
|
|
3964
|
-
if (idx >= targets.length) return;
|
|
3965
|
-
const target = targets[idx];
|
|
3966
|
-
results[idx] = await checkSpec(target, model, cwd, format);
|
|
3967
|
-
}
|
|
3968
|
-
};
|
|
3969
|
-
const pool = Array.from({ length: Math.min(concurrency, targets.length) }, () => worker());
|
|
3970
|
-
await Promise.all(pool);
|
|
3971
|
-
return results;
|
|
3972
|
-
}
|
|
3973
|
-
async function checkSpec(target, model, cwd, format) {
|
|
3974
|
-
const { featureName, specName } = target;
|
|
3975
|
-
const existing = await tryReadSpecFile(featureName, specName, cwd);
|
|
3976
|
-
if (existing === null) return {
|
|
3977
|
-
target,
|
|
3978
|
-
ok: false,
|
|
3979
|
-
issues: [],
|
|
3980
|
-
error: `spec file disappeared after enumeration: ${featureName}/${specName}`
|
|
3981
|
-
};
|
|
3982
|
-
if (format === "text") info(`checking ${featureName}/${specName}`);
|
|
3983
|
-
const { result, isError } = await invokeClaudeStreaming({
|
|
3984
|
-
prompt: buildDriftUserPrompt(existing),
|
|
3985
|
-
systemPrompt: buildDriftSystemPrompt(),
|
|
3986
|
-
allowedTools: [
|
|
3987
|
-
"Read",
|
|
3988
|
-
"Grep",
|
|
3989
|
-
"Glob"
|
|
3990
|
-
],
|
|
3991
|
-
silenceBashLog: true,
|
|
3992
|
-
cwd,
|
|
3993
|
-
...model ? { model } : {}
|
|
3994
|
-
}, (_msg) => {});
|
|
3995
|
-
if (isError) return {
|
|
3996
|
-
target,
|
|
3997
|
-
ok: false,
|
|
3998
|
-
issues: [],
|
|
3999
|
-
error: "Claude returned an error result"
|
|
4000
|
-
};
|
|
4001
|
-
const json = extractJsonBlock(result);
|
|
4002
|
-
if (!json) return {
|
|
4003
|
-
target,
|
|
4004
|
-
ok: false,
|
|
4005
|
-
issues: [],
|
|
4006
|
-
error: "Claude did not return a json block"
|
|
4007
|
-
};
|
|
4008
|
-
let report;
|
|
4009
|
-
try {
|
|
4010
|
-
report = DraftReportSchema.parse(JSON.parse(json));
|
|
4011
|
-
} catch (e) {
|
|
4012
|
-
return {
|
|
4013
|
-
target,
|
|
4014
|
-
ok: false,
|
|
4015
|
-
issues: [],
|
|
4016
|
-
error: `failed to parse drift report: ${e.message}`
|
|
4017
|
-
};
|
|
4018
|
-
}
|
|
4019
|
-
return {
|
|
4020
|
-
target,
|
|
4021
|
-
ok: true,
|
|
4022
|
-
issues: report.issues
|
|
4023
|
-
};
|
|
4024
|
-
}
|
|
4025
|
-
function emitReport(results, format, cwd) {
|
|
4026
|
-
if (format === "json") {
|
|
4027
|
-
emitJson(results);
|
|
4028
|
-
return;
|
|
4029
|
-
}
|
|
4030
|
-
if (format === "github") {
|
|
4031
|
-
emitGithub(results, cwd);
|
|
4032
|
-
return;
|
|
4033
|
-
}
|
|
4034
|
-
emitText(results);
|
|
4035
|
-
}
|
|
4036
|
-
const CATEGORY_LABEL = {
|
|
4037
|
-
assertable: "Assertability",
|
|
4038
|
-
setups: "Setup references",
|
|
4039
|
-
granularity: "Step granularity",
|
|
4040
|
-
unimplemented: "Unimplemented checks"
|
|
4041
|
-
};
|
|
4042
|
-
const HEAVY_RULE = "═".repeat(72);
|
|
4043
|
-
function emitText(results) {
|
|
4044
|
-
for (const r of results) {
|
|
4045
|
-
blank();
|
|
4046
|
-
const heading = `══ ${r.target.featureName}/${r.target.specName} `;
|
|
4047
|
-
const tail = "═".repeat(Math.max(3, 72 - heading.length));
|
|
4048
|
-
process.stdout.write(`${heading}${tail}\n`);
|
|
4049
|
-
if (r.error) {
|
|
4050
|
-
process.stdout.write(` ERROR ${r.error}\n`);
|
|
4051
|
-
continue;
|
|
4052
|
-
}
|
|
4053
|
-
const errors = r.issues.filter((i) => i.severity === "ERROR");
|
|
4054
|
-
const warnings = r.issues.filter((i) => i.severity === "WARN");
|
|
4055
|
-
const passed = r.issues.filter((i) => i.severity === "OK");
|
|
4056
|
-
if (errors.length === 0 && warnings.length === 0) {
|
|
4057
|
-
const label = passed.length === 1 ? "check" : "checks";
|
|
4058
|
-
const detail = passed.length > 0 ? `all ${passed.length} ${label} passed` : "no issues";
|
|
4059
|
-
process.stdout.write(` ✓ ${detail}\n`);
|
|
4060
|
-
continue;
|
|
4061
|
-
}
|
|
4062
|
-
for (const issue of errors) writeFinding("ERROR", issue);
|
|
4063
|
-
for (const issue of warnings) writeFinding("WARN", issue);
|
|
4064
|
-
if (passed.length > 0) {
|
|
4065
|
-
const names = passed.map((i) => CATEGORY_LABEL[i.category]).join(", ");
|
|
4066
|
-
process.stdout.write(`\n ✓ passed (${passed.length}): ${names}\n`);
|
|
4067
|
-
}
|
|
4068
|
-
}
|
|
4069
|
-
blank();
|
|
4070
|
-
process.stdout.write(`${HEAVY_RULE}\n`);
|
|
4071
|
-
const totals = summarize(results);
|
|
4072
|
-
meta("specs", `${results.length} (${totals.errored} errored)`);
|
|
4073
|
-
meta("findings", `${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`);
|
|
4074
|
-
}
|
|
4075
|
-
function writeFinding(level, issue) {
|
|
4076
|
-
const stepPart = issue.stepId ? ` ${issue.stepId}` : "";
|
|
4077
|
-
process.stdout.write(`\n ${level} ${CATEGORY_LABEL[issue.category]}${stepPart}\n`);
|
|
4078
|
-
process.stdout.write(` ${issue.message}\n`);
|
|
4079
|
-
if (issue.detail) process.stdout.write(` └ ${issue.detail.replace(/\n/g, "\n ")}\n`);
|
|
4080
|
-
}
|
|
4081
|
-
function emitJson(results) {
|
|
4082
|
-
const payload = { specs: results.map((r) => ({
|
|
4083
|
-
feature: r.target.featureName,
|
|
4084
|
-
spec: r.target.specName,
|
|
4085
|
-
ok: r.ok,
|
|
4086
|
-
...r.error ? { error: r.error } : {},
|
|
4087
|
-
issues: r.issues.map((i) => ({
|
|
4088
|
-
severity: i.severity,
|
|
4089
|
-
category: i.category,
|
|
4090
|
-
stepId: i.stepId,
|
|
4091
|
-
message: i.message,
|
|
4092
|
-
...i.detail ? { detail: i.detail } : {}
|
|
4093
|
-
}))
|
|
4094
|
-
})) };
|
|
4095
|
-
process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
|
|
4096
|
-
}
|
|
4097
|
-
function emitGithub(results, cwd) {
|
|
4098
|
-
const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
|
|
4099
|
-
for (const r of results) {
|
|
4100
|
-
const file = githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName);
|
|
4101
|
-
if (r.error) {
|
|
4102
|
-
process.stdout.write(`::error file=${file}::${escapeGhMessage(r.error)}\n`);
|
|
4103
|
-
continue;
|
|
4104
|
-
}
|
|
4105
|
-
for (const issue of r.issues) {
|
|
4106
|
-
if (issue.severity === "OK") continue;
|
|
4107
|
-
const level = issue.severity === "ERROR" ? "error" : "warning";
|
|
4108
|
-
const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
|
|
4109
|
-
const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
|
|
4110
|
-
process.stdout.write(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}\n`);
|
|
4111
|
-
}
|
|
4112
|
-
}
|
|
4113
|
-
}
|
|
4114
|
-
function githubRelPath(cwd, repoRoot, featureName, specName) {
|
|
4115
|
-
const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "test-spec.md");
|
|
4116
|
-
const rel = relative(repoRoot, abs);
|
|
4117
|
-
return rel.startsWith("..") ? abs : rel;
|
|
4118
|
-
}
|
|
4119
|
-
function escapeGhMessage(s) {
|
|
4120
|
-
return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A");
|
|
4121
|
-
}
|
|
4122
|
-
function escapeGhProp(s) {
|
|
4123
|
-
return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A").replace(/,/g, "%2C").replace(/:/g, "%3A");
|
|
4124
|
-
}
|
|
4125
|
-
function summarize(results) {
|
|
4126
|
-
let error = 0;
|
|
4127
|
-
let warn = 0;
|
|
4128
|
-
let ok = 0;
|
|
4129
|
-
let errored = 0;
|
|
4130
|
-
for (const r of results) {
|
|
4131
|
-
if (r.error) errored++;
|
|
4132
|
-
for (const issue of r.issues) if (issue.severity === "ERROR") error++;
|
|
4133
|
-
else if (issue.severity === "WARN") warn++;
|
|
4134
|
-
else ok++;
|
|
4135
|
-
}
|
|
4136
|
-
return {
|
|
4137
|
-
error,
|
|
4138
|
-
warn,
|
|
4139
|
-
ok,
|
|
4140
|
-
errored
|
|
4141
|
-
};
|
|
4142
|
-
}
|
|
4143
|
-
function determineExitCode(results, threshold) {
|
|
4144
|
-
for (const r of results) {
|
|
4145
|
-
if (r.error) return 1;
|
|
4146
|
-
for (const issue of r.issues) {
|
|
4147
|
-
if (issue.severity === "ERROR") return 1;
|
|
4148
|
-
if (threshold === "warn" && issue.severity === "WARN") return 1;
|
|
4149
|
-
}
|
|
4150
|
-
}
|
|
4151
|
-
return 0;
|
|
4152
|
-
}
|
|
4153
4459
|
function parseFormat(raw) {
|
|
4154
4460
|
const v = raw ?? "text";
|
|
4155
4461
|
if (v === "text" || v === "json" || v === "github") return v;
|
|
@@ -4192,8 +4498,6 @@ program.addCommand(driftCommand);
|
|
|
4192
4498
|
program.addCommand(traceCommand);
|
|
4193
4499
|
program.addCommand(generateCommand);
|
|
4194
4500
|
program.addCommand(runCommand);
|
|
4195
|
-
program.addCommand(traceSetupCommand);
|
|
4196
|
-
program.addCommand(generateSetupCommand);
|
|
4197
4501
|
program.parse();
|
|
4198
4502
|
//#endregion
|
|
4199
4503
|
export {};
|