ccqa 0.3.9 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -301
- package/dist/bin/ccqa.mjs +2180 -1124
- package/dist/package.json +2 -2
- package/dist/runtime/test-helpers.mjs +1 -53
- package/dist/runtime/vitest.config.d.mts +10 -10
- package/dist/spawn-ab-BxjEhA5e.mjs +65 -0
- package/package.json +2 -2
package/dist/bin/ccqa.mjs
CHANGED
|
@@ -1,28 +1,41 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import { n as spawnAB } from "../spawn-ab-BxjEhA5e.mjs";
|
|
2
3
|
import { createRequire } from "node:module";
|
|
3
4
|
import { Command } from "commander";
|
|
4
|
-
import { accessSync, readFileSync, statSync } from "node:fs";
|
|
5
|
+
import { accessSync, existsSync, readFileSync, statSync } from "node:fs";
|
|
5
6
|
import { fileURLToPath } from "node:url";
|
|
6
|
-
import { access, mkdir, mkdtemp, readFile, readdir, rm, stat,
|
|
7
|
-
import { delimiter, dirname, join, resolve } from "node:path";
|
|
7
|
+
import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
|
|
8
8
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
9
|
-
import
|
|
10
|
-
import {
|
|
9
|
+
import { delimiter, dirname, join, relative, resolve } from "node:path";
|
|
10
|
+
import { parse, stringify } from "yaml";
|
|
11
|
+
import { ZodError, z } from "zod";
|
|
12
|
+
import { execFile, spawn } from "node:child_process";
|
|
11
13
|
import { createInterface } from "node:readline";
|
|
12
|
-
import { tmpdir } from "node:os";
|
|
14
|
+
import { homedir, tmpdir } from "node:os";
|
|
13
15
|
import { createInterface as createInterface$1 } from "node:readline/promises";
|
|
14
|
-
import {
|
|
16
|
+
import { promisify } from "node:util";
|
|
15
17
|
//#region src/prompts/trace.ts
|
|
16
18
|
/**
 * Build a trace session name of the form `ccqa-trace-<timestamp>`, where
 * the timestamp is the current ISO-8601 time with every `:` and `.`
 * replaced by `-`.
 *
 * @returns {string} The generated session name.
 */
function generateSessionName() {
	const isoStamp = new Date().toISOString();
	const dashedStamp = isoStamp.replace(/[:.]/g, "-");
	return "ccqa-trace-" + dashedStamp;
}
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
/**
|
|
22
|
+
* Build the trace system prompt. `input.steps` is a flat list with includes
|
|
23
|
+
* already expanded (each step carries id / source / instruction / expected).
|
|
24
|
+
* The spec opens URLs via explicit step instructions (e.g.
|
|
25
|
+
* `instruction: "${APP_URL}/articles を開く"`).
|
|
26
|
+
*
|
|
27
|
+
* In v0.4 every spec is traced from scratch — block contents are inlined
|
|
28
|
+
* into the spec's own step list at expand time, so the prompt has no
|
|
29
|
+
* special "this is a block" mode. The `source` tag on each step still
|
|
30
|
+
* distinguishes spec-native steps from inlined block steps for the
|
|
31
|
+
* `// step:` comments in the eventual codegen output.
|
|
32
|
+
*/
|
|
33
|
+
function buildTraceSystemPrompt(input) {
|
|
34
|
+
const sessionName = input.sessionName ?? generateSessionName();
|
|
35
|
+
const stepsText = input.steps.map((step) => `### ${step.id} [${step.source}]
|
|
23
36
|
- **Instruction**: ${step.instruction}
|
|
24
37
|
- **Expected**: ${step.expected}`).join("\n\n");
|
|
25
|
-
const
|
|
38
|
+
const relatedPathsBlock = buildRelatedPathsInstruction();
|
|
26
39
|
return `You are an expert QA engineer executing a browser E2E test. Execute each step precisely and record every browser action as a structured log line.
|
|
27
40
|
|
|
28
41
|
## Session
|
|
@@ -43,7 +56,8 @@ agent-browser --session SESSION uncheck "<selector>"
|
|
|
43
56
|
agent-browser --session SESSION press <Key>
|
|
44
57
|
agent-browser --session SESSION select "<selector>" "<value>"
|
|
45
58
|
agent-browser --session SESSION hover "<selector>"
|
|
46
|
-
agent-browser --session SESSION wait --text "<text>"
|
|
59
|
+
agent-browser --session SESSION wait --text "<text>" [--timeout <ms>]
|
|
60
|
+
agent-browser --session SESSION wait "<selector>" [--timeout <ms>] [--state visible|hidden]
|
|
47
61
|
agent-browser --session SESSION cookies clear
|
|
48
62
|
\`\`\`
|
|
49
63
|
|
|
@@ -78,17 +92,18 @@ agent-browser --session SESSION cookies clear
|
|
|
78
92
|
|
|
79
93
|
## Test Specification
|
|
80
94
|
|
|
81
|
-
Title: ${
|
|
82
|
-
|
|
95
|
+
Title: ${input.title}
|
|
96
|
+
|
|
97
|
+
Each step's instruction names the URL to open directly (or via \`\${ENV_VAR}\`). Open exactly the URL the step says to open.
|
|
83
98
|
|
|
84
|
-
|
|
99
|
+
## Steps
|
|
85
100
|
|
|
86
101
|
${stepsText}
|
|
87
102
|
|
|
88
103
|
## Execution Workflow
|
|
89
104
|
|
|
90
105
|
For each step:
|
|
91
|
-
1. Emit \`STEP_START|<step-id>|<step
|
|
106
|
+
1. Emit \`STEP_START|<step-id>|<short description of what this step does>\`
|
|
92
107
|
2. Run \`snapshot\` and identify selectors from the ARIA tree
|
|
93
108
|
3. Execute the action using an ALLOWED selector
|
|
94
109
|
4. Emit \`AB_ACTION|...\` for every browser action (see below)
|
|
@@ -175,6 +190,15 @@ AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
|
|
|
175
190
|
|
|
176
191
|
The selector in AB_ACTION must be one of the ALLOWED formats above.
|
|
177
192
|
|
|
193
|
+
**CRITICAL — record only successful actions.** The AB_ACTION stream is the
|
|
194
|
+
canonical replay sequence: every line in it must be reproducible on a fresh
|
|
195
|
+
browser session. Therefore:
|
|
196
|
+
|
|
197
|
+
- If you tried a selector and \`agent-browser\` returned a non-zero exit (selector not found, element not interactable, timeout): **do NOT emit \`AB_ACTION|...\`** for that attempt. Take a fresh snapshot, switch selector, and only emit the AB_ACTION for the call that finally succeeded.
|
|
198
|
+
- If you explored multiple selectors for the same logical action (e.g. tried \`[aria-label='Email']\`, it failed, then \`[placeholder='Email']\` worked): emit AB_ACTION for the **working selector only**. The failed attempt must not appear in the trace.
|
|
199
|
+
- The same rule applies to \`AB_ACTION|assert|...\` lines: only emit them for assertions you actually verified on the current page in the current snapshot. Never declare an assertion against a selector you have not just confirmed visible — even if you intended to use it earlier.
|
|
200
|
+
- If a step ultimately fails after retries: emit \`ASSERTION_FAILED\` and STOP. Do NOT leave half-recorded actions for the failed step in the AB_ACTION stream.
|
|
201
|
+
|
|
178
202
|
## Assertion Protocol
|
|
179
203
|
|
|
180
204
|
After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you confirmed.
|
|
@@ -206,9 +230,36 @@ After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you c
|
|
|
206
230
|
**Selector rules for assert actions — CRITICAL:**
|
|
207
231
|
- Use the **same ALLOWED formats** as browser actions — never invent aria-label values
|
|
208
232
|
- Only use \`[aria-label='...']\` if that **exact** aria-label string appears in the current ARIA snapshot output
|
|
209
|
-
- When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector
|
|
233
|
+
- When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector — but still pre-verify with \`wait --text\` per the MUST-VERIFY rule below; \`alt\`-attribute "text" will not match.
|
|
210
234
|
- For \`element_disabled\`/\`element_enabled\`: use a CSS class selector if no aria-label is confirmed in the snapshot
|
|
211
235
|
|
|
236
|
+
**MUST-VERIFY rule — STRICT (applies to every assert except \`url_contains\`):**
|
|
237
|
+
|
|
238
|
+
The \`snapshot\` output is the **accessibility tree**: a semantic view. \`agent-browser\` queries the **real DOM**. They DO NOT always match. Two known traps:
|
|
239
|
+
|
|
240
|
+
1. *Selector trap*: a snapshot row like \`textbox "Email address"\` is reachable via \`[placeholder='...']\` but **NOT** via \`[aria-label='...']\` if no \`aria-label\` attribute is actually set — the browser inferred the label from \`<label for=>\` / surrounding text / \`placeholder\`.
|
|
241
|
+
2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes via \`wait --text\`) will NOT find it.
|
|
242
|
+
|
|
243
|
+
Before emitting an \`AB_ACTION|assert|...\` line, **verify the assertion form actually resolves on the live page**:
|
|
244
|
+
|
|
245
|
+
\`\`\`bash
|
|
246
|
+
# element_visible / element_enabled / element_disabled / element_checked / element_unchecked
|
|
247
|
+
agent-browser --session SESSION wait "<selector>" --timeout 3000
|
|
248
|
+
|
|
249
|
+
# element_not_visible
|
|
250
|
+
agent-browser --session SESSION wait "<selector>" --state hidden --timeout 3000
|
|
251
|
+
|
|
252
|
+
# text_visible
|
|
253
|
+
agent-browser --session SESSION wait --text "<text>" --timeout 3000
|
|
254
|
+
|
|
255
|
+
# text_not_visible
|
|
256
|
+
agent-browser --session SESSION wait --text "<text>" --state hidden --timeout 3000
|
|
257
|
+
\`\`\`
|
|
258
|
+
|
|
259
|
+
Apply the "record only successful actions" rule from the AB_ACTION section above. **Additionally**, when *no* form verifies — e.g. you tried \`[aria-label='X']\`, \`[placeholder='X']\`, and \`text=X\` and they all timed out, or the "text" turned out to be an \`alt\` / aria-label — **DROP the assertion entirely**. Fewer, real assertions beat invented ones that fail at replay. Prefer swapping a failed \`text_visible\` for an \`element_visible\` against the link/button selector when the visible label came from \`alt\` / aria-label.
|
|
260
|
+
|
|
261
|
+
\`url_contains\` is exempt — it checks the current URL string, not the DOM/accessibility tree.
|
|
262
|
+
|
|
212
263
|
**Examples:**
|
|
213
264
|
\`\`\`
|
|
214
265
|
AB_ACTION|assert|url_contains|||/dashboard|Navigated to dashboard
|
|
@@ -224,7 +275,7 @@ AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
|
|
|
224
275
|
Emit exactly one status line per step (outside any code block):
|
|
225
276
|
|
|
226
277
|
\`\`\`
|
|
227
|
-
STEP_START|<step-id>|<step
|
|
278
|
+
STEP_START|<step-id>|<short description of what this step does>
|
|
228
279
|
STEP_DONE|<step-id>|<what was verified>
|
|
229
280
|
ASSERTION_FAILED|<step-id>|<category: app-bug|env-issue|auth-blocked|missing-test-data|selector-drift|agent-misread>: <reason>
|
|
230
281
|
STEP_SKIPPED|<step-id>|<reason>
|
|
@@ -237,45 +288,61 @@ RUN_COMPLETED|failed|<summary>
|
|
|
237
288
|
After each step (outside any code block):
|
|
238
289
|
|
|
239
290
|
\`\`\`
|
|
240
|
-
ROUTE_STEP|<step-id>|<
|
|
291
|
+
ROUTE_STEP|<step-id>|<short description>|ACTION:<what you did>|OBSERVATION:<what you verified>|STATUS:<PASSED|FAILED|SKIPPED>
|
|
241
292
|
\`\`\`
|
|
242
293
|
|
|
243
|
-
## Start
|
|
294
|
+
${relatedPathsBlock}## Start
|
|
244
295
|
|
|
245
|
-
|
|
296
|
+
Begin by clearing cookies, then proceed straight to the first step's instruction.
|
|
246
297
|
|
|
247
298
|
\`\`\`bash
|
|
248
|
-
agent-browser --session ${sessionName} open ${spec.baseUrl}
|
|
249
|
-
\`\`\`
|
|
250
|
-
|
|
251
|
-
Emit:
|
|
252
|
-
\`\`\`
|
|
253
|
-
AB_ACTION|open|${spec.baseUrl}
|
|
254
|
-
\`\`\`` : `\`\`\`bash
|
|
255
299
|
agent-browser --session ${sessionName} cookies clear
|
|
256
|
-
agent-browser --session ${sessionName} open ${spec.baseUrl}
|
|
257
300
|
\`\`\`
|
|
258
301
|
|
|
259
302
|
Emit:
|
|
260
303
|
\`\`\`
|
|
261
304
|
AB_ACTION|cookies_clear
|
|
262
|
-
|
|
263
|
-
\`\`\``}
|
|
305
|
+
\`\`\`
|
|
264
306
|
|
|
265
|
-
Then emit \`STEP_START|step-01|...\` and
|
|
266
|
-
|
|
267
|
-
function buildTracePrompt(spec) {
|
|
268
|
-
return `Execute the test for "${spec.title}" at ${spec.baseUrl}.`;
|
|
307
|
+
Then emit \`STEP_START|step-01|...\` and execute the first step. The first step is responsible for opening the initial URL.
|
|
308
|
+
`;
|
|
269
309
|
}
|
|
270
|
-
function
|
|
271
|
-
return
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
310
|
+
function buildRelatedPathsInstruction() {
|
|
311
|
+
return `## Post-run: emit \`relatedPaths\` block
|
|
312
|
+
|
|
313
|
+
After all steps are complete (regardless of pass/fail) and **before** \`RUN_COMPLETED\`, you MUST emit a single \`RELATED_PATHS\` block. The host (not you) writes these paths into the spec — your only job is to emit the block.
|
|
314
|
+
|
|
315
|
+
\`relatedPaths\` is a list of glob patterns identifying the source files this spec depends on. CI uses them to decide whether a code change should trigger a drift check for this spec.
|
|
316
|
+
|
|
317
|
+
**Do NOT modify any source files.** You have only \`Read\`, \`Grep\`, and \`Glob\` for source inspection. The block you emit is the only output the host uses to update the spec.
|
|
318
|
+
|
|
319
|
+
**Inputs to consider:**
|
|
320
|
+
- The URLs you opened (\`AB_ACTION|open|...\`)
|
|
321
|
+
- The aria-labels, placeholders, and visible texts you clicked / filled / waited on
|
|
322
|
+
- The component / page / route files that render those strings (find them with \`Grep\`/\`Read\`/\`Glob\`)
|
|
323
|
+
|
|
324
|
+
**How to choose paths:**
|
|
325
|
+
1. For each URL the test navigates to, locate the route/page file and include it (e.g. \`src/app/tasks/page.tsx\`, \`src/pages/tasks/index.tsx\`).
|
|
326
|
+
2. For each unique aria-label / placeholder / visible text you interacted with, \`Grep\` the codebase, find the defining component, and include either the file or its parent feature directory.
|
|
327
|
+
3. Prefer **directory globs** (e.g. \`src/features/tasks/**\`) over individual files when several related components live in the same area. Otherwise list specific files.
|
|
328
|
+
4. Skip third-party files (\`node_modules/\`), build output (\`dist/\`, \`.next/\`), and generated code.
|
|
329
|
+
5. Be conservative — false positives (extra paths) are fine; false negatives (missing paths) cause drift to be missed in CI. When unsure whether a path is relevant, include it.
|
|
330
|
+
|
|
331
|
+
**Output format (STRICT — one line per path, no leading dashes, no commentary inside the block):**
|
|
332
|
+
|
|
333
|
+
\`\`\`
|
|
334
|
+
RELATED_PATHS_BEGIN
|
|
335
|
+
src/features/tasks/**
|
|
336
|
+
src/app/tasks/page.tsx
|
|
337
|
+
RELATED_PATHS_END
|
|
338
|
+
\`\`\`
|
|
339
|
+
|
|
340
|
+
Emit the block outside any other code block, on its own lines. If the test could not exercise the feature at all (e.g. blocked early), emit the block anyway with whatever paths you can identify; emit \`RELATED_PATHS_BEGIN\` immediately followed by \`RELATED_PATHS_END\` only if you genuinely could not identify any related file.
|
|
341
|
+
|
|
342
|
+
`;
|
|
276
343
|
}
|
|
277
|
-
function
|
|
278
|
-
return `Execute the
|
|
344
|
+
function buildTracePrompt(title) {
|
|
345
|
+
return `Execute the test for "${title}". Each step's instruction includes the URL or selector context it needs.`;
|
|
279
346
|
}
|
|
280
347
|
//#endregion
|
|
281
348
|
//#region src/cli/logger.ts
|
|
@@ -348,9 +415,14 @@ function resolveModel(explicit) {
|
|
|
348
415
|
return envModel && envModel.length > 0 ? envModel : void 0;
|
|
349
416
|
}
|
|
350
417
|
async function invokeClaudeStreaming(options, onEvent) {
|
|
351
|
-
const { prompt, systemPrompt, allowedTools, disableBuiltinTools = false, maxTurns, env, model, onAbAction, onAbActionFailed, silenceBashLog = false } = options;
|
|
418
|
+
const { prompt, systemPrompt, allowedTools, disableBuiltinTools = false, maxTurns, env, model, cwd, onAbAction, onAbActionFailed, silenceBashLog = false } = options;
|
|
352
419
|
const resolvedModel = resolveModel(model);
|
|
353
420
|
let lastAbToolUseId = null;
|
|
421
|
+
const claimAbToolUse = (toolUseId) => {
|
|
422
|
+
if (toolUseId !== lastAbToolUseId) return false;
|
|
423
|
+
lastAbToolUseId = null;
|
|
424
|
+
return true;
|
|
425
|
+
};
|
|
354
426
|
const sdkOptions = {
|
|
355
427
|
systemPrompt,
|
|
356
428
|
maxTurns,
|
|
@@ -358,6 +430,7 @@ async function invokeClaudeStreaming(options, onEvent) {
|
|
|
358
430
|
permissionMode: "bypassPermissions",
|
|
359
431
|
allowDangerouslySkipPermissions: true,
|
|
360
432
|
...resolvedModel ? { model: resolvedModel } : {},
|
|
433
|
+
...cwd ? { cwd } : {},
|
|
361
434
|
...env ? { env: {
|
|
362
435
|
...process.env,
|
|
363
436
|
...env
|
|
@@ -384,13 +457,17 @@ async function invokeClaudeStreaming(options, onEvent) {
|
|
|
384
457
|
} else lastAbToolUseId = null;
|
|
385
458
|
return {};
|
|
386
459
|
}] }],
|
|
460
|
+
PostToolUse: [{ hooks: [async (input) => {
|
|
461
|
+
if (input.hook_event_name !== "PostToolUse") return {};
|
|
462
|
+
if (input.tool_name !== "Bash") return {};
|
|
463
|
+
if (!isBashToolResponseError(input.tool_response)) return {};
|
|
464
|
+
if (claimAbToolUse(input.tool_use_id) && onAbActionFailed) onAbActionFailed();
|
|
465
|
+
return {};
|
|
466
|
+
}] }],
|
|
387
467
|
PostToolUseFailure: [{ hooks: [async (input) => {
|
|
388
468
|
if (input.hook_event_name !== "PostToolUseFailure") return {};
|
|
389
469
|
if (input.tool_name !== "Bash") return {};
|
|
390
|
-
if (input.tool_use_id
|
|
391
|
-
onAbActionFailed();
|
|
392
|
-
lastAbToolUseId = null;
|
|
393
|
-
}
|
|
470
|
+
if (claimAbToolUse(input.tool_use_id) && onAbActionFailed) onAbActionFailed();
|
|
394
471
|
return {};
|
|
395
472
|
}] }]
|
|
396
473
|
} : void 0
|
|
@@ -460,6 +537,26 @@ function isBlockedAbSubcommand(cmd) {
|
|
|
460
537
|
const sub = extractAbSubcommand(cmd);
|
|
461
538
|
return sub !== null && BLOCKED_AB_SUBCOMMANDS.has(sub);
|
|
462
539
|
}
|
|
540
|
+
/**
 * Decide whether the `tool_response` handed to a PostToolUse hook describes
 * a failed Bash invocation.
 *
 * Three markers count as failure:
 *
 * - `is_error === true`
 * - a numeric `exitCode` other than 0
 * - `killed === true`
 *
 * Any other value (non-objects, `null`, objects lacking those fields) is
 * reported as success, so an unrecognised response never triggers a
 * rollback.
 *
 * @param {unknown} tool_response - Raw response value from the hook input.
 * @returns {boolean} True only when one of the failure markers is present.
 */
function isBashToolResponseError(tool_response) {
	const isObjectLike = typeof tool_response === "object" && tool_response !== null;
	if (!isObjectLike) return false;
	const { is_error: flaggedError, exitCode, killed } = tool_response;
	const exitedNonZero = typeof exitCode === "number" && exitCode !== 0;
	return flaggedError === true || exitedNonZero || killed === true;
}
|
|
463
560
|
/** Returns true if any argument to an agent-browser command uses a @ref selector (e.g. @e14). */
|
|
464
561
|
function hasRefSelector(cmd) {
|
|
465
562
|
const abIdx = cmd.indexOf("agent-browser");
|
|
@@ -521,20 +618,261 @@ async function* replayMockMessages(path) {
|
|
|
521
618
|
}
|
|
522
619
|
}
|
|
523
620
|
//#endregion
|
|
621
|
+
//#region src/runtime/env-vars.ts
|
|
622
|
+
const ENV_VAR_RE = /\$\{([A-Z_][A-Z0-9_]*)\}|\$([A-Z_][A-Z0-9_]*)/g;
|
|
623
|
+
const ANY_VAR_RE = /\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g;
|
|
624
|
+
/**
 * Substitute each `$NAME` / `${NAME}` reference found in `value` with the
 * string returned by `lookup(name)`.
 *
 * A reference whose lookup yields `undefined` is left in the output exactly
 * as written; callers that prefer an empty string should have `lookup`
 * return `""` instead.
 *
 * @param {string} value - Text that may contain variable references.
 * @param {(name: string) => (string | undefined)} lookup - Resolver for a
 *   reference name.
 * @returns {string} `value` with every resolvable reference replaced.
 */
function substituteVars(value, lookup) {
	ANY_VAR_RE.lastIndex = 0;
	const resolveReference = (wholeReference, bracedName, plainName) => {
		const resolved = lookup(bracedName ?? plainName ?? "");
		if (resolved === void 0) return wholeReference;
		return resolved;
	};
	return value.replace(ANY_VAR_RE, resolveReference);
}
|
|
636
|
+
/**
 * Expand every `$VAR` / `${VAR}` reference in `value` from `process.env`.
 *
 * A variable that is not set expands to the empty string (as `sh` does)
 * rather than throwing, so a spec can be run without every variable of
 * every unused block being defined.
 *
 * @param {string} value - Text that may contain environment references.
 * @returns {string} `value` with each reference replaced by its current
 *   environment value, or `""` when unset.
 */
function resolveEnvRefs(value) {
	const readFromEnv = (_wholeReference, bracedName, plainName) => {
		const variableName = bracedName ?? plainName ?? "";
		const current = process.env[variableName];
		return current ?? "";
	};
	return value.replace(ENV_VAR_RE, readFromEnv);
}
|
|
650
|
+
/**
 * Render `value` as JavaScript source for a string expression in which each
 * `$VAR` / `${VAR}` reference reads `process.env.VAR ?? ""` when the
 * generated code runs, so the referenced value itself is never written into
 * the output.
 *
 * Examples:
 *   "${PASSWORD}"          -> '`${process.env.PASSWORD ?? ""}`'
 *   "user-${SUFFIX}@x.com" -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
 *   "literal value"        -> '"literal value"'
 *
 * @param {string} value - Text that may contain variable references.
 * @returns {string} JavaScript source: a template literal when `value`
 *   contains references, otherwise a JSON-quoted string literal.
 */
function envRefsToJsExpression(value) {
	return refsToJsExpression(value, () => null);
}
/**
 * Generalised form of `envRefsToJsExpression`. Every `$NAME` / `${NAME}`
 * reference in `value` is offered to `nameToExpr(name)`:
 *
 * - a string result is interpolated verbatim as a JS expression
 *   (`${<expr>}`), with no quoting and no `?? ""` wrapper;
 * - a `null` (or `undefined`) result makes the reference fall back to
 *   `process.env.<NAME> ?? ""`.
 *
 * Literal text between references is escaped so it survives inside a
 * template literal: backslashes, backticks, and any `${` that is not the
 * start of a reference.
 *
 * @param {string} value - Text that may contain variable references.
 * @param {(name: string) => (string | null | undefined)} nameToExpr - Maps a
 *   reference name to the JS expression to interpolate.
 * @returns {string} JavaScript source: a template literal when `value`
 *   contains references, otherwise a JSON-quoted string literal.
 */
function refsToJsExpression(value, nameToExpr) {
	// Work on a private copy of the pattern: `matchAll` starts from the
	// regex's `lastIndex`, and the shared global regex must not be mutated.
	const referencePattern = new RegExp(ANY_VAR_RE.source, "g");
	const escapeLiteral = (text) => text
		.replace(/\\/g, () => "\\\\")
		.replace(/`/g, () => "\\`")
		.replace(/\$\{/g, () => "\\${");
	let body = "";
	let cursor = 0;
	let referenceCount = 0;
	// Single pass: escape the literal text before each reference, then emit
	// the interpolation for the reference itself.
	for (const match of value.matchAll(referencePattern)) {
		referenceCount += 1;
		body += escapeLiteral(value.slice(cursor, match.index));
		const name = match[1] ?? match[2] ?? "";
		const expr = nameToExpr(name);
		body += expr == null ? "${process.env." + name + ' ?? ""}' : "${" + expr + "}";
		cursor = match.index + match[0].length;
	}
	// No references at all: a plain string literal is enough.
	if (referenceCount === 0) return JSON.stringify(value);
	body += escapeLiteral(value.slice(cursor));
	return "`" + body + "`";
}
|
|
694
|
+
//#endregion
|
|
695
|
+
//#region src/spec/yaml-schema.ts
|
|
696
|
+
/**
|
|
697
|
+
* An action step: one user-facing browser interaction. `instruction` and
|
|
698
|
+
* `expected` are the natural-language description handed to Claude during
|
|
699
|
+
* `ccqa trace`. URLs live inside `instruction`, either verbatim or via
|
|
700
|
+
* `${ENV_VAR}` references (resolved at runtime).
|
|
701
|
+
*/
|
|
702
|
+
const ActionStepSchema = z.object({
|
|
703
|
+
instruction: z.string().min(1),
|
|
704
|
+
expected: z.string().min(1)
|
|
705
|
+
}).strict();
|
|
706
|
+
/**
|
|
707
|
+
* An include step: invokes a reusable block (`.ccqa/blocks/<name>/spec.yaml`).
|
|
708
|
+
* `params` values are plain strings; env refs (`${VAR}`) inside them are
|
|
709
|
+
* resolved at expand time the same way step instructions are.
|
|
710
|
+
*/
|
|
711
|
+
const IncludeStepSchema = z.object({
|
|
712
|
+
include: z.string().min(1),
|
|
713
|
+
params: z.record(z.string(), z.string()).optional()
|
|
714
|
+
}).strict();
|
|
715
|
+
/**
|
|
716
|
+
* A spec step is either an action step or an include step. The two are
|
|
717
|
+
* discriminated by the presence of the `include` key — see `isIncludeStep`.
|
|
718
|
+
*/
|
|
719
|
+
const StepSchema = z.union([ActionStepSchema, IncludeStepSchema]);
|
|
720
|
+
/** Top-level spec schema. `.strict()` rejects any unknown key. */
|
|
721
|
+
const TestSpecSchema = z.object({
|
|
722
|
+
title: z.string().min(1),
|
|
723
|
+
relatedPaths: z.array(z.string().min(1)).optional(),
|
|
724
|
+
steps: z.array(StepSchema).min(1)
|
|
725
|
+
}).strict();
|
|
726
|
+
/**
|
|
727
|
+
* A block param declaration. `required` defaults to true; only explicit
|
|
728
|
+
* `required: false` makes it optional. `secret: true` flags the value as
|
|
729
|
+
* sensitive — codegen renders such values as `process.env.<NAME> ?? ""`
|
|
730
|
+
* template literals so the secret never ends up baked into test.spec.ts.
|
|
731
|
+
* `dummy` is a placeholder value surfaced by the draft / drift prompts
|
|
732
|
+
* (which see the block in isolation, before any include site exists);
|
|
733
|
+
* `description` is the param's semantic role, also consumed by those
|
|
734
|
+
* prompts and by spec authors browsing the block.
|
|
735
|
+
*/
|
|
736
|
+
const BlockParamSchema = z.object({
|
|
737
|
+
name: z.string().min(1),
|
|
738
|
+
required: z.boolean().optional(),
|
|
739
|
+
secret: z.boolean().optional(),
|
|
740
|
+
dummy: z.string().optional(),
|
|
741
|
+
description: z.string().optional()
|
|
742
|
+
}).strict();
|
|
743
|
+
/**
|
|
744
|
+
* Block schema. Block steps are restricted to ActionStep — nested blocks are
|
|
745
|
+
* forbidden. Including a block from inside another block fails parsing here
|
|
746
|
+
* (the store layer maps the cryptic "Unrecognized key: 'include'" error into
|
|
747
|
+
* a targeted nested-block message).
|
|
748
|
+
*/
|
|
749
|
+
const BlockSpecSchema = z.object({
|
|
750
|
+
title: z.string().min(1),
|
|
751
|
+
params: z.array(BlockParamSchema).optional(),
|
|
752
|
+
steps: z.array(ActionStepSchema).min(1)
|
|
753
|
+
}).strict();
|
|
754
|
+
/** Runtime predicate for the StepSchema union. */
|
|
755
|
+
function isIncludeStep(step) {
|
|
756
|
+
return "include" in step;
|
|
757
|
+
}
|
|
758
|
+
/** Returns true if a block param is required (default: true). */
|
|
759
|
+
function isParamRequired(param) {
|
|
760
|
+
return param.required !== false;
|
|
761
|
+
}
|
|
762
|
+
//#endregion
|
|
763
|
+
//#region src/spec/parser.ts
|
|
764
|
+
/** Parse a spec.yaml. Schema rejections are rewritten with actionable messages. */
|
|
765
|
+
function parseTestSpec(content, source = "spec.yaml") {
|
|
766
|
+
const raw = parseYamlOrThrow(content, source);
|
|
767
|
+
try {
|
|
768
|
+
return TestSpecSchema.parse(raw);
|
|
769
|
+
} catch (e) {
|
|
770
|
+
throw enrichZodError(e, source, false);
|
|
771
|
+
}
|
|
772
|
+
}
|
|
773
|
+
/**
|
|
774
|
+
* Parse a block's spec.yaml. Block-specific errors include the targeted
|
|
775
|
+
* nested-block message (the underlying zod failure on an `include` key
|
|
776
|
+
* inside a block step is hard to read).
|
|
777
|
+
*/
|
|
778
|
+
function parseBlockSpec(content, source = "block spec.yaml") {
|
|
779
|
+
const raw = parseYamlOrThrow(content, source);
|
|
780
|
+
try {
|
|
781
|
+
return BlockSpecSchema.parse(raw);
|
|
782
|
+
} catch (e) {
|
|
783
|
+
throw enrichZodError(e, source, true);
|
|
784
|
+
}
|
|
785
|
+
}
|
|
786
|
+
/**
 * Parse YAML text, rewrapping any parser failure in an Error whose message
 * names the file being parsed.
 *
 * @param {string} content - Raw YAML text.
 * @param {string} source - Label of the file, used only in the error message.
 * @returns {unknown} Whatever `parse` returns for `content`.
 * @throws {Error} `Failed to parse YAML (<source>): <detail>`; the original
 *   throwable is attached as `cause`.
 */
function parseYamlOrThrow(content, source) {
	try {
		return parse(content);
	} catch (e) {
		// A non-Error throwable has no `.message`; stringify it instead of
		// reporting "undefined".
		const detail = e instanceof Error ? e.message : String(e);
		throw new Error(`Failed to parse YAML (${source}): ${detail}`, { cause: e });
	}
}
|
|
793
|
+
/**
 * Turn a ZodError into a plain Error with one readable line per issue.
 * Anything that is not a ZodError is returned unchanged.
 *
 * @param {unknown} error - Value caught from a schema `parse` call.
 * @param {string} source - Label of the file, used in the header line.
 * @param {boolean} isBlock - Forwarded to `humanizeIssue` for each issue.
 * @returns {unknown} A new Error (with the ZodError attached as `cause`, so
 *   the structured issues stay reachable), or `error` itself when it is not
 *   a ZodError.
 */
function enrichZodError(error, source, isBlock) {
	if (!(error instanceof ZodError)) return error;
	const issueLines = error.issues.map((issue) => {
		// An empty path means the issue is on the document root.
		const path = issue.path.join(".") || "(root)";
		return ` - ${path}: ${humanizeIssue(issue, isBlock)}`;
	});
	return new Error([`Invalid ${source}:`, ...issueLines].join("\n"), { cause: error });
}
|
|
803
|
+
/**
 * Produce the message text for a single schema issue.
 *
 * `unrecognized_keys` issues get a rewritten message: inside a block, an
 * unknown `include` key is explained as an unsupported nested block;
 * otherwise the unknown keys are listed. Every other issue keeps its own
 * message.
 *
 * @param {{ code: string, message: string, keys?: unknown }} issue - The
 *   issue to describe.
 * @param {boolean} isBlock - True when the issue comes from a block spec.
 * @returns {string} Message text for the issue.
 */
function humanizeIssue(issue, isBlock) {
	if (issue.code !== "unrecognized_keys") return issue.message;
	const unknownKeys = Array.isArray(issue.keys) ? issue.keys : [];
	const isNestedInclude = isBlock && unknownKeys.includes("include");
	return isNestedInclude
		? `Nested blocks are not supported — flatten by inlining the included block's steps into this block.`
		: `Unknown keys: ${unknownKeys.join(", ")}`;
}
|
|
811
|
+
//#endregion
|
|
812
|
+
//#region src/spec/expand.ts
|
|
813
|
+
/**
 * Flatten a spec's top-level steps into one sequentially numbered list.
 *
 * An action step is copied through with `source: "spec"`. An include step
 * is replaced, in place and in order, by the referenced block's steps, with
 * the include's params substituted into each `instruction` / `expected` and
 * `source` set to the block name. Ids are `step-01`, `step-02`, … across the
 * whole flattened list.
 *
 * @param {{ steps: Array<object> }} spec - Parsed spec.
 * @param {{ blocks: Map<string, object> }} options - `blocks` maps a block
 *   name to its parsed block spec.
 * @returns {Array<{ id: string, source: string, instruction: string, expected: string }>}
 *   The flattened steps.
 */
function expandSpec(spec, options) {
	let issued = 0;
	const nextId = () => {
		issued += 1;
		return `step-${String(issued).padStart(2, "0")}`;
	};
	return spec.steps.flatMap((step) => {
		if (!isIncludeStep(step)) {
			return [{
				id: nextId(),
				source: "spec",
				instruction: step.instruction,
				expected: step.expected
			}];
		}
		const resolved = resolveBlock(step.include, step.params ?? {}, options.blocks);
		return resolved.steps.map((blockStep) => ({
			id: nextId(),
			source: step.include,
			instruction: substituteVars(blockStep.instruction, resolved.lookup),
			expected: substituteVars(blockStep.expected, resolved.lookup)
		}));
	});
}
|
|
842
|
+
/**
 * Look up a block by name and validate the params supplied at an include
 * site against the block's declared params.
 *
 * @param {string} blockName - Name used in `include:`.
 * @param {Record<string, string>} rawParams - Params given at the include site.
 * @param {Map<string, object>} blocks - Block name → parsed block spec.
 * @returns {{ steps: Array<object>, lookup: (name: string) => (string | undefined) }}
 *   The block's steps plus a resolver that returns the supplied value for a
 *   param name, or `undefined` when that param was not supplied.
 * @throws {Error} When the block is unknown, an undeclared param is
 *   supplied, or a required param is missing.
 */
function resolveBlock(blockName, rawParams, blocks) {
	const block = blocks.get(blockName);
	if (!block) throw new Error(`Unknown block: "${blockName}". Define it under .ccqa/blocks/${blockName}/spec.yaml.`);
	// Own-property test only: `in` would also see Object.prototype members,
	// so a required param named e.g. "constructor" would wrongly count as
	// supplied.
	const isSupplied = (name) => Object.prototype.hasOwnProperty.call(rawParams, name);
	const declaredParams = new Map((block.params ?? []).map((p) => [p.name, p]));
	for (const key of Object.keys(rawParams)) {
		if (!declaredParams.has(key)) throw new Error(`Block "${blockName}" received unknown param "${key}". Declared params: ${[...declaredParams.keys()].join(", ") || "(none)"}.`);
	}
	for (const [pname, def] of declaredParams) {
		if (isParamRequired(def) && !isSupplied(pname)) throw new Error(`Block "${blockName}" is missing required param "${pname}".`);
	}
	const lookup = (name) => isSupplied(name) ? rawParams[name] : void 0;
	return {
		steps: block.steps,
		lookup
	};
}
|
|
856
|
+
/**
 * List the distinct block names that a spec's top-level steps include, in
 * order of first appearance. Only top-level steps are inspected.
 *
 * @param {{ steps: Array<object> }} spec - Parsed spec.
 * @returns {string[]} Unique block names referenced via `include`.
 */
function collectIncludedBlockNames(spec) {
	const includedNames = spec.steps
		.filter((step) => isIncludeStep(step))
		.map((step) => step.include);
	return Array.from(new Set(includedNames));
}
|
|
866
|
+
//#endregion
|
|
524
867
|
//#region src/store/index.ts
|
|
525
868
|
const CCQA_DIR = ".ccqa";
|
|
869
|
+
const SPEC_FILE = "spec.yaml";
|
|
526
870
|
function getCcqaDir(cwd = process.cwd()) {
|
|
527
871
|
return join(cwd, CCQA_DIR);
|
|
528
872
|
}
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
* - "tasks/create-and-complete"
|
|
533
|
-
* - "features/tasks/test-cases/create-and-complete"
|
|
534
|
-
* - ".ccqa/features/tasks/test-cases/create-and-complete"
|
|
535
|
-
* All forms resolve to { featureName: "tasks", specName: "create-and-complete" }.
|
|
536
|
-
* Trailing slashes are tolerated.
|
|
537
|
-
*/
|
|
873
|
+
function specKey(ref) {
|
|
874
|
+
return `${ref.featureName}/${ref.specName}`;
|
|
875
|
+
}
|
|
538
876
|
function parseSpecPath(specPath) {
|
|
539
877
|
const parts = specPath.replace(/^\.\/+/, "").replace(/\/+$/, "").split("/").filter((p) => p.length > 0);
|
|
540
878
|
if (parts[0] === ".ccqa") parts.shift();
|
|
@@ -556,23 +894,44 @@ function getSpecDir(featureName, specName, cwd) {
|
|
|
556
894
|
}
|
|
557
895
|
async function ensureCcqaDir(cwd) {
|
|
558
896
|
await mkdir(join(getCcqaDir(cwd), "features"), { recursive: true });
|
|
897
|
+
await mkdir(join(getCcqaDir(cwd), "blocks"), { recursive: true });
|
|
559
898
|
}
|
|
560
899
|
async function readSpecFile(featureName, specName, cwd) {
|
|
561
|
-
const specPath = join(getSpecDir(featureName, specName, cwd),
|
|
900
|
+
const specPath = join(getSpecDir(featureName, specName, cwd), SPEC_FILE);
|
|
562
901
|
return readFile(specPath, "utf-8").catch(() => {
|
|
563
902
|
throw new Error(`Spec file not found: ${specPath}`);
|
|
564
903
|
});
|
|
565
904
|
}
|
|
566
905
|
async function tryReadSpecFile(featureName, specName, cwd) {
|
|
567
|
-
return readFile(join(getSpecDir(featureName, specName, cwd),
|
|
906
|
+
return readFile(join(getSpecDir(featureName, specName, cwd), SPEC_FILE), "utf-8").catch(() => null);
|
|
568
907
|
}
|
|
569
908
|
async function saveSpecFile(featureName, specName, content, cwd) {
|
|
570
909
|
const specDir = getSpecDir(featureName, specName, cwd);
|
|
571
910
|
await mkdir(specDir, { recursive: true });
|
|
572
|
-
const specPath = join(specDir,
|
|
911
|
+
const specPath = join(specDir, SPEC_FILE);
|
|
573
912
|
await writeFile(specPath, content.endsWith("\n") ? content : content + "\n", "utf-8");
|
|
574
913
|
return specPath;
|
|
575
914
|
}
|
|
915
|
+
/**
|
|
916
|
+
* Replace (or insert) the `relatedPaths` key in the spec. Preserves every
|
|
917
|
+
* other top-level field and the entire steps array. Returns the absolute
|
|
918
|
+
* path that was written, or null if the spec file does not exist.
|
|
919
|
+
*/
|
|
920
|
+
async function updateSpecRelatedPaths(featureName, specName, relatedPaths, cwd) {
|
|
921
|
+
const specPath = join(getSpecDir(featureName, specName, cwd), SPEC_FILE);
|
|
922
|
+
const existing = await readFile(specPath, "utf-8").catch(() => null);
|
|
923
|
+
if (existing === null) return null;
|
|
924
|
+
await writeFile(specPath, stringify(stripUndefined({
|
|
925
|
+
...parseTestSpec(existing, specPath),
|
|
926
|
+
relatedPaths: relatedPaths.length > 0 ? relatedPaths : void 0
|
|
927
|
+
}), { lineWidth: 0 }), "utf-8");
|
|
928
|
+
return specPath;
|
|
929
|
+
}
|
|
930
|
+
function stripUndefined(obj) {
|
|
931
|
+
const out = {};
|
|
932
|
+
for (const [k, v] of Object.entries(obj)) if (v !== void 0) out[k] = v;
|
|
933
|
+
return out;
|
|
934
|
+
}
|
|
576
935
|
async function saveRoute(featureName, specName, route, cwd) {
|
|
577
936
|
const specDir = getSpecDir(featureName, specName, cwd);
|
|
578
937
|
await mkdir(specDir, { recursive: true });
|
|
@@ -587,38 +946,72 @@ async function saveTraceActions(featureName, specName, actions, cwd) {
|
|
|
587
946
|
await writeFile(actionsPath, JSON.stringify(actions, null, 2), "utf-8");
|
|
588
947
|
return actionsPath;
|
|
589
948
|
}
|
|
590
|
-
function
|
|
591
|
-
return join(getCcqaDir(cwd), "
|
|
949
|
+
function getBlocksDir(cwd) {
|
|
950
|
+
return join(getCcqaDir(cwd), "blocks");
|
|
592
951
|
}
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
952
|
+
/**
|
|
953
|
+
* Inverse of `getBlockDir`. Given a file path that appears in a git diff,
|
|
954
|
+
* return the block name if the path points at the block's spec.yaml, else
|
|
955
|
+
* null. Used by `drift --changed` to invalidate specs whose included blocks
|
|
956
|
+
* were edited. (v0.4 inlines blocks into every spec's own trace, so the
|
|
957
|
+
* block directory holds only spec.yaml — no per-block actions.json / route
|
|
958
|
+
* lives here anymore.)
|
|
959
|
+
*/
|
|
960
|
+
function parseBlockPath(path) {
|
|
961
|
+
return path.match(/(?:^|\/)\.ccqa\/blocks\/([^/]+)\/spec\.yaml$/)?.[1] ?? null;
|
|
598
962
|
}
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
963
|
+
/**
|
|
964
|
+
* Load every block under `.ccqa/blocks/<name>/spec.yaml`. Used by the trace /
|
|
965
|
+
* generate / drift entry points to validate include references at parse time.
|
|
966
|
+
*
|
|
967
|
+
* A malformed block is fatal — surfaces as a thrown Error with the path that
|
|
968
|
+
* failed. Missing block directories (no `spec.yaml`) are silently skipped so
|
|
969
|
+
* stray files don't break the loader.
|
|
970
|
+
*/
|
|
971
|
+
async function loadAllBlocks(cwd) {
|
|
972
|
+
const dir = getBlocksDir(cwd);
|
|
973
|
+
const names = await readdir(dir).catch(() => []);
|
|
974
|
+
const entries = await Promise.all(names.map(async (name) => {
|
|
975
|
+
const path = join(dir, name, SPEC_FILE);
|
|
976
|
+
const content = await readFile(path, "utf-8").catch(() => null);
|
|
977
|
+
return content === null ? null : [name, parseBlockSpec(content, path)];
|
|
978
|
+
}));
|
|
979
|
+
return new Map(entries.filter((e) => e !== null));
|
|
605
980
|
}
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
981
|
+
/**
|
|
982
|
+
* Project the parsed blocks into the shape the draft / drift prompts consume.
|
|
983
|
+
* Co-located with `loadAllBlocks` so callers don't have to remember the
|
|
984
|
+
* isParamRequired / secret-default mapping.
|
|
985
|
+
*/
|
|
986
|
+
async function loadAvailableBlocks(cwd) {
|
|
987
|
+
return [...(await loadAllBlocks(cwd)).entries()].map(([name, block]) => ({
|
|
988
|
+
name,
|
|
989
|
+
title: block.title,
|
|
990
|
+
params: (block.params ?? []).map((p) => ({
|
|
991
|
+
name: p.name,
|
|
992
|
+
required: isParamRequired(p),
|
|
993
|
+
secret: p.secret === true
|
|
994
|
+
}))
|
|
995
|
+
}));
|
|
615
996
|
}
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
997
|
+
/**
|
|
998
|
+
* Probe for orphaned files left over from earlier ccqa versions inside
|
|
999
|
+
* `.ccqa/blocks/<name>/`. Both pre-v0.4 `test.spec.ts` (function-export
|
|
1000
|
+
* blocks) and the short-lived `actions.json` / `route.md` (recorded-block
|
|
1001
|
+
* variant) are dead in the new "blocks are pure spec templates" model and
|
|
1002
|
+
* should be deleted manually. Returns the absolute paths.
|
|
1003
|
+
*/
|
|
1004
|
+
async function findStaleBlockArtifacts(cwd) {
|
|
1005
|
+
const dir = getBlocksDir(cwd);
|
|
1006
|
+
const names = await readdir(dir).catch(() => []);
|
|
1007
|
+
return (await Promise.all(names.flatMap((name) => [
|
|
1008
|
+
"test.spec.ts",
|
|
1009
|
+
"actions.json",
|
|
1010
|
+
"route.md"
|
|
1011
|
+
].map(async (f) => {
|
|
1012
|
+
const path = join(dir, name, f);
|
|
1013
|
+
return await stat(path).then(() => true).catch(() => false) ? path : null;
|
|
1014
|
+
})))).filter((p) => p !== null);
|
|
622
1015
|
}
|
|
623
1016
|
async function getTraceActions(featureName, specName, cwd) {
|
|
624
1017
|
const path = join(getSpecDir(featureName, specName, cwd), "actions.json");
|
|
@@ -660,8 +1053,7 @@ async function listSpecsForFeature(featureName, cwd) {
|
|
|
660
1053
|
}
|
|
661
1054
|
/**
|
|
662
1055
|
* Lists every feature/spec dir under .ccqa/features/, regardless of whether
|
|
663
|
-
* the spec is fully drafted yet.
|
|
664
|
-
* feature/spec names that fit the existing structure.
|
|
1056
|
+
* the spec is fully drafted yet. Each spec file is read at most once.
|
|
665
1057
|
*/
|
|
666
1058
|
async function listFeatureTree(cwd) {
|
|
667
1059
|
const featuresDir = join(getCcqaDir(cwd), "features");
|
|
@@ -672,16 +1064,27 @@ async function listFeatureTree(cwd) {
|
|
|
672
1064
|
return {
|
|
673
1065
|
featureName,
|
|
674
1066
|
specs: await Promise.all(specDirs.map(async (specName) => {
|
|
675
|
-
const
|
|
1067
|
+
const specFile = join(testCasesDir, specName, SPEC_FILE);
|
|
1068
|
+
const content = await readFile(specFile, "utf-8").catch(() => null);
|
|
676
1069
|
if (content === null) return {
|
|
677
1070
|
specName,
|
|
678
1071
|
hasSpecFile: false
|
|
679
1072
|
};
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
1073
|
+
try {
|
|
1074
|
+
const spec = parseTestSpec(content, specFile);
|
|
1075
|
+
const entry = {
|
|
1076
|
+
specName,
|
|
1077
|
+
hasSpecFile: true,
|
|
1078
|
+
includedBlocks: collectIncludedBlockNames(spec)
|
|
1079
|
+
};
|
|
1080
|
+
if (spec.relatedPaths) entry.relatedPaths = spec.relatedPaths;
|
|
1081
|
+
return entry;
|
|
1082
|
+
} catch {
|
|
1083
|
+
return {
|
|
1084
|
+
specName,
|
|
1085
|
+
hasSpecFile: true
|
|
1086
|
+
};
|
|
1087
|
+
}
|
|
685
1088
|
}))
|
|
686
1089
|
};
|
|
687
1090
|
}));
|
|
@@ -706,196 +1109,67 @@ function routeToMarkdown(route) {
|
|
|
706
1109
|
return lines.join("\n");
|
|
707
1110
|
}
|
|
708
1111
|
//#endregion
|
|
709
|
-
//#region src/
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
};
|
|
721
|
-
}
|
|
722
|
-
function parseSetupSpec(content) {
|
|
723
|
-
const { data, content: body } = matter(content);
|
|
724
|
-
const steps = parseSteps(body);
|
|
725
|
-
const placeholders = parsePlaceholders(data["placeholders"]);
|
|
726
|
-
return {
|
|
727
|
-
title: String(data["title"] ?? "Untitled"),
|
|
728
|
-
placeholders: Object.keys(placeholders).length > 0 ? placeholders : void 0,
|
|
729
|
-
steps
|
|
730
|
-
};
|
|
731
|
-
}
|
|
732
|
-
function parsePlaceholders(raw) {
|
|
733
|
-
if (!raw || typeof raw !== "object") return {};
|
|
734
|
-
const result = {};
|
|
735
|
-
for (const [key, val] of Object.entries(raw)) if (val && typeof val === "object" && "dummy" in val) {
|
|
736
|
-
const v = val;
|
|
737
|
-
result[key] = {
|
|
738
|
-
dummy: String(v["dummy"]),
|
|
739
|
-
description: v["description"] ? String(v["description"]) : void 0
|
|
740
|
-
};
|
|
741
|
-
}
|
|
742
|
-
return result;
|
|
743
|
-
}
|
|
744
|
-
function parseSetupRefs(raw) {
|
|
745
|
-
if (!Array.isArray(raw)) return void 0;
|
|
746
|
-
const refs = [];
|
|
747
|
-
for (const item of raw) if (typeof item === "object" && item !== null && "name" in item) {
|
|
748
|
-
const i = item;
|
|
749
|
-
refs.push({
|
|
750
|
-
name: String(i["name"]),
|
|
751
|
-
params: i["params"] && typeof i["params"] === "object" ? Object.fromEntries(Object.entries(i["params"]).map(([k, v]) => [k, String(v)])) : void 0
|
|
752
|
-
});
|
|
753
|
-
}
|
|
754
|
-
return refs.length > 0 ? refs : void 0;
|
|
755
|
-
}
|
|
756
|
-
function parsePrerequisites(body) {
|
|
757
|
-
const match = body.match(/##\s+Prerequisites\s+([\s\S]*?)(?=##|$)/);
|
|
758
|
-
if (!match || !match[1]) return null;
|
|
759
|
-
return match[1].trim();
|
|
760
|
-
}
|
|
761
|
-
function parseSteps(body) {
|
|
762
|
-
const stepBlocks = body.split(/###\s+Step\s+\d+:/);
|
|
763
|
-
const steps = [];
|
|
764
|
-
for (let i = 1; i < stepBlocks.length; i++) {
|
|
765
|
-
const block = stepBlocks[i];
|
|
766
|
-
if (!block) continue;
|
|
767
|
-
const titleMatch = block.match(/^(.+)/);
|
|
768
|
-
const instructionMatch = block.match(/\*\*Instruction\*\*:\s*(.+)/);
|
|
769
|
-
const expectedMatch = block.match(/\*\*Expected\*\*:\s*(.+)/);
|
|
770
|
-
if (!titleMatch || !instructionMatch || !expectedMatch) continue;
|
|
771
|
-
steps.push({
|
|
772
|
-
id: `step-${String(i).padStart(2, "0")}`,
|
|
773
|
-
title: titleMatch[1]?.trim() ?? "",
|
|
774
|
-
instruction: instructionMatch[1]?.trim() ?? "",
|
|
775
|
-
expected: expectedMatch[1]?.trim() ?? ""
|
|
776
|
-
});
|
|
777
|
-
}
|
|
778
|
-
return steps;
|
|
1112
|
+
//#region src/cli/stale-blocks.ts
|
|
1113
|
+
/**
|
|
1114
|
+
* Hint when stale per-block artifacts (`test.spec.ts`, `actions.json`,
|
|
1115
|
+
* `route.md`) from earlier ccqa versions are still present. v0.4 treats
|
|
1116
|
+
* blocks as pure spec templates — they no longer have their own executable
|
|
1117
|
+
* or recorded artifacts, so these files are dead code and should be deleted
|
|
1118
|
+
* manually. Shared by `trace` and `generate`.
|
|
1119
|
+
*/
|
|
1120
|
+
async function warnStaleBlockArtifacts() {
|
|
1121
|
+
const stale = await findStaleBlockArtifacts();
|
|
1122
|
+
if (stale.length === 0) return;
|
|
1123
|
+
for (const p of stale) hint(`stale block artifact detected: ${p} — v0.4 no longer uses these; delete it manually.`);
|
|
779
1124
|
}
|
|
780
1125
|
//#endregion
|
|
781
|
-
//#region src/
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
1126
|
+
//#region src/drift/parse-related-paths.ts
|
|
1127
|
+
/**
|
|
1128
|
+
* Pull a `RELATED_PATHS_BEGIN ... RELATED_PATHS_END` block out of the trace
|
|
1129
|
+
* agent's combined text output. Lines inside the block become entries; blank
|
|
1130
|
+
* lines, bullet markers, and code fences are tolerated. Returns null when the
|
|
1131
|
+
* agent did not emit a block at all so the caller can warn instead of silently
|
|
1132
|
+
* clearing the spec's existing relatedPaths.
|
|
1133
|
+
*/
|
|
1134
|
+
function parseRelatedPathsBlock(text) {
|
|
1135
|
+
const match = text.match(/RELATED_PATHS_BEGIN\s*\n?([\s\S]*?)\n?RELATED_PATHS_END/);
|
|
1136
|
+
if (!match || match[1] === void 0) return null;
|
|
1137
|
+
const seen = /* @__PURE__ */ new Set();
|
|
1138
|
+
const out = [];
|
|
1139
|
+
for (const raw of match[1].split("\n")) {
|
|
1140
|
+
const line = raw.replace(/^```.*$/, "").trim();
|
|
1141
|
+
if (!line) continue;
|
|
1142
|
+
const cleaned = line.replace(/^[-*]\s+/, "").trim();
|
|
1143
|
+
if (!cleaned || seen.has(cleaned)) continue;
|
|
1144
|
+
seen.add(cleaned);
|
|
1145
|
+
out.push(cleaned);
|
|
794
1146
|
}
|
|
795
|
-
return
|
|
1147
|
+
return out;
|
|
796
1148
|
}
|
|
797
1149
|
//#endregion
|
|
798
|
-
//#region src/runtime/
|
|
1150
|
+
//#region src/runtime/agent-browser-bin.ts
|
|
799
1151
|
const require$2 = createRequire(import.meta.url);
|
|
800
|
-
function
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
1152
|
+
function hasAgentBrowserShim(dir) {
|
|
1153
|
+
try {
|
|
1154
|
+
statSync(join(dir, "agent-browser"));
|
|
1155
|
+
return true;
|
|
1156
|
+
} catch {
|
|
1157
|
+
return false;
|
|
1158
|
+
}
|
|
806
1159
|
}
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
}
|
|
820
|
-
async function spawnVitestTeed(args, opts = {}) {
|
|
821
|
-
const child = spawnVitestChild(args, opts, "pipe");
|
|
822
|
-
const [stdout, stderr, exitCode] = await Promise.all([
|
|
823
|
-
teeDrain(child.stdout, process.stdout),
|
|
824
|
-
teeDrain(child.stderr, process.stderr),
|
|
825
|
-
waitExit(child)
|
|
826
|
-
]);
|
|
827
|
-
return {
|
|
828
|
-
exitCode,
|
|
829
|
-
stdout,
|
|
830
|
-
stderr
|
|
831
|
-
};
|
|
832
|
-
}
|
|
833
|
-
function spawnVitestStreaming(args, opts = {}) {
|
|
834
|
-
const child = spawnVitestChild(args, opts, "pipe");
|
|
835
|
-
return {
|
|
836
|
-
child,
|
|
837
|
-
stdout: child.stdout,
|
|
838
|
-
stderr: child.stderr,
|
|
839
|
-
exited: waitExit(child)
|
|
840
|
-
};
|
|
841
|
-
}
|
|
842
|
-
function spawnVitestChild(args, opts, stdio) {
|
|
843
|
-
const vitestBin = resolveVitestBin();
|
|
844
|
-
return spawn(process.execPath, [vitestBin, ...args], {
|
|
845
|
-
cwd: opts.cwd,
|
|
846
|
-
env: opts.env ?? process.env,
|
|
847
|
-
stdio: [
|
|
848
|
-
"ignore",
|
|
849
|
-
stdio,
|
|
850
|
-
stdio
|
|
851
|
-
]
|
|
852
|
-
});
|
|
853
|
-
}
|
|
854
|
-
async function drain(stream) {
|
|
855
|
-
stream.setEncoding("utf8");
|
|
856
|
-
let buf = "";
|
|
857
|
-
for await (const chunk of stream) buf += chunk;
|
|
858
|
-
return buf;
|
|
859
|
-
}
|
|
860
|
-
async function teeDrain(stream, sink) {
|
|
861
|
-
stream.setEncoding("utf8");
|
|
862
|
-
let buf = "";
|
|
863
|
-
for await (const chunk of stream) {
|
|
864
|
-
buf += chunk;
|
|
865
|
-
sink.write(chunk);
|
|
866
|
-
}
|
|
867
|
-
return buf;
|
|
868
|
-
}
|
|
869
|
-
function waitExit(child) {
|
|
870
|
-
return new Promise((resolvePromise, rejectPromise) => {
|
|
871
|
-
child.once("exit", (code) => resolvePromise(code ?? 0));
|
|
872
|
-
child.once("error", rejectPromise);
|
|
873
|
-
});
|
|
874
|
-
}
|
|
875
|
-
//#endregion
|
|
876
|
-
//#region src/runtime/agent-browser-bin.ts
|
|
877
|
-
const require$1 = createRequire(import.meta.url);
|
|
878
|
-
function hasAgentBrowserShim(dir) {
|
|
879
|
-
try {
|
|
880
|
-
statSync(join(dir, "agent-browser"));
|
|
881
|
-
return true;
|
|
882
|
-
} catch {
|
|
883
|
-
return false;
|
|
884
|
-
}
|
|
885
|
-
}
|
|
886
|
-
/**
|
|
887
|
-
* Walks up from `start` looking for a `node_modules/.bin/agent-browser` shim.
|
|
888
|
-
* Returns the .bin directory containing the shim, or null if none is found.
|
|
889
|
-
*/
|
|
890
|
-
function findNodeModulesBin(start) {
|
|
891
|
-
let cur = start;
|
|
892
|
-
while (true) {
|
|
893
|
-
const candidate = join(cur, "node_modules", ".bin");
|
|
894
|
-
if (hasAgentBrowserShim(candidate)) return candidate;
|
|
895
|
-
const parent = dirname(cur);
|
|
896
|
-
if (parent === cur) return null;
|
|
897
|
-
cur = parent;
|
|
898
|
-
}
|
|
1160
|
+
/**
|
|
1161
|
+
* Walks up from `start` looking for a `node_modules/.bin/agent-browser` shim.
|
|
1162
|
+
* Returns the .bin directory containing the shim, or null if none is found.
|
|
1163
|
+
*/
|
|
1164
|
+
function findNodeModulesBin(start) {
|
|
1165
|
+
let cur = start;
|
|
1166
|
+
while (true) {
|
|
1167
|
+
const candidate = join(cur, "node_modules", ".bin");
|
|
1168
|
+
if (hasAgentBrowserShim(candidate)) return candidate;
|
|
1169
|
+
const parent = dirname(cur);
|
|
1170
|
+
if (parent === cur) return null;
|
|
1171
|
+
cur = parent;
|
|
1172
|
+
}
|
|
899
1173
|
}
|
|
900
1174
|
/**
|
|
901
1175
|
* Resolves the directory containing the `agent-browser` shim that npm/pnpm
|
|
@@ -908,10 +1182,10 @@ function findNodeModulesBin(start) {
|
|
|
908
1182
|
function resolveAgentBrowserBinDir() {
|
|
909
1183
|
const fromCwd = findNodeModulesBin(process.cwd());
|
|
910
1184
|
if (fromCwd) return fromCwd;
|
|
911
|
-
const fromSelf = findNodeModulesBin(dirname(require$
|
|
1185
|
+
const fromSelf = findNodeModulesBin(dirname(require$2.resolve("agent-browser/package.json")));
|
|
912
1186
|
if (fromSelf) return fromSelf;
|
|
913
1187
|
try {
|
|
914
|
-
const candidate = join(dirname(require$
|
|
1188
|
+
const candidate = join(dirname(require$2.resolve("agent-browser/package.json")), "node_modules", ".bin");
|
|
915
1189
|
if (hasAgentBrowserShim(candidate)) return candidate;
|
|
916
1190
|
} catch {}
|
|
917
1191
|
return null;
|
|
@@ -971,54 +1245,197 @@ function formatAgentBrowserUnavailableMessage() {
|
|
|
971
1245
|
].join("\n");
|
|
972
1246
|
}
|
|
973
1247
|
//#endregion
|
|
974
|
-
//#region src/runtime/
|
|
975
|
-
const
|
|
1248
|
+
//#region src/runtime/replay-validate.ts
|
|
1249
|
+
const SHORT_TIMEOUT_MS = 5e3;
|
|
1250
|
+
const ASSERT_TIMEOUT_MS = 1e4;
|
|
976
1251
|
/**
|
|
977
|
-
*
|
|
1252
|
+
* Convert one recorded action into the `agent-browser` arg list that would
|
|
1253
|
+
* exercise it. Returns `null` for actions that should not be validated
|
|
1254
|
+
* (snapshot has no side effect; assert types whose codegen forms aren't
|
|
1255
|
+
* directly verifiable here fall through to the caller's `unverifiable`
|
|
1256
|
+
* fallback).
|
|
978
1257
|
*/
|
|
979
|
-
function
|
|
980
|
-
|
|
981
|
-
|
|
1258
|
+
function actionToAbArgs(action, sessionName) {
|
|
1259
|
+
const base = ["--session", sessionName];
|
|
1260
|
+
const sub = (s) => s === void 0 ? "" : resolveEnvRefs(s);
|
|
1261
|
+
switch (action.command) {
|
|
1262
|
+
case "cookies_clear": return [
|
|
1263
|
+
...base,
|
|
1264
|
+
"cookies",
|
|
1265
|
+
"clear"
|
|
1266
|
+
];
|
|
1267
|
+
case "open": return [
|
|
1268
|
+
...base,
|
|
1269
|
+
"open",
|
|
1270
|
+
sub(action.value).replace(/^["']|["']$/g, "")
|
|
1271
|
+
];
|
|
1272
|
+
case "click": return [
|
|
1273
|
+
...base,
|
|
1274
|
+
"click",
|
|
1275
|
+
sub(action.selector)
|
|
1276
|
+
];
|
|
1277
|
+
case "dblclick": return [
|
|
1278
|
+
...base,
|
|
1279
|
+
"dblclick",
|
|
1280
|
+
sub(action.selector)
|
|
1281
|
+
];
|
|
1282
|
+
case "fill":
|
|
1283
|
+
case "type": return [
|
|
1284
|
+
...base,
|
|
1285
|
+
"fill",
|
|
1286
|
+
sub(action.selector),
|
|
1287
|
+
sub(action.value)
|
|
1288
|
+
];
|
|
1289
|
+
case "check": return [
|
|
1290
|
+
...base,
|
|
1291
|
+
"check",
|
|
1292
|
+
sub(action.selector)
|
|
1293
|
+
];
|
|
1294
|
+
case "uncheck": return [
|
|
1295
|
+
...base,
|
|
1296
|
+
"uncheck",
|
|
1297
|
+
sub(action.selector)
|
|
1298
|
+
];
|
|
1299
|
+
case "press": return [
|
|
1300
|
+
...base,
|
|
1301
|
+
"press",
|
|
1302
|
+
sub(action.value)
|
|
1303
|
+
];
|
|
1304
|
+
case "select": return [
|
|
1305
|
+
...base,
|
|
1306
|
+
"select",
|
|
1307
|
+
sub(action.selector),
|
|
1308
|
+
sub(action.value)
|
|
1309
|
+
];
|
|
1310
|
+
case "hover": return [
|
|
1311
|
+
...base,
|
|
1312
|
+
"hover",
|
|
1313
|
+
sub(action.selector)
|
|
1314
|
+
];
|
|
1315
|
+
case "scroll": {
|
|
1316
|
+
const args = [action.direction ?? "down", ...action.pixels ? [action.pixels] : []];
|
|
1317
|
+
return [
|
|
1318
|
+
...base,
|
|
1319
|
+
"scroll",
|
|
1320
|
+
...args
|
|
1321
|
+
];
|
|
1322
|
+
}
|
|
1323
|
+
case "drag": return [
|
|
1324
|
+
...base,
|
|
1325
|
+
"drag",
|
|
1326
|
+
sub(action.selector),
|
|
1327
|
+
sub(action.target)
|
|
1328
|
+
];
|
|
1329
|
+
case "wait": {
|
|
1330
|
+
const raw = sub(action.selector);
|
|
1331
|
+
if (!raw) return null;
|
|
1332
|
+
if (/^\d+$/.test(raw)) return null;
|
|
1333
|
+
if (raw.startsWith("text=")) return [
|
|
1334
|
+
...base,
|
|
1335
|
+
"wait",
|
|
1336
|
+
"--text",
|
|
1337
|
+
raw.slice(5),
|
|
1338
|
+
"--timeout",
|
|
1339
|
+
String(SHORT_TIMEOUT_MS)
|
|
1340
|
+
];
|
|
1341
|
+
return [
|
|
1342
|
+
...base,
|
|
1343
|
+
"wait",
|
|
1344
|
+
raw,
|
|
1345
|
+
"--timeout",
|
|
1346
|
+
String(SHORT_TIMEOUT_MS)
|
|
1347
|
+
];
|
|
1348
|
+
}
|
|
1349
|
+
case "snapshot": return null;
|
|
1350
|
+
case "assert": return assertToAbArgs(action, sub, sessionName);
|
|
1351
|
+
}
|
|
982
1352
|
}
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
1353
|
+
function assertToAbArgs(action, sub, sessionName) {
|
|
1354
|
+
const base = ["--session", sessionName];
|
|
1355
|
+
const val = sub(action.value ?? action.observation);
|
|
1356
|
+
const sel = sub(action.selector ?? action.observation);
|
|
1357
|
+
switch (action.assertType) {
|
|
1358
|
+
case "text_visible":
|
|
1359
|
+
if (!val) return null;
|
|
1360
|
+
return [
|
|
1361
|
+
...base,
|
|
1362
|
+
"wait",
|
|
1363
|
+
"--text",
|
|
1364
|
+
val,
|
|
1365
|
+
"--timeout",
|
|
1366
|
+
String(ASSERT_TIMEOUT_MS)
|
|
1367
|
+
];
|
|
1368
|
+
case "text_not_visible": return null;
|
|
1369
|
+
case "element_visible":
|
|
1370
|
+
if (!sel) return null;
|
|
1371
|
+
return [
|
|
1372
|
+
...base,
|
|
1373
|
+
"wait",
|
|
1374
|
+
sel,
|
|
1375
|
+
"--timeout",
|
|
1376
|
+
String(ASSERT_TIMEOUT_MS)
|
|
1377
|
+
];
|
|
1378
|
+
case "element_not_visible": return null;
|
|
1379
|
+
case "url_contains": return null;
|
|
1380
|
+
case "element_enabled":
|
|
1381
|
+
case "element_disabled":
|
|
1382
|
+
case "element_checked":
|
|
1383
|
+
case "element_unchecked":
|
|
1384
|
+
if (!sel || sel.startsWith("text=") || sel.startsWith("[aria-label=")) return null;
|
|
1385
|
+
return [
|
|
1386
|
+
...base,
|
|
1387
|
+
"wait",
|
|
1388
|
+
sel,
|
|
1389
|
+
"--timeout",
|
|
1390
|
+
String(ASSERT_TIMEOUT_MS)
|
|
1391
|
+
];
|
|
1392
|
+
default: return null;
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
function validateActions(actions, opts) {
|
|
1396
|
+
const kept = [];
|
|
1397
|
+
const dropped = [];
|
|
1398
|
+
let skipUntilSideEffect = false;
|
|
1399
|
+
for (let i = 0; i < actions.length; i++) {
|
|
1400
|
+
const action = actions[i];
|
|
1401
|
+
if (skipUntilSideEffect && isPassiveCommand(action.command)) {
|
|
1402
|
+
dropped.push({
|
|
1403
|
+
index: i,
|
|
1404
|
+
action,
|
|
1405
|
+
reason: "skipped after a preceding action failed"
|
|
1406
|
+
});
|
|
1407
|
+
continue;
|
|
1408
|
+
}
|
|
1409
|
+
skipUntilSideEffect = false;
|
|
1410
|
+
const args = actionToAbArgs(action, opts.sessionName);
|
|
1411
|
+
if (args === null) {
|
|
1412
|
+
kept.push(action);
|
|
1413
|
+
continue;
|
|
1414
|
+
}
|
|
1415
|
+
const result = spawnAB(args);
|
|
1416
|
+
if (result.status === 0) {
|
|
1417
|
+
kept.push(action);
|
|
1418
|
+
continue;
|
|
1419
|
+
}
|
|
1420
|
+
dropped.push({
|
|
1421
|
+
index: i,
|
|
1422
|
+
action,
|
|
1423
|
+
reason: (result.stderr.trim() || result.stdout.trim() || `agent-browser exit ${result.status ?? "?"}`).slice(0, 200)
|
|
1424
|
+
});
|
|
1425
|
+
skipUntilSideEffect = true;
|
|
1426
|
+
}
|
|
1427
|
+
return {
|
|
1428
|
+
kept,
|
|
1429
|
+
dropped
|
|
1430
|
+
};
|
|
996
1431
|
}
|
|
997
1432
|
/**
|
|
998
|
-
*
|
|
999
|
-
*
|
|
1000
|
-
*
|
|
1001
|
-
*
|
|
1002
|
-
* Returns a JavaScript string-literal expression (template literal when env
|
|
1003
|
-
* refs are present, plain string literal otherwise).
|
|
1004
|
-
*
|
|
1005
|
-
* Examples:
|
|
1006
|
-
* "${PASSWORD}" -> '`${process.env.PASSWORD ?? ""}`'
|
|
1007
|
-
* "user-${SUFFIX}@x.com" -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
|
|
1008
|
-
* "literal value" -> '"literal value"'
|
|
1433
|
+
* Passive (read-only) commands whose only effect is observation. When a
|
|
1434
|
+
* preceding action fails, dropping these too is the right move because
|
|
1435
|
+
* they were trying to observe state the failed action would have set up.
|
|
1009
1436
|
*/
|
|
1010
|
-
function
|
|
1011
|
-
|
|
1012
|
-
const escaped = value.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, (match, offset, source) => {
|
|
1013
|
-
ENV_VAR_RE.lastIndex = 0;
|
|
1014
|
-
let m;
|
|
1015
|
-
while ((m = ENV_VAR_RE.exec(source)) !== null) if (m.index === offset) return "${";
|
|
1016
|
-
return "\\${";
|
|
1017
|
-
});
|
|
1018
|
-
ENV_VAR_RE.lastIndex = 0;
|
|
1019
|
-
return `\`${escaped.replace(ENV_VAR_RE, (_, braced, plain) => {
|
|
1020
|
-
return `\${process.env.${braced ?? plain ?? ""} ?? ""}`;
|
|
1021
|
-
})}\``;
|
|
1437
|
+
function isPassiveCommand(cmd) {
|
|
1438
|
+
return cmd === "snapshot" || cmd === "wait" || cmd === "assert";
|
|
1022
1439
|
}
|
|
1023
1440
|
//#endregion
|
|
1024
1441
|
//#region src/cli/trace.ts
|
|
@@ -1038,29 +1455,35 @@ async function runTrace(featureName, specName, model) {
|
|
|
1038
1455
|
throw e;
|
|
1039
1456
|
}
|
|
1040
1457
|
await ensureCcqaDir();
|
|
1458
|
+
await warnStaleBlockArtifacts();
|
|
1041
1459
|
const spec = parseTestSpec(await readSpecFile(featureName, specName));
|
|
1042
|
-
const
|
|
1460
|
+
const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
|
|
1043
1461
|
meta("spec", spec.title);
|
|
1044
|
-
meta("
|
|
1045
|
-
|
|
1046
|
-
meta("
|
|
1462
|
+
meta("steps", expanded.length);
|
|
1463
|
+
const includes = collectIncludedBlockNames(spec);
|
|
1464
|
+
if (includes.length > 0) meta("blocks", includes.join(", "));
|
|
1047
1465
|
blank();
|
|
1048
1466
|
const sessionName = generateSessionName();
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
}
|
|
1054
|
-
const systemPrompt = buildTraceSystemPrompt(spec, {
|
|
1055
|
-
sessionName,
|
|
1056
|
-
skipCookiesClear: hasSetups
|
|
1467
|
+
const systemPrompt = buildTraceSystemPrompt({
|
|
1468
|
+
title: spec.title,
|
|
1469
|
+
steps: expanded,
|
|
1470
|
+
sessionName
|
|
1057
1471
|
});
|
|
1058
|
-
const prompt = buildTracePrompt(spec);
|
|
1472
|
+
const prompt = buildTracePrompt(spec.title);
|
|
1059
1473
|
info("Running agent-browser session...");
|
|
1060
1474
|
blank();
|
|
1061
1475
|
const routeSteps = [];
|
|
1062
1476
|
let overallStatus = "passed";
|
|
1063
1477
|
const traceActions = [];
|
|
1478
|
+
let currentStepId;
|
|
1479
|
+
let relatedPathsBuffer = null;
|
|
1480
|
+
const withStepId = (action) => {
|
|
1481
|
+
if (!action) return null;
|
|
1482
|
+
return currentStepId ? {
|
|
1483
|
+
...action,
|
|
1484
|
+
stepId: currentStepId
|
|
1485
|
+
} : action;
|
|
1486
|
+
};
|
|
1064
1487
|
const { isError } = await invokeClaudeStreaming({
|
|
1065
1488
|
prompt,
|
|
1066
1489
|
systemPrompt,
|
|
@@ -1076,7 +1499,7 @@ async function runTrace(featureName, specName, model) {
|
|
|
1076
1499
|
},
|
|
1077
1500
|
model,
|
|
1078
1501
|
onAbAction: (abAction) => {
|
|
1079
|
-
const action = parseAbAction(abAction);
|
|
1502
|
+
const action = withStepId(parseAbAction(abAction));
|
|
1080
1503
|
if (action) traceActions.push(action);
|
|
1081
1504
|
},
|
|
1082
1505
|
onAbActionFailed: () => {
|
|
@@ -1087,10 +1510,19 @@ async function runTrace(featureName, specName, model) {
|
|
|
1087
1510
|
for (const block of msg.message.content ?? []) {
|
|
1088
1511
|
if (block.type !== "text" || !block.text) continue;
|
|
1089
1512
|
const text = block.text;
|
|
1090
|
-
|
|
1091
|
-
|
|
1513
|
+
if (relatedPathsBuffer !== null) relatedPathsBuffer += text + "\n";
|
|
1514
|
+
else {
|
|
1515
|
+
const idx = text.indexOf("RELATED_PATHS_BEGIN");
|
|
1516
|
+
if (idx !== -1) relatedPathsBuffer = text.slice(idx) + "\n";
|
|
1517
|
+
}
|
|
1092
1518
|
for (const line of text.split("\n")) {
|
|
1093
1519
|
const trimmed = line.trim();
|
|
1520
|
+
const status = parseStatusLine(line);
|
|
1521
|
+
if (status) {
|
|
1522
|
+
if (status.type === "STEP_START" && status.stepId) currentStepId = status.stepId;
|
|
1523
|
+
step(status.type, status.stepId, status.detail);
|
|
1524
|
+
continue;
|
|
1525
|
+
}
|
|
1094
1526
|
if (trimmed.startsWith("ROUTE_STEP|")) {
|
|
1095
1527
|
const routeStep = parseRouteStep(trimmed);
|
|
1096
1528
|
if (routeStep) {
|
|
@@ -1098,56 +1530,51 @@ async function runTrace(featureName, specName, model) {
|
|
|
1098
1530
|
if (routeStep.status === "FAILED") overallStatus = "failed";
|
|
1099
1531
|
}
|
|
1100
1532
|
} else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
|
|
1101
|
-
const action = parseAbAction(trimmed);
|
|
1533
|
+
const action = withStepId(parseAbAction(trimmed));
|
|
1102
1534
|
if (action) traceActions.push(action);
|
|
1103
1535
|
}
|
|
1104
1536
|
}
|
|
1105
1537
|
}
|
|
1106
1538
|
});
|
|
1107
1539
|
if (isError) overallStatus = "failed";
|
|
1540
|
+
const validatedActions = validateAndReport(traceActions);
|
|
1108
1541
|
const route = {
|
|
1109
1542
|
specName,
|
|
1110
1543
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1111
1544
|
status: overallStatus,
|
|
1112
1545
|
steps: routeSteps
|
|
1113
1546
|
};
|
|
1114
|
-
const [routePath, actionsPath] = await Promise.all([saveRoute(featureName, specName, route), saveTraceActions(featureName, specName,
|
|
1547
|
+
const [routePath, actionsPath] = await Promise.all([saveRoute(featureName, specName, route), saveTraceActions(featureName, specName, validatedActions)]);
|
|
1115
1548
|
blank();
|
|
1116
1549
|
meta("route", routePath);
|
|
1117
1550
|
meta("saved", actionsPath);
|
|
1118
|
-
meta("actions",
|
|
1551
|
+
meta("actions", validatedActions.length);
|
|
1119
1552
|
meta("status", overallStatus.toUpperCase());
|
|
1553
|
+
const relatedPaths = relatedPathsBuffer !== null ? parseRelatedPathsBlock(relatedPathsBuffer) : null;
|
|
1554
|
+
if (relatedPaths !== null) {
|
|
1555
|
+
const written = await updateSpecRelatedPaths(featureName, specName, relatedPaths);
|
|
1556
|
+
if (written) meta("relatedPaths", `${relatedPaths.length} path(s) written to ${written}`);
|
|
1557
|
+
} else warn("trace did not emit a RELATED_PATHS block; drift --changed cannot scope this spec");
|
|
1120
1558
|
hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
|
|
1121
1559
|
}
|
|
1122
1560
|
/**
|
|
1123
|
-
*
|
|
1124
|
-
*
|
|
1561
|
+
* Run the post-trace replay validation and emit user-visible drop reports.
|
|
1562
|
+
* Splitting this out keeps `runTrace` readable; the function is pure aside
|
|
1563
|
+
* from `log.*` and the agent-browser invocations inside `validateActions`.
|
|
1125
1564
|
*/
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
const tmpPath = join(getSetupDir(ref.name), `_run.spec.ts`);
|
|
1136
|
-
await writeFile(tmpPath, script, "utf-8");
|
|
1137
|
-
try {
|
|
1138
|
-
const { exitCode, stdout, stderr } = await spawnVitestCaptured([
|
|
1139
|
-
"run",
|
|
1140
|
-
"--config",
|
|
1141
|
-
bundledVitestConfigPath(),
|
|
1142
|
-
tmpPath
|
|
1143
|
-
]);
|
|
1144
|
-
process.stdout.write(stdout);
|
|
1145
|
-
if (stderr) process.stderr.write(stderr);
|
|
1146
|
-
if (exitCode !== 0) throw new Error(`Setup '${ref.name}' failed (exit ${exitCode})`);
|
|
1147
|
-
} finally {
|
|
1148
|
-
await unlink(tmpPath).catch(() => {});
|
|
1149
|
-
}
|
|
1565
|
+
function validateAndReport(actions) {
  // Nothing was recorded, so there is nothing to replay: return the very same array.
  if (actions.length === 0) return actions;

  // The replay runs in its own session, named after a freshly generated session name.
  const sessionName = `${generateSessionName()}-validate`;
  blank();
  info("post-trace validation (replaying recorded actions)...");

  const outcome = validateActions(actions, { sessionName });
  const { kept, dropped } = outcome;

  // One warning per dropped action; `index` is 0-based, the report is 1-based.
  for (const entry of dropped) {
    const { command, selector } = entry.action;
    const target = selector ? " " + selector : "";
    warn(`dropped action #${entry.index + 1} (${command}${target}): ${entry.reason}`);
  }

  // The summary only mentions drops when there were any.
  const ratio = `${kept.length}/${actions.length} kept`;
  const summary = dropped.length === 0 ? ratio : `${ratio} (${dropped.length} dropped)`;
  meta("validated", summary);
  return kept;
}
|
|
1152
1579
|
function parseStatusLine(text) {
|
|
1153
1580
|
for (const line of text.split("\n")) {
|
|
@@ -1244,21 +1671,32 @@ function parseAbAction(line) {
|
|
|
1244
1671
|
}
|
|
1245
1672
|
//#endregion
|
|
1246
1673
|
//#region src/codegen/actions-to-script.ts
|
|
1247
|
-
function actionsToScript(
|
|
1674
|
+
function actionsToScript(input) {
|
|
1675
|
+
const { actions, testName, stepMarkers = [] } = input;
|
|
1248
1676
|
const parts = [...[
|
|
1249
1677
|
`import { test } from "vitest";`,
|
|
1250
1678
|
`import { spawnSync } from "node:child_process";`,
|
|
1251
|
-
`import {
|
|
1679
|
+
`import { ${[
|
|
1680
|
+
"ab",
|
|
1681
|
+
"abWait",
|
|
1682
|
+
"abAssertTextVisible",
|
|
1683
|
+
"abAssertVisible",
|
|
1684
|
+
"abAssertNotVisible",
|
|
1685
|
+
"abAssertUrl",
|
|
1686
|
+
"abAssertEnabled",
|
|
1687
|
+
"abAssertDisabled",
|
|
1688
|
+
"abAssertChecked",
|
|
1689
|
+
"abAssertUnchecked"
|
|
1690
|
+
].join(", ")} } from "ccqa/test-helpers";`,
|
|
1252
1691
|
"",
|
|
1253
|
-
`// Single session shared across
|
|
1254
|
-
`//
|
|
1255
|
-
`//
|
|
1692
|
+
`// Single session shared across the run. Use ||= so an outer harness`,
|
|
1693
|
+
`// (e.g. ccqa generate's auto-fix loop) can pre-set the session name`,
|
|
1694
|
+
`// and inspect the same session after the run finishes.`,
|
|
1256
1695
|
`process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
|
|
1257
1696
|
""
|
|
1258
1697
|
]];
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
parts.push(`test(${JSON.stringify(title)}, () => {`, body, "}, 5 * 60 * 1000);", "");
|
|
1698
|
+
const body = actionsToLines(actions, stepMarkers).map((l) => ` ${l}`).join("\n");
|
|
1699
|
+
parts.push(`test(${JSON.stringify(testName)}, () => {`, body, "}, 5 * 60 * 1000);", "");
|
|
1262
1700
|
return parts.join("\n");
|
|
1263
1701
|
}
|
|
1264
1702
|
/** Commands that interact with page elements and need the page to be loaded */
|
|
@@ -1273,11 +1711,18 @@ const ELEMENT_COMMANDS = new Set([
|
|
|
1273
1711
|
"hover",
|
|
1274
1712
|
"drag"
|
|
1275
1713
|
]);
|
|
1276
|
-
function actionsToLines(actions) {
|
|
1714
|
+
function actionsToLines(actions, stepMarkers) {
|
|
1277
1715
|
const lines = [];
|
|
1278
1716
|
let prevLine = null;
|
|
1279
1717
|
let prevCommand = null;
|
|
1280
|
-
|
|
1718
|
+
const markerByIndex = new Map(stepMarkers.map((m) => [m.actionIndex, m]));
|
|
1719
|
+
for (let i = 0; i < actions.length; i++) {
|
|
1720
|
+
const marker = markerByIndex.get(i);
|
|
1721
|
+
if (marker) {
|
|
1722
|
+
if (lines.length > 0) lines.push("");
|
|
1723
|
+
lines.push(`// step: ${marker.stepId} [${marker.source}]`);
|
|
1724
|
+
}
|
|
1725
|
+
const action = actions[i];
|
|
1281
1726
|
const line = actionToLine(action);
|
|
1282
1727
|
if (line === null) continue;
|
|
1283
1728
|
if (line === prevLine) continue;
|
|
@@ -1296,16 +1741,16 @@ function actionToLine(action) {
|
|
|
1296
1741
|
if ("selector" in action && isRefSelector(action.selector)) return null;
|
|
1297
1742
|
switch (action.command) {
|
|
1298
1743
|
case "cookies_clear": return `ab("cookies", "clear");`;
|
|
1299
|
-
case "open": return `ab("open", ${
|
|
1744
|
+
case "open": return `ab("open", ${jExpr((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
|
|
1300
1745
|
case "snapshot": return action.observation ? `// ${action.observation}` : null;
|
|
1301
1746
|
case "click": return `ab("click", ${j(action.selector)});`;
|
|
1302
1747
|
case "dblclick": return `ab("dblclick", ${j(action.selector)});`;
|
|
1303
|
-
case "fill": return `ab("fill", ${j(action.selector)}, ${
|
|
1304
|
-
case "type": return `ab("fill", ${j(action.selector)}, ${
|
|
1748
|
+
case "fill": return `ab("fill", ${j(action.selector)}, ${jExpr(action.value)});`;
|
|
1749
|
+
case "type": return `ab("fill", ${j(action.selector)}, ${jExpr(action.value)});`;
|
|
1305
1750
|
case "check": return `ab("check", ${j(action.selector)});`;
|
|
1306
1751
|
case "uncheck": return `ab("uncheck", ${j(action.selector)});`;
|
|
1307
|
-
case "press": return `ab("press", ${
|
|
1308
|
-
case "select": return `ab("select", ${j(action.selector)}, ${
|
|
1752
|
+
case "press": return `ab("press", ${jExpr(action.value)});`;
|
|
1753
|
+
case "select": return `ab("select", ${j(action.selector)}, ${jExpr(action.value)});`;
|
|
1309
1754
|
case "hover": return `ab("hover", ${j(action.selector)});`;
|
|
1310
1755
|
case "scroll": return `ab("scroll", ${[action.direction ?? "down", ...action.pixels ? [action.pixels] : []].map(j).join(", ")});`;
|
|
1311
1756
|
case "drag": return `ab("drag", ${j(action.selector)}, ${j(action.target)});`;
|
|
@@ -1321,10 +1766,10 @@ function actionToLine(action) {
|
|
|
1321
1766
|
let assertLine = null;
|
|
1322
1767
|
switch (action.assertType) {
|
|
1323
1768
|
case "text_visible":
|
|
1324
|
-
if (val) assertLine = `abAssertTextVisible(${
|
|
1769
|
+
if (val) assertLine = `abAssertTextVisible(${jExpr(val)});`;
|
|
1325
1770
|
break;
|
|
1326
1771
|
case "text_not_visible":
|
|
1327
|
-
if (val) assertLine = `abAssertNotVisible(${
|
|
1772
|
+
if (val) assertLine = `abAssertNotVisible(${jExpr("text=" + val)}, 180_000);`;
|
|
1328
1773
|
break;
|
|
1329
1774
|
case "element_visible":
|
|
1330
1775
|
if (sel) assertLine = `abAssertVisible(${j(sel)});`;
|
|
@@ -1333,7 +1778,7 @@ function actionToLine(action) {
|
|
|
1333
1778
|
if (sel) assertLine = `abAssertNotVisible(${j(sel)});`;
|
|
1334
1779
|
break;
|
|
1335
1780
|
case "url_contains":
|
|
1336
|
-
if (val) assertLine = `abAssertUrl(${
|
|
1781
|
+
if (val) assertLine = `abAssertUrl(${jExpr(val)});`;
|
|
1337
1782
|
break;
|
|
1338
1783
|
case "element_enabled":
|
|
1339
1784
|
if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertEnabled(${j(sel)});`;
|
|
@@ -1356,6 +1801,14 @@ function actionToLine(action) {
|
|
|
1356
1801
|
}
|
|
1357
1802
|
/** JSON.stringify — produces a quoted string literal safe for embedding in TS source. */
|
|
1358
1803
|
const j = (s) => JSON.stringify(s);
|
|
1804
|
+
/**
|
|
1805
|
+
* Like `j`, but recognises `$VAR` / `${VAR}` env-ref forms in the value and
|
|
1806
|
+
* emits them as `${process.env.VAR ?? ""}` template-literal substitutions
|
|
1807
|
+
* instead of baking the literal `$VAR` string into the script. Used for
|
|
1808
|
+
* values that came from a spec or block param: form fills, opened URLs,
|
|
1809
|
+
* assertion texts/URLs.
|
|
1810
|
+
*/
|
|
1811
|
+
const jExpr = (s) => envRefsToJsExpression(s);
|
|
1359
1812
|
//#endregion
|
|
1360
1813
|
//#region src/prompts/codegen.ts
|
|
1361
1814
|
function buildCleanupPrompt(actions) {
|
|
@@ -1388,6 +1841,109 @@ ${actions.map((a, i) => {
|
|
|
1388
1841
|
}).join("\n")}`;
|
|
1389
1842
|
}
|
|
1390
1843
|
//#endregion
|
|
1844
|
+
//#region src/codegen/cleanup.ts
|
|
1845
|
+
/**
 * Best-effort cleanup of a recorded action list. Hands the actions to
 * Claude with the cleanup prompt and parses the returned JSON array; on
 * any failure (Claude error, malformed JSON, empty array, or an array whose
 * entries are not plain objects) falls back to the original input so the
 * caller can always proceed.
 *
 * Note: the prompt deliberately does not surface the `stepId` field.
 * Callers that need to preserve stepIds across cleanup (only `ccqa generate`
 * today) must re-attach them after this returns.
 *
 * @param {object[]} actions Recorded actions to clean up.
 * @param {string} [model] Model identifier forwarded to the Claude invocation.
 * @returns {Promise<object[]>} The cleaned array, or `actions` itself (same
 *   reference) when cleanup could not produce a usable result.
 */
async function cleanupActions$1(actions, model) {
  // Later stages read properties off every entry (`"selector" in action`,
  // `action.stepId`), so a reply such as `[null]` or `["click"]` must be
  // rejected here instead of crashing them.
  const isActionLike = (entry) => entry !== null && typeof entry === "object" && !Array.isArray(entry);
  try {
    const { result, isError } = await invokeClaudeStreaming({
      prompt: buildCleanupPrompt(actions),
      disableBuiltinTools: true,
      maxTurns: 1,
      model
    }, () => {});
    if (isError || !result) return actions;
    // Strip an optional ```json ... ``` fence wrapped around the whole reply.
    const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
    const parsed = JSON.parse(json);
    if (Array.isArray(parsed) && parsed.length > 0 && parsed.every(isActionLike)) return parsed;
  } catch {
    // Deliberate best-effort: any failure falls through to the original input.
  }
  return actions;
}
|
|
1870
|
+
//#endregion
|
|
1871
|
+
//#region src/runtime/bundled-config.ts
|
|
1872
|
+
/** Locations, relative to this module, probed for the bundled vitest config, in priority order. */
const CANDIDATES = [
  "../runtime/vitest.config.mjs",
  "./vitest.config.mjs",
  "./vitest.config.ts"
];
/**
 * Locate the vitest config shipped next to this module.
 *
 * @returns {string} Absolute filesystem path of the first candidate that is
 *   accessible; when none is, the path of `./vitest.config.ts` next to this
 *   module is returned even though it could not be accessed.
 */
function bundledVitestConfigPath() {
  const toPath = (rel) => fileURLToPath(new URL(rel, import.meta.url));
  const isAccessible = (path) => {
    try {
      accessSync(path);
      return true;
    } catch {
      return false;
    }
  };
  const firstHit = CANDIDATES.map(toPath).find(isAccessible);
  return firstHit ?? toPath("./vitest.config.ts");
}
|
|
1887
|
+
//#endregion
|
|
1888
|
+
//#region src/runtime/spawn-vitest.ts
|
|
1889
|
+
/** CommonJS-style `require` anchored at this module, used to locate the installed vitest package. */
const require$1 = createRequire(import.meta.url);
/**
 * Resolve the absolute path of vitest's CLI entry script from its package.json.
 *
 * @returns {string} Absolute path of the `bin` script (the `vitest` entry when
 *   `bin` is an object).
 * @throws {Error} When vitest's package.json declares no usable bin entry, or
 *   when `vitest/package.json` cannot be resolved at all.
 */
function resolveVitestBin() {
  const pkgPath = require$1.resolve("vitest/package.json");
  const manifest = require$1(pkgPath);
  const { bin } = manifest;
  // `bin` may be a single path string or a map of command name to path.
  const binRel = typeof bin === "string" ? bin : bin?.vitest;
  if (binRel) return resolve(dirname(pkgPath), binRel);
  throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
}
|
|
1897
|
+
/**
 * Run vitest with piped output, mirroring the child's stdout/stderr to this
 * process's stdout/stderr while also capturing both as text.
 *
 * @param {string[]} args Arguments passed to the vitest CLI.
 * @param {{cwd?: string, env?: object}} [opts] Spawn options forwarded to `spawnVitestChild`.
 * @returns {Promise<{exitCode: number, stdout: string, stderr: string}>}
 *   Settles once both streams are drained and the child has exited.
 */
async function spawnVitestTeed(args, opts = {}) {
  const child = spawnVitestChild(args, opts, "pipe");
  // Start all three waits before awaiting any, so neither pipe stalls the other.
  const capturedOut = teeDrain(child.stdout, process.stdout);
  const capturedErr = teeDrain(child.stderr, process.stderr);
  const exit = waitExit(child);
  const [stdout, stderr, exitCode] = await Promise.all([capturedOut, capturedErr, exit]);
  return { exitCode, stdout, stderr };
}
|
|
1910
|
+
/**
 * Start vitest with piped output and hand the raw streams to the caller
 * instead of draining them.
 *
 * @param {string[]} args Arguments passed to the vitest CLI.
 * @param {{cwd?: string, env?: object}} [opts] Spawn options forwarded to `spawnVitestChild`.
 * @returns {{child: object, stdout: object, stderr: object, exited: Promise<number>}}
 *   The child process, its stdout/stderr streams, and a promise for its exit code.
 */
function spawnVitestStreaming(args, opts = {}) {
  const child = spawnVitestChild(args, opts, "pipe");
  const { stdout, stderr } = child;
  const exited = waitExit(child);
  return { child, stdout, stderr, exited };
}
|
|
1919
|
+
/**
 * Launch the vitest CLI script under the current Node executable.
 *
 * @param {string[]} args Arguments appended after the vitest bin path.
 * @param {{cwd?: string, env?: object}} opts `cwd` is passed through as-is;
 *   `env` falls back to `process.env` when null or undefined.
 * @param {string} stdio Stdio mode used for both stdout and stderr; stdin is always ignored.
 * @returns {object} The child process returned by `spawn`.
 */
function spawnVitestChild(args, opts, stdio) {
  const argv = [resolveVitestBin(), ...args];
  const env = opts.env ?? process.env;
  const spawnOptions = {
    cwd: opts.cwd,
    env,
    stdio: ["ignore", stdio, stdio]
  };
  return spawn(process.execPath, argv, spawnOptions);
}
|
|
1931
|
+
/**
 * Read `stream` to its end as UTF-8 text, forwarding every chunk to `sink`
 * as it arrives and returning the concatenation of all chunks.
 *
 * @param {object} stream Readable stream; its encoding is set to "utf8".
 * @param {{write: Function}} sink Receives each chunk via `write`.
 * @returns {Promise<string>} Everything the stream produced.
 */
async function teeDrain(stream, sink) {
  // With an encoding set, the stream yields strings rather than Buffers.
  stream.setEncoding("utf8");
  const pieces = [];
  for await (const piece of stream) {
    pieces.push(piece);
    sink.write(piece);
  }
  return pieces.join("");
}
|
|
1940
|
+
/**
 * Wait for a child process to finish.
 *
 * @param {object} child Child process (any emitter of "exit" and "error").
 * @returns {Promise<number>} Resolves with the exit code. A child terminated
 *   by a signal reports `code === null`; that is resolved as 1 so a killed
 *   run is never mistaken for a successful one. Rejects with the emitted
 *   error if the process fails to spawn or otherwise emits "error".
 */
function waitExit(child) {
  return new Promise((resolvePromise, rejectPromise) => {
    // `code` is null only when the process was terminated by a signal.
    // Mapping that to 0 would report a killed run as a pass, so use a
    // generic non-zero code instead.
    child.once("exit", (code) => resolvePromise(code ?? 1));
    child.once("error", rejectPromise);
  });
}
|
|
1946
|
+
//#endregion
|
|
1391
1947
|
//#region src/diagnose/apply.ts
|
|
1392
1948
|
function applyDiagnosis(script, diagnosis) {
|
|
1393
1949
|
switch (diagnosis.type) {
|
|
@@ -1438,6 +1994,7 @@ function applyTiming(script, fixes) {
|
|
|
1438
1994
|
summary: summary.join("; ")
|
|
1439
1995
|
};
|
|
1440
1996
|
}
|
|
1997
|
+
const REMOVABLE_ASSERT_RE = /\b(?:abAssert\w*|abWait)\b/;
|
|
1441
1998
|
function applyOverAssertion(script, lineNumbers) {
|
|
1442
1999
|
if (lineNumbers.length === 0) return {
|
|
1443
2000
|
applied: false,
|
|
@@ -1450,13 +2007,13 @@ function applyOverAssertion(script, lineNumbers) {
|
|
|
1450
2007
|
const idx = line - 1;
|
|
1451
2008
|
if (idx < 0 || idx >= lines.length) continue;
|
|
1452
2009
|
const content = lines[idx];
|
|
1453
|
-
if (
|
|
2010
|
+
if (!REMOVABLE_ASSERT_RE.test(content)) continue;
|
|
1454
2011
|
removed.push(`line ${line}: ${content.trim()}`);
|
|
1455
2012
|
lines.splice(idx, 1);
|
|
1456
2013
|
}
|
|
1457
2014
|
if (removed.length === 0) return {
|
|
1458
2015
|
applied: false,
|
|
1459
|
-
reason: "no abAssert lines matched the proposed line numbers"
|
|
2016
|
+
reason: "no abAssert/abWait lines matched the proposed line numbers"
|
|
1460
2017
|
};
|
|
1461
2018
|
return {
|
|
1462
2019
|
applied: true,
|
|
@@ -1502,7 +2059,7 @@ function previewDiff(before, after) {
|
|
|
1502
2059
|
//#endregion
|
|
1503
2060
|
//#region src/diagnose/prompt.ts
|
|
1504
2061
|
function buildDiagnosePrompt(input) {
|
|
1505
|
-
const { script,
|
|
2062
|
+
const { script, specYaml, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
|
|
1506
2063
|
const numbered = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
|
|
1507
2064
|
return `You are diagnosing a failing E2E test. The test was generated from a recorded trace of the original interaction. Compare the failing run against the original spec and recorded actions to determine WHY the test failed and what the right fix is.
|
|
1508
2065
|
|
|
@@ -1593,11 +2150,11 @@ Pick exactly ONE category. The output JSON must follow the shape for that catego
|
|
|
1593
2150
|
- Your **final** assistant message must start with \`{\` and end with \`}\` — a single JSON object, nothing before or after. No prose preamble like "Confirmed: ...", no markdown fences, no commentary, no tool calls in the same turn. If you have an analysis sentence, put it in the \`reasoning\` field.
|
|
1594
2151
|
- Line numbers refer to the numbered test script below (1-based).
|
|
1595
2152
|
- For SELECTOR_DRIFT, \`oldSelector\` must match a substring of the script at that line; \`newSelector\` must be backed by a concrete file:line you read with Grep/Read (do not invent). Cite the evidence in \`reasoning\`.
|
|
1596
|
-
- For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`).
|
|
1597
|
-
- Cross-check assertions against the spec
|
|
2153
|
+
- For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`) or existence-checking waits (\`abWait\`); a recorded \`abWait("[selector]")\` is an implicit existence assertion and a valid removal candidate when the spec never required that element to be present.
|
|
2154
|
+
- Cross-check assertions against the spec YAML. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.
|
|
1598
2155
|
|
|
1599
|
-
## Test Spec (
|
|
1600
|
-
${
|
|
2156
|
+
## Test Spec (spec.yaml)
|
|
2157
|
+
${specYaml}
|
|
1601
2158
|
|
|
1602
2159
|
## Recorded Actions (actions.json summary)
|
|
1603
2160
|
${actions.map((a, i) => {
|
|
@@ -1808,8 +2365,7 @@ function normaliseSleepFixes(raw) {
|
|
|
1808
2365
|
const line = typeof item["line"] === "number" ? item["line"] : null;
|
|
1809
2366
|
if (line === null) continue;
|
|
1810
2367
|
const reason = typeof item["reason"] === "string" ? item["reason"] : "";
|
|
1811
|
-
|
|
1812
|
-
if (kind === "insert" || typeof item["seconds"] === "number" && item["increase_to"] === void 0) {
|
|
2368
|
+
if (item["kind"] === "insert") {
|
|
1813
2369
|
const seconds = typeof item["seconds"] === "number" ? item["seconds"] : null;
|
|
1814
2370
|
if (seconds === null) continue;
|
|
1815
2371
|
out.push({
|
|
@@ -1818,9 +2374,7 @@ function normaliseSleepFixes(raw) {
|
|
|
1818
2374
|
seconds,
|
|
1819
2375
|
reason
|
|
1820
2376
|
});
|
|
1821
|
-
|
|
1822
|
-
}
|
|
1823
|
-
if (kind === "increase" || typeof item["increase_to"] === "number") {
|
|
2377
|
+
} else if (item["kind"] === "increase") {
|
|
1824
2378
|
const increaseTo = typeof item["increase_to"] === "number" ? item["increase_to"] : null;
|
|
1825
2379
|
if (increaseTo === null) continue;
|
|
1826
2380
|
out.push({
|
|
@@ -1829,7 +2383,6 @@ function normaliseSleepFixes(raw) {
|
|
|
1829
2383
|
increase_to: increaseTo,
|
|
1830
2384
|
reason
|
|
1831
2385
|
});
|
|
1832
|
-
continue;
|
|
1833
2386
|
}
|
|
1834
2387
|
}
|
|
1835
2388
|
return out;
|
|
@@ -2014,7 +2567,7 @@ const DEFAULT_CONFIDENCE_THRESHOLD = .8;
|
|
|
2014
2567
|
* or the diagnose loop chose to bail out early.
|
|
2015
2568
|
*/
|
|
2016
2569
|
async function runAutoFixLoop(input) {
|
|
2017
|
-
const { scriptPath, initialRun,
|
|
2570
|
+
const { scriptPath, initialRun, specYaml, actions, maxRetries, mode, runVitest, agentBrowserSession, outputLanguage, model } = input;
|
|
2018
2571
|
let { exitCode, output, currentScript } = initialRun;
|
|
2019
2572
|
if (exitCode === 0) return true;
|
|
2020
2573
|
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
@@ -2025,7 +2578,7 @@ async function runAutoFixLoop(input) {
|
|
|
2025
2578
|
else fix("page snapshot unavailable; continuing without it");
|
|
2026
2579
|
const fixed = await diagnoseAndFix({
|
|
2027
2580
|
script: currentScript,
|
|
2028
|
-
|
|
2581
|
+
specYaml,
|
|
2029
2582
|
actions,
|
|
2030
2583
|
failureLog: output,
|
|
2031
2584
|
pageSnapshot: pageSnapshot ?? void 0,
|
|
@@ -2046,10 +2599,10 @@ async function runAutoFixLoop(input) {
|
|
|
2046
2599
|
return false;
|
|
2047
2600
|
}
|
|
2048
2601
|
async function diagnoseAndFix(input) {
|
|
2049
|
-
const { script,
|
|
2602
|
+
const { script, specYaml, actions, failureLog, pageSnapshot, mode, outputLanguage, model } = input;
|
|
2050
2603
|
const outcome = await timedPhase("diagnose", () => diagnose({
|
|
2051
2604
|
script,
|
|
2052
|
-
|
|
2605
|
+
specYaml,
|
|
2053
2606
|
actions,
|
|
2054
2607
|
failureLog,
|
|
2055
2608
|
pageSnapshot,
|
|
@@ -2084,7 +2637,7 @@ async function diagnoseAndFix(input) {
|
|
|
2084
2637
|
return apply.script;
|
|
2085
2638
|
}
|
|
2086
2639
|
if (decision === "skip-low-confidence") {
|
|
2087
|
-
fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (
|
|
2640
|
+
fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (mode: ${mode})`);
|
|
2088
2641
|
handoffToUser(result, outcome.raw, outputLanguage);
|
|
2089
2642
|
return null;
|
|
2090
2643
|
}
|
|
@@ -2108,10 +2661,15 @@ async function diagnoseAndFix(input) {
|
|
|
2108
2661
|
process.exit(1);
|
|
2109
2662
|
}
|
|
2110
2663
|
}
|
|
2664
|
+
/**
|
|
2665
|
+
* Map a diagnosis to one of three actions. `auto` previously bypassed the
|
|
2666
|
+
* confidence threshold; it no longer does — a low-confidence guess can
|
|
2667
|
+
* corrupt working code, and CI wants "apply obvious fixes, fail loudly on
|
|
2668
|
+
* the rest" rather than "apply every guess".
|
|
2669
|
+
*/
|
|
2111
2670
|
function decide(result, mode) {
|
|
2112
|
-
if (mode === "auto") return "apply-auto";
|
|
2113
2671
|
const highConfidence = result.confidence >= DEFAULT_CONFIDENCE_THRESHOLD;
|
|
2114
|
-
if (mode === "non-interactive") return highConfidence ? "apply-auto" : "skip-low-confidence";
|
|
2672
|
+
if (mode === "auto" || mode === "non-interactive") return highConfidence ? "apply-auto" : "skip-low-confidence";
|
|
2115
2673
|
return highConfidence ? "apply-auto" : "interactive";
|
|
2116
2674
|
}
|
|
2117
2675
|
function reportDiagnosis(result) {
|
|
@@ -2148,27 +2706,27 @@ function handoffMessage(diagnosis, language) {
|
|
|
2148
2706
|
}
|
|
2149
2707
|
function handoffEn(diagnosis) {
|
|
2150
2708
|
switch (diagnosis.type) {
|
|
2151
|
-
case "DATA_MISSING": return [`application-side issue: required data is missing. ${diagnosis.reason}`, "next step: seed the data (or update
|
|
2709
|
+
case "DATA_MISSING": return [`application-side issue: required data is missing. ${diagnosis.reason}`, "next step: seed the data (or update spec.yaml prerequisites), then re-run trace + generate."];
|
|
2152
2710
|
case "UNKNOWN": return [`could not classify the failure. ${diagnosis.reason}`, "next step: read the failure log above, decide whether the test or the app is wrong, and fix manually. consider re-running ccqa trace if the recorded flow no longer matches the live app."];
|
|
2153
2711
|
case "SELECTOR_DRIFT": return [
|
|
2154
2712
|
`selector likely drifted but auto-apply was not safe.`,
|
|
2155
2713
|
`proposed: line ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason}).`,
|
|
2156
2714
|
"next step: confirm in the live app and either accept the proposal manually, or re-run ccqa trace to recapture the new selector."
|
|
2157
2715
|
];
|
|
2158
|
-
case "OVER_ASSERTION": return [`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`, "next step: cross-check
|
|
2716
|
+
case "OVER_ASSERTION": return [`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`, "next step: cross-check spec.yaml. either delete the assertion from the test, or tighten the spec to require it."];
|
|
2159
2717
|
case "TIMING_ISSUE": return [`timing fix proposed but couldn't be applied automatically.`, "next step: insert a sleep manually before the failing line, or re-run with a higher confidence trace."];
|
|
2160
2718
|
}
|
|
2161
2719
|
}
|
|
2162
2720
|
function handoffJa(diagnosis) {
|
|
2163
2721
|
switch (diagnosis.type) {
|
|
2164
|
-
case "DATA_MISSING": return [`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`, "次のステップ: データを seed する(または
|
|
2722
|
+
case "DATA_MISSING": return [`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`, "次のステップ: データを seed する(または spec.yaml の prerequisites を更新)してから ccqa trace + generate をやり直してください。"];
|
|
2165
2723
|
case "UNKNOWN": return [`失敗を分類できませんでした。${diagnosis.reason}`, "次のステップ: 上の失敗ログを確認し、テストとアプリのどちらが原因か判断して手動で修正してください。記録した手順がアプリの現状と合わない場合は ccqa trace の再実行を検討してください。"];
|
|
2166
2724
|
case "SELECTOR_DRIFT": return [
|
|
2167
2725
|
"selector が変わった可能性が高いですが、自動適用は安全でないと判断しました。",
|
|
2168
2726
|
`提案: 行 ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason})`,
|
|
2169
2727
|
"次のステップ: アプリで新 selector を確認し、手動で適用するか ccqa trace をやり直して新しい selector を取り直してください。"
|
|
2170
2728
|
];
|
|
2171
|
-
case "OVER_ASSERTION": return [`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`, "次のステップ:
|
|
2729
|
+
case "OVER_ASSERTION": return [`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`, "次のステップ: spec.yaml と照合して、テスト側の assertion を削るか、spec 側を更新してください。"];
|
|
2172
2730
|
case "TIMING_ISSUE": return ["timing 関連の修正案は出ましたが、自動適用できませんでした。", "次のステップ: 失敗行の前に手動で sleep を入れるか、より信頼度の高い trace を取り直してください。"];
|
|
2173
2731
|
}
|
|
2174
2732
|
}
|
|
@@ -2204,18 +2762,24 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
|
|
|
2204
2762
|
meta("actions", actions.length);
|
|
2205
2763
|
const specContent = await readSpecFile(featureName, specName);
|
|
2206
2764
|
const spec = parseTestSpec(specContent);
|
|
2207
|
-
const
|
|
2208
|
-
|
|
2765
|
+
const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
|
|
2766
|
+
await warnStaleBlockArtifacts();
|
|
2767
|
+
meta("steps", expanded.length);
|
|
2209
2768
|
meta("fix-mode", mode);
|
|
2210
2769
|
meta("language", outputLanguage);
|
|
2211
2770
|
blank();
|
|
2212
|
-
const cleanedActions = await cleanupActions
|
|
2771
|
+
const cleanedActions = await cleanupActions(actions, model);
|
|
2213
2772
|
if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
|
|
2214
|
-
const
|
|
2773
|
+
const markers = buildStepMarkers(expanded, cleanedActions);
|
|
2774
|
+
const scriptPath = await saveTestScript(featureName, specName, actionsToScript({
|
|
2775
|
+
actions: cleanedActions,
|
|
2776
|
+
testName: spec.title,
|
|
2777
|
+
stepMarkers: markers
|
|
2778
|
+
}));
|
|
2215
2779
|
meta("saved", scriptPath);
|
|
2216
2780
|
blank();
|
|
2217
2781
|
const agentBrowserSession = useSnapshot ? `ccqa-generate-${Date.now()}` : void 0;
|
|
2218
|
-
const runVitestForSession = (path) => runVitest
|
|
2782
|
+
const runVitestForSession = (path) => runVitest(path, agentBrowserSession);
|
|
2219
2783
|
let signalHandler = null;
|
|
2220
2784
|
if (agentBrowserSession) {
|
|
2221
2785
|
await closeSession(agentBrowserSession);
|
|
@@ -2234,7 +2798,7 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
|
|
|
2234
2798
|
if (await runAutoFixLoop({
|
|
2235
2799
|
scriptPath,
|
|
2236
2800
|
initialRun,
|
|
2237
|
-
|
|
2801
|
+
specYaml: specContent,
|
|
2238
2802
|
actions: cleanedActions,
|
|
2239
2803
|
maxRetries,
|
|
2240
2804
|
mode,
|
|
@@ -2256,6 +2820,30 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
|
|
|
2256
2820
|
if (agentBrowserSession) await closeSession(agentBrowserSession);
|
|
2257
2821
|
}
|
|
2258
2822
|
}
|
|
2823
|
+
/**
 * Build the per-step markers consumed by `actionsToScript`.
 *
 * Walks the actions in order and emits a marker at the first action whose
 * `stepId` names a known step and differs from the step of the most recently
 * emitted marker. Actions with no `stepId`, or with a `stepId` that matches
 * no entry in `steps`, never produce a marker and do not end the current run.
 *
 * @param {Array<{id: string, source: *}>} steps Expanded spec steps.
 * @param {Array<{stepId?: string}>} actions Recorded actions, in script order.
 * @returns {Array<{actionIndex: number, stepId: string, source: *}>}
 *   Markers in ascending `actionIndex` order.
 */
function buildStepMarkers(steps, actions) {
  const knownSteps = new Map();
  for (const step of steps) knownSteps.set(step.id, step);

  const markers = [];
  let currentRunId = null;
  for (const [actionIndex, action] of actions.entries()) {
    const { stepId } = action;
    const startsNewRun = Boolean(stepId) && stepId !== currentRunId;
    if (!startsNewRun) continue;
    const step = knownSteps.get(stepId);
    // Unknown ids are skipped so they are never labelled as some other step.
    if (!step) continue;
    markers.push({ actionIndex, stepId: step.id, source: step.source });
    currentRunId = stepId;
  }
  return markers;
}
|
|
2259
2847
|
async function confirmOverwrite(path) {
|
|
2260
2848
|
if (!process.stdin.isTTY) {
|
|
2261
2849
|
warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
|
|
@@ -2275,67 +2863,7 @@ async function confirmOverwrite(path) {
|
|
|
2275
2863
|
rl.close();
|
|
2276
2864
|
}
|
|
2277
2865
|
}
|
|
2278
|
-
async function
|
|
2279
|
-
if (!setups?.length) return [];
|
|
2280
|
-
const result = [];
|
|
2281
|
-
for (const ref of setups) {
|
|
2282
|
-
const scriptPath = join(getSetupDir(ref.name), "test.spec.ts");
|
|
2283
|
-
const resolved = replacePlaceholders(extractTestBody(await readFile(scriptPath, "utf-8").catch(() => {
|
|
2284
|
-
throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
|
|
2285
|
-
})), ref.params ?? {});
|
|
2286
|
-
result.push({
|
|
2287
|
-
name: ref.name,
|
|
2288
|
-
body: resolved
|
|
2289
|
-
});
|
|
2290
|
-
}
|
|
2291
|
-
return result;
|
|
2292
|
-
}
|
|
2293
|
-
/**
|
|
2294
|
-
* Extract the test body (statements inside the test callback) from a setup
|
|
2295
|
-
* test script.
|
|
2296
|
-
*
|
|
2297
|
-
* Locates the first arrow callback (`=> {`) after a top-level `test(` call
|
|
2298
|
-
* and returns the text between the matching `{` and `}`. Handles both
|
|
2299
|
-
* single-line and multi-line `test(...)` formatting (the latter is what
|
|
2300
|
-
* prettier produces).
|
|
2301
|
-
*
|
|
2302
|
-
* Brace tracking is naive (string/regex/comment literals are not parsed
|
|
2303
|
-
* specially), but setup test scripts are themselves generated by ccqa and
|
|
2304
|
-
* follow a fixed shape, so this is sufficient in practice.
|
|
2305
|
-
*/
|
|
2306
|
-
function extractTestBody(script) {
|
|
2307
|
-
const testCallMatch = /\btest\s*\(/.exec(script);
|
|
2308
|
-
if (!testCallMatch) return "";
|
|
2309
|
-
const arrowIdx = script.indexOf("=> {", testCallMatch.index);
|
|
2310
|
-
if (arrowIdx === -1) return "";
|
|
2311
|
-
const bodyStart = arrowIdx + 4;
|
|
2312
|
-
let depth = 1;
|
|
2313
|
-
let i = bodyStart;
|
|
2314
|
-
for (; i < script.length; i++) {
|
|
2315
|
-
const ch = script[i];
|
|
2316
|
-
if (ch === "{") depth++;
|
|
2317
|
-
else if (ch === "}") {
|
|
2318
|
-
depth--;
|
|
2319
|
-
if (depth === 0) break;
|
|
2320
|
-
}
|
|
2321
|
-
}
|
|
2322
|
-
if (depth !== 0) return "";
|
|
2323
|
-
return script.slice(bodyStart, i).replace(/^\n/, "").replace(/\n\s*$/, "");
|
|
2324
|
-
}
|
|
2325
|
-
function replacePlaceholders(body, params) {
|
|
2326
|
-
let result = body;
|
|
2327
|
-
for (const [key, value] of Object.entries(params)) if (hasEnvRef(value)) {
|
|
2328
|
-
const expr = envRefsToJsExpression(value);
|
|
2329
|
-
const re = new RegExp(`(["'])\\{\\{${escapeRegExp(key)}\\}\\}\\1`, "g");
|
|
2330
|
-
result = result.replace(re, expr);
|
|
2331
|
-
result = result.replaceAll(`{{${key}}}`, value);
|
|
2332
|
-
} else result = result.replaceAll(`{{${key}}}`, value);
|
|
2333
|
-
return result;
|
|
2334
|
-
}
|
|
2335
|
-
function escapeRegExp(s) {
|
|
2336
|
-
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
2337
|
-
}
|
|
2338
|
-
async function runVitest$1(scriptPath, agentBrowserSession) {
|
|
2866
|
+
async function runVitest(scriptPath, agentBrowserSession) {
|
|
2339
2867
|
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
2340
2868
|
"run",
|
|
2341
2869
|
"--config",
|
|
@@ -2352,557 +2880,159 @@ async function runVitest$1(scriptPath, agentBrowserSession) {
|
|
|
2352
2880
|
currentScript
|
|
2353
2881
|
};
|
|
2354
2882
|
}
|
|
2355
|
-
async function cleanupActions
|
|
2356
|
-
|
|
2357
|
-
|
|
2358
|
-
prompt: buildCleanupPrompt(actions),
|
|
2359
|
-
disableBuiltinTools: true,
|
|
2360
|
-
maxTurns: 1,
|
|
2361
|
-
model
|
|
2362
|
-
}, () => {});
|
|
2363
|
-
if (isError || !result) return actions;
|
|
2364
|
-
const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
|
|
2365
|
-
const parsed = JSON.parse(json);
|
|
2366
|
-
if (Array.isArray(parsed) && parsed.length > 0) return parsed;
|
|
2367
|
-
} catch {}
|
|
2368
|
-
return actions;
|
|
2369
|
-
}
|
|
2370
|
-
//#endregion
|
|
2371
|
-
//#region src/cli/run.ts
|
|
2372
|
-
const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
|
|
2373
|
-
async function resolveVitestConfig() {
|
|
2374
|
-
try {
|
|
2375
|
-
await access(USER_VITEST_CONFIG);
|
|
2376
|
-
return USER_VITEST_CONFIG;
|
|
2377
|
-
} catch {
|
|
2378
|
-
return bundledVitestConfigPath();
|
|
2379
|
-
}
|
|
2883
|
+
async function cleanupActions(actions, model) {
|
|
2884
|
+
const cleaned = await cleanupActions$1(actions, model);
|
|
2885
|
+
return cleaned === actions ? actions : reattachStepIds(cleaned, actions);
|
|
2380
2886
|
}
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
|
|
2408
|
-
const proc = spawnVitestStreaming([
|
|
2409
|
-
"run",
|
|
2410
|
-
"--config",
|
|
2411
|
-
vitestConfig,
|
|
2412
|
-
scriptFile,
|
|
2413
|
-
"--reporter=json",
|
|
2414
|
-
`--outputFile.json=${reportFile}`
|
|
2415
|
-
]);
|
|
2416
|
-
await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]);
|
|
2417
|
-
const exitCode = await proc.exited;
|
|
2418
|
-
if (exitCode !== 0) overallExitCode = exitCode;
|
|
2419
|
-
const report = await readReport(reportFile);
|
|
2420
|
-
summaries.push({
|
|
2421
|
-
featureName,
|
|
2422
|
-
specName,
|
|
2423
|
-
scriptFile,
|
|
2424
|
-
report,
|
|
2425
|
-
exitCode
|
|
2426
|
-
});
|
|
2427
|
-
blank();
|
|
2887
|
+
/**
|
|
2888
|
+
* The Claude cleanup pass returns a pruned array without the `stepId` field
|
|
2889
|
+
* (the prompt deliberately doesn't expose it — that would make the prompt
|
|
2890
|
+
* easier to misformat). Re-attach stepIds here by replaying the cleaned
|
|
2891
|
+
* stream against the original and matching the next compatible action.
|
|
2892
|
+
*
|
|
2893
|
+
* Algorithm: walk both arrays in lockstep. For each cleaned action, scan
|
|
2894
|
+
* forward in `original` (from the last-matched cursor) for the next entry
|
|
2895
|
+
* with the same `command` + `selector` + `value` + `assertType` shape, and
|
|
2896
|
+
* borrow its `stepId`. Cleaned actions Claude invented from thin air (rare,
|
|
2897
|
+
* and explicitly forbidden by the prompt) end up with no stepId — codegen
|
|
2898
|
+
* just won't emit a step marker for that index, which is the same outcome
|
|
2899
|
+
* as a wholly stepId-less actions.json.
|
|
2900
|
+
*
|
|
2901
|
+
* The matching is forward-only so that if cleanup keeps two identical fills
|
|
2902
|
+
* (e.g. typing the same value twice intentionally), they're paired to the
|
|
2903
|
+
* first and second occurrence in the original — not both to the first.
|
|
2904
|
+
*/
|
|
2905
|
+
function reattachStepIds(cleaned, original) {
|
|
2906
|
+
let cursor = 0;
|
|
2907
|
+
const out = [];
|
|
2908
|
+
for (const c of cleaned) {
|
|
2909
|
+
let matched = null;
|
|
2910
|
+
for (let i = cursor; i < original.length; i++) if (sameShape(c, original[i])) {
|
|
2911
|
+
matched = original[i];
|
|
2912
|
+
cursor = i + 1;
|
|
2913
|
+
break;
|
|
2428
2914
|
}
|
|
2429
|
-
|
|
2430
|
-
|
|
2431
|
-
|
|
2432
|
-
recursive: true,
|
|
2433
|
-
force: true
|
|
2915
|
+
if (matched?.stepId) out.push({
|
|
2916
|
+
...c,
|
|
2917
|
+
stepId: matched.stepId
|
|
2434
2918
|
});
|
|
2919
|
+
else out.push(c);
|
|
2435
2920
|
}
|
|
2436
|
-
|
|
2437
|
-
}
|
|
2438
|
-
async function readReport(path) {
|
|
2439
|
-
try {
|
|
2440
|
-
const raw = await readFile(path, "utf8");
|
|
2441
|
-
return JSON.parse(raw);
|
|
2442
|
-
} catch {
|
|
2443
|
-
return null;
|
|
2444
|
-
}
|
|
2921
|
+
return out;
|
|
2445
2922
|
}
|
|
2446
|
-
|
|
2447
|
-
|
|
2448
|
-
reset: useColor ? "\x1B[0m" : "",
|
|
2449
|
-
bold: useColor ? "\x1B[1m" : "",
|
|
2450
|
-
dim: useColor ? "\x1B[2m" : "",
|
|
2451
|
-
green: useColor ? "\x1B[32m" : "",
|
|
2452
|
-
red: useColor ? "\x1B[31m" : "",
|
|
2453
|
-
yellow: useColor ? "\x1B[33m" : "",
|
|
2454
|
-
cyan: useColor ? "\x1B[36m" : "",
|
|
2455
|
-
gray: useColor ? "\x1B[90m" : ""
|
|
2456
|
-
};
|
|
2457
|
-
function printSummary(summaries) {
|
|
2458
|
-
process.stdout.write(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
|
|
2459
|
-
let totalTests = 0;
|
|
2460
|
-
let totalPassed = 0;
|
|
2461
|
-
let totalFailed = 0;
|
|
2462
|
-
let totalSkipped = 0;
|
|
2463
|
-
for (const s of summaries) {
|
|
2464
|
-
const header = `${C.bold}${s.featureName}/${s.specName}${C.reset}`;
|
|
2465
|
-
if (!s.report) {
|
|
2466
|
-
const icon = s.exitCode === 0 ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
|
|
2467
|
-
process.stdout.write(`${icon} ${header} ${C.dim}(no report)${C.reset}\n`);
|
|
2468
|
-
continue;
|
|
2469
|
-
}
|
|
2470
|
-
totalTests += s.report.numTotalTests;
|
|
2471
|
-
totalPassed += s.report.numPassedTests;
|
|
2472
|
-
totalFailed += s.report.numFailedTests;
|
|
2473
|
-
totalSkipped += s.report.numPendingTests;
|
|
2474
|
-
const ok = s.report.success;
|
|
2475
|
-
const icon = ok ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
|
|
2476
|
-
const countColor = ok ? C.green : C.red;
|
|
2477
|
-
process.stdout.write(`${icon} ${header} ${countColor}${s.report.numPassedTests}/${s.report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
|
|
2478
|
-
for (const file of s.report.testResults) for (const a of file.assertionResults) {
|
|
2479
|
-
const aIcon = assertionIcon(a.status);
|
|
2480
|
-
const dur = a.duration != null ? ` ${C.gray}${formatDuration(a.duration)}${C.reset}` : "";
|
|
2481
|
-
process.stdout.write(` ${aIcon} ${a.fullName}${dur}\n`);
|
|
2482
|
-
if (a.status === "failed" && a.failureMessages?.length) for (const msg of a.failureMessages) {
|
|
2483
|
-
const firstLine = msg.split("\n")[0] ?? msg;
|
|
2484
|
-
process.stdout.write(` ${C.red}${firstLine}${C.reset}\n`);
|
|
2485
|
-
}
|
|
2486
|
-
}
|
|
2487
|
-
}
|
|
2488
|
-
const specsPassed = summaries.filter((s) => s.exitCode === 0).length;
|
|
2489
|
-
const specsFailed = summaries.filter((s) => s.exitCode !== 0).length;
|
|
2490
|
-
process.stdout.write("\n");
|
|
2491
|
-
process.stdout.write(` ${C.bold}Specs${C.reset} ${summaries.length} (${C.green}${specsPassed} passed${C.reset}, ${specsFailed > 0 ? C.red : C.dim}${specsFailed} failed${C.reset})\n`);
|
|
2492
|
-
process.stdout.write(` ${C.bold}Tests${C.reset} ${totalTests} (${C.green}${totalPassed} passed${C.reset}, ${totalFailed > 0 ? C.red : C.dim}${totalFailed} failed${C.reset}, ${C.yellow}${totalSkipped} skipped${C.reset})\n`);
|
|
2493
|
-
process.stdout.write("\n");
|
|
2923
|
+
function sameShape(a, b) {
|
|
2924
|
+
return a.command === b.command && (a.selector ?? "") === (b.selector ?? "") && (a.value ?? "") === (b.value ?? "") && (a.assertType ?? "") === (b.assertType ?? "");
|
|
2494
2925
|
}
|
|
2495
|
-
|
|
2496
|
-
|
|
2497
|
-
|
|
2498
|
-
|
|
2499
|
-
|
|
2500
|
-
|
|
2501
|
-
|
|
2502
|
-
|
|
2926
|
+
//#endregion
|
|
2927
|
+
//#region src/claude/extract-json.ts
|
|
2928
|
+
/**
|
|
2929
|
+
* Pulls a JSON object out of a Claude completion. Accepts either a fenced
|
|
2930
|
+
* ```json block or a bare `{...}` payload that constitutes the whole reply.
|
|
2931
|
+
* Returns null when neither shape is present.
|
|
2932
|
+
*/
|
|
2933
|
+
function extractJsonBlock(text) {
|
|
2934
|
+
const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
|
|
2935
|
+
if (fenced && fenced[1]) return fenced[1].trim();
|
|
2936
|
+
const trimmed = text.trim();
|
|
2937
|
+
if (trimmed.startsWith("{") && trimmed.endsWith("}")) return trimmed;
|
|
2938
|
+
return null;
|
|
2503
2939
|
}
|
|
2504
|
-
|
|
2505
|
-
|
|
2506
|
-
|
|
2940
|
+
//#endregion
|
|
2941
|
+
//#region src/prompts/draft.ts
|
|
2942
|
+
function buildNamingSystemPrompt() {
|
|
2943
|
+
return `You name a new ccqa test case based on the user's intent and the existing feature tree.
|
|
2944
|
+
|
|
2945
|
+
ccqa test cases live under \`.ccqa/features/<featureName>/test-cases/<specName>/spec.yaml\`.
|
|
2946
|
+
|
|
2947
|
+
## Naming rules
|
|
2948
|
+
|
|
2949
|
+
- featureName and specName are kebab-case ASCII (lowercase, words separated by '-').
|
|
2950
|
+
- featureName: a broad area (e.g. "tasks", "auth", "billing", "search").
|
|
2951
|
+
- specName: a short scenario name (e.g. "create-and-complete", "login-with-email", "search-by-tag").
|
|
2952
|
+
- Reuse existing featureName when the user's intent fits an existing area. Only invent a new featureName when the existing tree clearly does not cover the area.
|
|
2953
|
+
- specName must NOT collide with an existing spec under the chosen feature. If the natural name collides, pick a different one that distinguishes the new scenario from the existing ones.
|
|
2954
|
+
- Use the codebase (Read/Grep/Glob) sparingly to confirm domain vocabulary if helpful. Do not over-explore.
|
|
2955
|
+
|
|
2956
|
+
## Output (STRICT)
|
|
2957
|
+
|
|
2958
|
+
Output ONE fenced \`\`\`json block, nothing else outside it:
|
|
2959
|
+
|
|
2960
|
+
{
|
|
2961
|
+
"featureName": "<kebab-case>",
|
|
2962
|
+
"specName": "<kebab-case>",
|
|
2963
|
+
"reason": "<one short sentence: why this name and how it relates to existing specs>"
|
|
2507
2964
|
}
|
|
2508
|
-
|
|
2509
|
-
async function streamFiltered(source, sink) {
|
|
2510
|
-
source.setEncoding("utf8");
|
|
2511
|
-
let buffer = "";
|
|
2512
|
-
for await (const chunk of source) {
|
|
2513
|
-
buffer += chunk;
|
|
2514
|
-
let nl = buffer.indexOf("\n");
|
|
2515
|
-
while (nl !== -1) {
|
|
2516
|
-
const line = buffer.slice(0, nl);
|
|
2517
|
-
buffer = buffer.slice(nl + 1);
|
|
2518
|
-
if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) sink.write(line + "\n");
|
|
2519
|
-
nl = buffer.indexOf("\n");
|
|
2520
|
-
}
|
|
2521
|
-
}
|
|
2522
|
-
if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) sink.write(buffer);
|
|
2965
|
+
`;
|
|
2523
2966
|
}
|
|
2524
|
-
|
|
2525
|
-
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
|
|
2530
|
-
|
|
2531
|
-
|
|
2532
|
-
|
|
2533
|
-
|
|
2534
|
-
|
|
2535
|
-
|
|
2536
|
-
|
|
2967
|
+
function buildNamingPrompt(intent, tree) {
|
|
2968
|
+
return `## User intent
|
|
2969
|
+
|
|
2970
|
+
${intent}
|
|
2971
|
+
|
|
2972
|
+
## Existing feature tree
|
|
2973
|
+
|
|
2974
|
+
${tree.length === 0 ? "(no existing features yet)" : tree.map((f) => {
|
|
2975
|
+
const specLines = f.specs.length === 0 ? " (no specs yet)" : f.specs.map((s) => ` - ${s.specName}`).join("\n");
|
|
2976
|
+
return `- ${f.featureName}/\n${specLines}`;
|
|
2977
|
+
}).join("\n")}
|
|
2978
|
+
|
|
2979
|
+
## Task
|
|
2980
|
+
|
|
2981
|
+
Pick featureName and specName for the new test case. Follow the naming rules. Avoid colliding with any existing specName under the chosen feature.
|
|
2982
|
+
`;
|
|
2537
2983
|
}
|
|
2538
|
-
|
|
2539
|
-
|
|
2540
|
-
const traceSetupCommand = new Command("trace-setup").argument("<name>", "Setup name to trace (e.g. login)").description("Trace a setup procedure using dummy placeholder values").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (name, opts) => {
|
|
2541
|
-
await runTraceSetup(name, opts.model);
|
|
2542
|
-
});
|
|
2543
|
-
async function runTraceSetup(name, model) {
|
|
2544
|
-
header("trace-setup", name);
|
|
2545
|
-
try {
|
|
2546
|
-
meta("agent-browser", assertAgentBrowserAvailable());
|
|
2547
|
-
} catch (e) {
|
|
2548
|
-
if (e instanceof AgentBrowserUnavailableError) {
|
|
2549
|
-
error(formatAgentBrowserUnavailableMessage());
|
|
2550
|
-
process.exit(1);
|
|
2551
|
-
}
|
|
2552
|
-
throw e;
|
|
2553
|
-
}
|
|
2554
|
-
await ensureCcqaDir();
|
|
2555
|
-
const spec = parseSetupSpec(await readSetupSpecFile(name));
|
|
2556
|
-
const resolvedSpec = replacePlaceholdersWithDummies(spec);
|
|
2557
|
-
const secretsToScrub = buildSecretsToScrub(spec);
|
|
2558
|
-
meta("setup", spec.title);
|
|
2559
|
-
meta("steps", spec.steps.length);
|
|
2560
|
-
if (spec.placeholders) meta("placeholders", Object.keys(spec.placeholders).join(", "));
|
|
2561
|
-
blank();
|
|
2562
|
-
const systemPrompt = buildSetupTraceSystemPrompt(resolvedSpec);
|
|
2563
|
-
const prompt = buildSetupTracePrompt(resolvedSpec);
|
|
2564
|
-
info("Running agent-browser session...");
|
|
2565
|
-
blank();
|
|
2566
|
-
const routeSteps = [];
|
|
2567
|
-
let overallStatus = "passed";
|
|
2568
|
-
const traceActions = [];
|
|
2569
|
-
const { isError } = await invokeClaudeStreaming({
|
|
2570
|
-
prompt,
|
|
2571
|
-
systemPrompt,
|
|
2572
|
-
allowedTools: [
|
|
2573
|
-
"Bash(*)",
|
|
2574
|
-
"Read",
|
|
2575
|
-
"Grep",
|
|
2576
|
-
"Glob"
|
|
2577
|
-
],
|
|
2578
|
-
env: {
|
|
2579
|
-
PATH: pathWithAgentBrowserShim(process.env["PATH"]),
|
|
2580
|
-
ANTHROPIC_API_KEY: ""
|
|
2581
|
-
},
|
|
2582
|
-
model,
|
|
2583
|
-
onAbAction: (abAction) => {
|
|
2584
|
-
const action = parseAbAction(scrubSecrets(abAction, secretsToScrub));
|
|
2585
|
-
if (action) traceActions.push(action);
|
|
2586
|
-
},
|
|
2587
|
-
onAbActionFailed: () => {
|
|
2588
|
-
traceActions.pop();
|
|
2589
|
-
}
|
|
2590
|
-
}, (msg) => {
|
|
2591
|
-
if (msg.type !== "assistant") return;
|
|
2592
|
-
for (const block of msg.message.content ?? []) {
|
|
2593
|
-
if (block.type !== "text" || !block.text) continue;
|
|
2594
|
-
const text = block.text;
|
|
2595
|
-
const statusLine = parseStatusLine(text);
|
|
2596
|
-
if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
|
|
2597
|
-
for (const line of text.split("\n")) {
|
|
2598
|
-
const trimmed = line.trim();
|
|
2599
|
-
if (trimmed.startsWith("ROUTE_STEP|")) {
|
|
2600
|
-
const routeStep = parseRouteStep(trimmed);
|
|
2601
|
-
if (routeStep) {
|
|
2602
|
-
routeSteps.push(routeStep);
|
|
2603
|
-
if (routeStep.status === "FAILED") overallStatus = "failed";
|
|
2604
|
-
}
|
|
2605
|
-
} else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
|
|
2606
|
-
const action = parseAbAction(scrubSecrets(trimmed, secretsToScrub));
|
|
2607
|
-
if (action) traceActions.push(action);
|
|
2608
|
-
}
|
|
2609
|
-
}
|
|
2610
|
-
}
|
|
2611
|
-
});
|
|
2612
|
-
if (isError) overallStatus = "failed";
|
|
2613
|
-
const route = {
|
|
2614
|
-
specName: name,
|
|
2615
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2616
|
-
status: overallStatus,
|
|
2617
|
-
steps: routeSteps
|
|
2618
|
-
};
|
|
2619
|
-
const [routePath, actionsPath] = await Promise.all([saveSetupRoute(name, route), saveSetupActions(name, traceActions)]);
|
|
2620
|
-
blank();
|
|
2621
|
-
meta("route", routePath);
|
|
2622
|
-
meta("saved", actionsPath);
|
|
2623
|
-
meta("actions", traceActions.length);
|
|
2624
|
-
meta("status", overallStatus.toUpperCase());
|
|
2625
|
-
hint(`run 'ccqa generate-setup ${name}' to generate and validate the setup`);
|
|
2626
|
-
}
|
|
2627
|
-
function replacePlaceholdersWithDummies(spec) {
|
|
2628
|
-
if (!spec.placeholders) return spec;
|
|
2629
|
-
const dummies = spec.placeholders;
|
|
2630
|
-
const resolve = (text) => {
|
|
2631
|
-
let result = text;
|
|
2632
|
-
for (const [key, def] of Object.entries(dummies)) result = result.replaceAll(`{{${key}}}`, resolveEnvRefs(def.dummy));
|
|
2633
|
-
return result;
|
|
2634
|
-
};
|
|
2635
|
-
return {
|
|
2636
|
-
...spec,
|
|
2637
|
-
steps: spec.steps.map((step) => ({
|
|
2638
|
-
...step,
|
|
2639
|
-
instruction: resolve(step.instruction),
|
|
2640
|
-
expected: resolve(step.expected)
|
|
2641
|
-
}))
|
|
2642
|
-
};
|
|
2643
|
-
}
|
|
2644
|
-
/**
|
|
2645
|
-
* Build the substitution map used to scrub real secret values out of
|
|
2646
|
-
* recorded actions before they are written to actions.json.
|
|
2647
|
-
*
|
|
2648
|
-
* For each placeholder whose dummy contains env refs, store
|
|
2649
|
-
* <resolved-value> -> <original ${VAR} string>
|
|
2650
|
-
* so that an `ab fill ... <secret>` line records the placeholder string
|
|
2651
|
-
* instead of the secret. Empty resolved values are skipped — they would
|
|
2652
|
-
* otherwise replace incidental empty strings in the recorded actions.
|
|
2653
|
-
*/
|
|
2654
|
-
function buildSecretsToScrub(spec) {
|
|
2655
|
-
const map = /* @__PURE__ */ new Map();
|
|
2656
|
-
if (!spec.placeholders) return map;
|
|
2657
|
-
const dummies = spec.placeholders;
|
|
2658
|
-
for (const def of Object.values(dummies)) {
|
|
2659
|
-
if (!hasEnvRef(def.dummy)) continue;
|
|
2660
|
-
const resolved = resolveEnvRefs(def.dummy);
|
|
2661
|
-
if (!resolved) continue;
|
|
2662
|
-
map.set(resolved, def.dummy);
|
|
2663
|
-
}
|
|
2664
|
-
return map;
|
|
2665
|
-
}
|
|
2666
|
-
/** Replace every occurrence of a recorded secret with its `${VAR}` placeholder. */
|
|
2667
|
-
function scrubSecrets(line, secrets) {
|
|
2668
|
-
if (secrets.size === 0) return line;
|
|
2669
|
-
let result = line;
|
|
2670
|
-
for (const [secret, placeholder] of secrets) {
|
|
2671
|
-
if (!result.includes(secret)) continue;
|
|
2672
|
-
result = result.split(secret).join(placeholder);
|
|
2673
|
-
}
|
|
2674
|
-
return result;
|
|
2675
|
-
}
|
|
2676
|
-
//#endregion
|
|
2677
|
-
//#region src/cli/generate-setup.ts
|
|
2678
|
-
const generateSetupCommand = new Command("generate-setup").argument("<name>", "Setup name to generate (e.g. login)").description("Clean up, validate, and templatize setup actions").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (name, opts) => {
|
|
2679
|
-
const mode = resolveMode(opts);
|
|
2680
|
-
await runGenerateSetup(name, parseInt(opts.maxRetries, 10), opts.fromDummy ?? false, mode, opts.language ?? "en", opts.model);
|
|
2681
|
-
});
|
|
2682
|
-
async function runGenerateSetup(name, maxRetries, fromDummy, mode, outputLanguage, model) {
|
|
2683
|
-
header("generate-setup", name);
|
|
2684
|
-
await ensureCcqaDir();
|
|
2685
|
-
const specContent = await readSetupSpecFile(name);
|
|
2686
|
-
const spec = parseSetupSpec(specContent);
|
|
2687
|
-
const dummyPath = join(getSetupDir(name), "test.dummy.spec.ts");
|
|
2688
|
-
const finalPath = join(getSetupDir(name), "test.spec.ts");
|
|
2689
|
-
let cleanedActions = [];
|
|
2690
|
-
if (fromDummy) {
|
|
2691
|
-
if (!await stat(dummyPath).then(() => true).catch(() => false)) {
|
|
2692
|
-
warn(`test.dummy.spec.ts not found. Run without --from-dummy first.`);
|
|
2693
|
-
process.exit(1);
|
|
2694
|
-
}
|
|
2695
|
-
info("Resuming from existing test.dummy.spec.ts");
|
|
2696
|
-
} else {
|
|
2697
|
-
const { actions } = await getSetupActions(name);
|
|
2698
|
-
meta("setup", spec.title);
|
|
2699
|
-
meta("actions", actions.length);
|
|
2700
|
-
meta("fix-mode", mode);
|
|
2701
|
-
meta("language", outputLanguage);
|
|
2702
|
-
blank();
|
|
2703
|
-
cleanedActions = await cleanupActions(actions, model);
|
|
2704
|
-
if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
|
|
2705
|
-
await writeFile(dummyPath, actionsToScript(cleanedActions, spec.title), "utf-8");
|
|
2706
|
-
meta("saved", dummyPath);
|
|
2707
|
-
}
|
|
2708
|
-
blank();
|
|
2709
|
-
const agentBrowserSession = `ccqa-generate-setup-${name}-${Date.now()}`;
|
|
2710
|
-
const runVitestForSession = (path) => runVitestResolved(path, agentBrowserSession);
|
|
2711
|
-
await closeSession(agentBrowserSession);
|
|
2712
|
-
const signalHandler = () => {
|
|
2713
|
-
closeSession(agentBrowserSession).finally(() => process.exit(130));
|
|
2714
|
-
};
|
|
2715
|
-
process.once("SIGINT", signalHandler);
|
|
2716
|
-
process.once("SIGTERM", signalHandler);
|
|
2717
|
-
try {
|
|
2718
|
-
const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(dummyPath), "run");
|
|
2719
|
-
let passed = initialRun.exitCode === 0;
|
|
2720
|
-
if (!passed) passed = await runAutoFixLoop({
|
|
2721
|
-
scriptPath: dummyPath,
|
|
2722
|
-
initialRun,
|
|
2723
|
-
specMarkdown: specContent,
|
|
2724
|
-
actions: cleanedActions,
|
|
2725
|
-
maxRetries,
|
|
2726
|
-
mode,
|
|
2727
|
-
runVitest: runVitestForSession,
|
|
2728
|
-
agentBrowserSession,
|
|
2729
|
-
outputLanguage,
|
|
2730
|
-
model
|
|
2731
|
-
});
|
|
2732
|
-
if (!passed) {
|
|
2733
|
-
warn("auto-fix exhausted; setup test still failing");
|
|
2734
|
-
hint(`edit ${dummyPath} manually, then run: ccqa generate-setup ${name} --from-dummy`);
|
|
2735
|
-
process.exit(1);
|
|
2736
|
-
}
|
|
2737
|
-
await writeFile(finalPath, reversePlaceholdersInScript(await readFile(dummyPath, "utf8"), spec.placeholders), "utf-8");
|
|
2738
|
-
await unlink(dummyPath).catch(() => {});
|
|
2739
|
-
blank();
|
|
2740
|
-
meta("saved", finalPath);
|
|
2741
|
-
hint(`setup '${name}' is ready; reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
|
|
2742
|
-
} finally {
|
|
2743
|
-
process.off("SIGINT", signalHandler);
|
|
2744
|
-
process.off("SIGTERM", signalHandler);
|
|
2745
|
-
await closeSession(agentBrowserSession);
|
|
2746
|
-
}
|
|
2747
|
-
}
|
|
2748
|
-
/**
|
|
2749
|
-
* Replace dummy values with {{placeholder}} directly in the test script text.
|
|
2750
|
-
* Longer dummy values are replaced first to avoid partial matches.
|
|
2751
|
-
*/
|
|
2752
|
-
function reversePlaceholdersInScript(script, placeholders) {
|
|
2753
|
-
if (!placeholders) return script;
|
|
2754
|
-
const entries = Object.entries(placeholders).sort((a, b) => b[1].dummy.length - a[1].dummy.length);
|
|
2755
|
-
let result = script;
|
|
2756
|
-
for (const [key, def] of entries) result = result.replaceAll(def.dummy, `{{${key}}}`);
|
|
2757
|
-
return result;
|
|
2758
|
-
}
|
|
2759
|
-
async function runVitest(scriptPath, agentBrowserSession) {
|
|
2760
|
-
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
2761
|
-
"run",
|
|
2762
|
-
"--config",
|
|
2763
|
-
bundledVitestConfigPath(),
|
|
2764
|
-
scriptPath
|
|
2765
|
-
], agentBrowserSession ? { env: {
|
|
2766
|
-
...process.env,
|
|
2767
|
-
AGENT_BROWSER_SESSION: agentBrowserSession
|
|
2768
|
-
} } : {});
|
|
2769
|
-
const currentScript = await readFile(scriptPath, "utf8");
|
|
2770
|
-
return {
|
|
2771
|
-
exitCode,
|
|
2772
|
-
output: stdout + stderr,
|
|
2773
|
-
currentScript
|
|
2774
|
-
};
|
|
2775
|
-
}
|
|
2776
|
-
/**
|
|
2777
|
-
* Run vitest on `test.dummy.spec.ts`, but transparently expand any `${VAR}`
|
|
2778
|
-
* env refs to real values for the duration of the run. The original file is
|
|
2779
|
-
* preserved unchanged so subsequent reverse-replace still sees the env-ref
|
|
2780
|
-
* literals. Auto-fix edits the original file (via writeFile in callers), so
|
|
2781
|
-
* we always re-read it before each invocation.
|
|
2782
|
-
*/
|
|
2783
|
-
async function runVitestResolved(scriptPath, agentBrowserSession) {
|
|
2784
|
-
const original = await readFile(scriptPath, "utf8");
|
|
2785
|
-
if (!hasEnvRef(original)) return runVitest(scriptPath, agentBrowserSession);
|
|
2786
|
-
const tmpPath = scriptPath.replace(/\.ts$/, ".__resolved.spec.ts");
|
|
2787
|
-
await writeFile(tmpPath, resolveEnvRefs(original), "utf-8");
|
|
2788
|
-
try {
|
|
2789
|
-
const { exitCode, stdout, stderr } = await spawnVitestTeed([
|
|
2790
|
-
"run",
|
|
2791
|
-
"--config",
|
|
2792
|
-
bundledVitestConfigPath(),
|
|
2793
|
-
tmpPath
|
|
2794
|
-
], agentBrowserSession ? { env: {
|
|
2795
|
-
...process.env,
|
|
2796
|
-
AGENT_BROWSER_SESSION: agentBrowserSession
|
|
2797
|
-
} } : {});
|
|
2798
|
-
return {
|
|
2799
|
-
exitCode,
|
|
2800
|
-
output: stdout + stderr,
|
|
2801
|
-
currentScript: original
|
|
2802
|
-
};
|
|
2803
|
-
} finally {
|
|
2804
|
-
await unlink(tmpPath).catch(() => {});
|
|
2805
|
-
}
|
|
2806
|
-
}
|
|
2807
|
-
async function cleanupActions(actions, model) {
|
|
2808
|
-
try {
|
|
2809
|
-
const { result, isError } = await invokeClaudeStreaming({
|
|
2810
|
-
prompt: buildCleanupPrompt(actions),
|
|
2811
|
-
disableBuiltinTools: true,
|
|
2812
|
-
maxTurns: 1,
|
|
2813
|
-
model
|
|
2814
|
-
}, () => {});
|
|
2815
|
-
if (isError || !result) return actions;
|
|
2816
|
-
const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
|
|
2817
|
-
const parsed = JSON.parse(json);
|
|
2818
|
-
if (Array.isArray(parsed) && parsed.length > 0) return parsed;
|
|
2819
|
-
} catch {}
|
|
2820
|
-
return actions;
|
|
2821
|
-
}
|
|
2822
|
-
//#endregion
|
|
2823
|
-
//#region src/prompts/draft.ts
|
|
2824
|
-
function buildNamingSystemPrompt() {
|
|
2825
|
-
return `You name a new ccqa test case based on the user's intent and the existing feature tree.
|
|
2826
|
-
|
|
2827
|
-
ccqa test cases live under \`.ccqa/features/<featureName>/test-cases/<specName>/test-spec.md\`.
|
|
2828
|
-
|
|
2829
|
-
## Naming rules
|
|
2984
|
+
function buildDraftSystemPrompt(blocks) {
|
|
2985
|
+
return `You are a QA engineer drafting and refining a ccqa spec.yaml.
|
|
2830
2986
|
|
|
2831
|
-
|
|
2832
|
-
- featureName: a broad area (e.g. "tasks", "auth", "billing", "search").
|
|
2833
|
-
- specName: a short scenario name (e.g. "create-and-complete", "login-with-email", "search-by-tag").
|
|
2834
|
-
- Reuse existing featureName when the user's intent fits an existing area. Only invent a new featureName when the existing tree clearly does not cover the area.
|
|
2835
|
-
- specName must NOT collide with an existing spec under the chosen feature. If the natural name collides, pick a different one that distinguishes the new scenario from the existing ones.
|
|
2836
|
-
- Use the codebase (Read/Grep/Glob) sparingly to confirm domain vocabulary if helpful. Do not over-explore.
|
|
2837
|
-
|
|
2838
|
-
## Output (STRICT)
|
|
2839
|
-
|
|
2840
|
-
Output ONE fenced \`\`\`json block, nothing else outside it:
|
|
2841
|
-
|
|
2842
|
-
{
|
|
2843
|
-
"featureName": "<kebab-case>",
|
|
2844
|
-
"specName": "<kebab-case>",
|
|
2845
|
-
"reason": "<one short sentence: why this name and how it relates to existing specs>"
|
|
2846
|
-
}
|
|
2847
|
-
`;
|
|
2848
|
-
}
|
|
2849
|
-
function buildNamingPrompt(intent, tree) {
|
|
2850
|
-
return `## User intent
|
|
2987
|
+
The CLI runs you in a loop: each turn the user gives an intent (first run) or a refinement instruction (later runs). You read the codebase, validate the spec, and return a single JSON report. The CLI displays a diff and asks the user whether to apply.
|
|
2851
2988
|
|
|
2852
|
-
|
|
2989
|
+
## spec.yaml format (STRICT)
|
|
2853
2990
|
|
|
2854
|
-
|
|
2991
|
+
Pure YAML — no markdown body, no frontmatter dashes.
|
|
2855
2992
|
|
|
2856
|
-
|
|
2857
|
-
|
|
2858
|
-
|
|
2859
|
-
|
|
2993
|
+
Top-level fields:
|
|
2994
|
+
- \`title\`: string (required) — short human-readable name for the test
|
|
2995
|
+
- \`relatedPaths\`: array of glob string (optional) — source files this spec depends on, used by \`ccqa drift --changed\`
|
|
2996
|
+
- \`steps\`: array (required, at least one)
|
|
2860
2997
|
|
|
2861
|
-
|
|
2998
|
+
A step is one of two shapes:
|
|
2862
2999
|
|
|
2863
|
-
|
|
2864
|
-
|
|
2865
|
-
}
|
|
2866
|
-
|
|
2867
|
-
|
|
2868
|
-
|
|
2869
|
-
The CLI runs you in a loop: each turn the user gives an intent (first run) or a refinement instruction (later runs). You read the codebase, validate the spec, and return a single JSON report. The CLI displays a diff and asks the user whether to apply.
|
|
2870
|
-
|
|
2871
|
-
## test-spec.md format (STRICT)
|
|
3000
|
+
**Action step** — a user-facing browser interaction:
|
|
3001
|
+
\`\`\`yaml
|
|
3002
|
+
- instruction: <imperative; include the URL directly or via \${ENV_VAR}>
|
|
3003
|
+
expected: <observable outcome — visible text, URL pattern, element state>
|
|
3004
|
+
\`\`\`
|
|
2872
3005
|
|
|
2873
|
-
|
|
3006
|
+
**Include step** — invoke a reusable block from \`.ccqa/blocks/<name>/spec.yaml\`:
|
|
3007
|
+
\`\`\`yaml
|
|
3008
|
+
- include: <block-name>
|
|
3009
|
+
params:
|
|
3010
|
+
<param-name>: <string value, can use \${ENV_VAR}>
|
|
3011
|
+
\`\`\`
|
|
2874
3012
|
|
|
2875
|
-
|
|
2876
|
-
- title: string (required)
|
|
2877
|
-
- baseUrl: string (required, e.g. http://localhost:3000)
|
|
2878
|
-
- prerequisites: string (optional, free text)
|
|
2879
|
-
- setups: array of { name: string, params?: Record<string,string> } (optional)
|
|
3013
|
+
## URLs
|
|
2880
3014
|
|
|
2881
|
-
|
|
3015
|
+
Each step writes the URL it opens directly inside \`instruction\` (e.g. \`"\${APP_URL}/articles を開く"\`). Use \`\${ENV_VAR}\` references for environment-specific values.
|
|
2882
3016
|
|
|
2883
|
-
|
|
2884
|
-
### Step 1: <short title>
|
|
2885
|
-
- **Instruction**: <imperative, one sentence>
|
|
2886
|
-
- **Expected**: <observable outcome>
|
|
3017
|
+
## Available blocks
|
|
2887
3018
|
|
|
2888
|
-
|
|
2889
|
-
...
|
|
2890
|
-
\`\`\`
|
|
3019
|
+
${formatBlockList(blocks)}
|
|
2891
3020
|
|
|
2892
3021
|
## Quality rules
|
|
2893
3022
|
|
|
2894
3023
|
- One user-facing action per step (login, click, fill, navigate, ...).
|
|
2895
|
-
-
|
|
2896
|
-
- Forbidden in
|
|
3024
|
+
- \`expected\` must be assertion-friendly: visible text, URL pattern, element state.
|
|
3025
|
+
- Forbidden in \`expected\`: timestamps, exact counts, session IDs, internal state.
|
|
2897
3026
|
- 3–8 steps is typical. Fewer means too coarse; more means too fine.
|
|
2898
3027
|
|
|
2899
3028
|
## Workflow (use Read / Grep / Glob extensively)
|
|
2900
3029
|
|
|
2901
|
-
1. Read the codebase under cwd to find concrete strings: routes, button labels, aria-labels, page titles, placeholders. Use those exact strings in
|
|
2902
|
-
2. If
|
|
2903
|
-
3.
|
|
2904
|
-
|
|
2905
|
-
- **
|
|
3030
|
+
1. Read the codebase under cwd to find concrete strings: routes, button labels, aria-labels, page titles, placeholders. Use those exact strings in \`expected\`.
|
|
3031
|
+
2. If you use \`include:\` steps, verify each \`params\` key matches a declared param of the block (see the Available blocks list above).
|
|
3032
|
+
3. Populate \`relatedPaths\` with **provisional** glob patterns pointing at the source files this spec touches: the route/page file for each URL the spec visits, plus the component files (or their parent feature directory) that render the aria-labels, placeholders, or visible texts the spec asserts on. Prefer directory globs (e.g. \`src/features/tasks/**\`) when several files in one area are involved. Be conservative — include a path if you're unsure rather than omit it. \`ccqa trace\` will refine this list later from real browser observations.
|
|
3033
|
+
4. Validate the (current or proposed) spec on four axes — emit one issue per finding:
|
|
3034
|
+
- **assertable**: each \`expected\` can be verified against a string/URL/state that exists in code.
|
|
3035
|
+
- **blocks**: every \`include\` resolves to a real block; every \`params\` key is declared on that block; every required param is provided.
|
|
2906
3036
|
- **granularity**: not too coarse (multiple actions per step) nor too fine (snapshot-only steps); order is logical.
|
|
2907
3037
|
- **unimplemented**: any feature mentioned in the spec that you cannot find in code.
|
|
2908
3038
|
|
|
@@ -2917,13 +3047,13 @@ Schema:
|
|
|
2917
3047
|
"issues": [
|
|
2918
3048
|
{
|
|
2919
3049
|
"severity": "OK" | "WARN" | "ERROR",
|
|
2920
|
-
"category": "assertable" | "
|
|
3050
|
+
"category": "assertable" | "blocks" | "granularity" | "unimplemented",
|
|
2921
3051
|
"stepId": "step-01" | null,
|
|
2922
3052
|
"message": "<one-line summary>",
|
|
2923
3053
|
"detail": "<optional, multiline explanation>"
|
|
2924
3054
|
}
|
|
2925
3055
|
],
|
|
2926
|
-
"patch": "<COMPLETE rewritten
|
|
3056
|
+
"patch": "<COMPLETE rewritten spec.yaml, or empty string if no changes>"
|
|
2927
3057
|
}
|
|
2928
3058
|
\`\`\`
|
|
2929
3059
|
|
|
@@ -2931,123 +3061,640 @@ Schema:
|
|
|
2931
3061
|
|
|
2932
3062
|
- \`patch\` must be the COMPLETE file content if non-empty (never a diff fragment).
|
|
2933
3063
|
- The CLI replaces the file atomically with \`patch\`.
|
|
3064
|
+
- The patch must be valid YAML matching the schema above. The CLI re-parses it before applying; if it fails validation, the patch is rejected.
|
|
2934
3065
|
- For **create** mode: produce a fresh spec from the user intent.
|
|
2935
3066
|
- For **refine** mode with a non-empty user instruction: apply the user's request, plus fix any issues it introduces. Preserve the user's wording elsewhere.
|
|
2936
3067
|
- For **refine** mode with an empty user instruction: only fix issues you find against the current spec; if everything is fine, return \`patch: ""\`.
|
|
2937
3068
|
- If \`patch\` is the same as the current spec, return \`patch: ""\` instead.
|
|
2938
3069
|
`;
|
|
2939
3070
|
}
|
|
2940
|
-
function
|
|
2941
|
-
|
|
2942
|
-
|
|
2943
|
-
|
|
2944
|
-
|
|
2945
|
-
|
|
2946
|
-
|
|
2947
|
-
|
|
2948
|
-
|
|
2949
|
-
|
|
2950
|
-
|
|
2951
|
-
|
|
2952
|
-
|
|
2953
|
-
|
|
2954
|
-
|
|
2955
|
-
|
|
2956
|
-
|
|
2957
|
-
|
|
2958
|
-
|
|
2959
|
-
|
|
2960
|
-
|
|
2961
|
-
|
|
2962
|
-
|
|
2963
|
-
|
|
2964
|
-
|
|
2965
|
-
|
|
2966
|
-
|
|
2967
|
-
|
|
2968
|
-
|
|
2969
|
-
|
|
2970
|
-
|
|
3071
|
+
/**
 * Render the list of available blocks as a markdown bullet list for a prompt.
 *
 * @param {Array<{name: string, title: string, params: Array<{name: string, required?: boolean, secret?: boolean}>}>} blocks
 * @returns {string} one bullet per block followed by its params, or a
 *   placeholder sentence when no blocks exist.
 */
function formatBlockList(blocks) {
	if (blocks.length === 0) {
		return "(no blocks defined yet — only action steps are available.)";
	}
	// One line per param, tagged when optional and/or secret.
	const describeParam = (param) => {
		const optionalTag = param.required ? "" : " (optional)";
		const secretTag = param.secret ? " [secret]" : "";
		return ` - ${param.name}${optionalTag}${secretTag}`;
	};
	const entries = [];
	for (const block of blocks) {
		let paramSection = " params: (none)";
		if (block.params.length !== 0) {
			paramSection = block.params.map((param) => describeParam(param)).join("\n");
		}
		entries.push(`- \`${block.name}\` — ${block.title}\n${paramSection}`);
	}
	return entries.join("\n");
}
|
|
3078
|
+
/**
 * Build the user prompt for the draft agent.
 *
 * @param {{mode: string, existing?: string, userInput: string}} input
 *   - `mode`: "create" produces a fresh-spec prompt; any other value is
 *     treated as "refine".
 *   - `existing`: current spec.yaml text (refine mode only).
 *   - `userInput`: user intent (create) or refinement instruction (refine;
 *     may be empty, which asks for re-validation only).
 * @returns {string} the prompt text.
 */
function buildDraftPrompt(input) {
	const { mode, existing, userInput } = input;
	if (mode === "create") return `## Mode

create — no spec exists yet at the target path. Produce a fresh spec.yaml.

## User intent

${userInput}

## Task

Read the codebase under cwd. Discover concrete strings (routes, labels, titles). Produce a complete spec.yaml as the \`patch\` field, plus any issues you'd flag about your own draft.
`;
	// The closing fence must start on its own line. A spec file without a
	// trailing newline would otherwise render as "last: line```", which breaks
	// the fenced block. Coerce exactly like the template interpolation did.
	const rawSpec = `${existing}`;
	const specBody = rawSpec === "" || rawSpec.endsWith("\n") ? rawSpec : `${rawSpec}\n`;
	return `## Mode

refine — a spec already exists. Apply the user's instruction (if any) and validate against the codebase.

## Current spec

\`\`\`yaml
${specBody}\`\`\`

${userInput ? `## User refinement instruction\n\n${userInput}\n` : `## User refinement instruction\n\n(empty — re-validate the current spec against the codebase; only emit a non-empty patch if something is actually wrong)\n`}## Task

1. Read the codebase under cwd and any referenced blocks (\`.ccqa/blocks/<name>/spec.yaml\`).
2. If the user's instruction is non-empty, apply it to the spec.
3. Validate the resulting spec on the four axes. Emit issues.
4. Return the complete updated spec as \`patch\`. If no changes are needed, return \`patch: ""\`.
`;
}
|
|
3109
|
+
//#endregion
|
|
3110
|
+
//#region src/prompts/drift.ts
|
|
3111
|
+
/**
 * Build the system prompt for drift analysis: the draft system prompt for
 * `blocks`, followed by drift-specific instructions (always return an empty
 * `patch`, report only spec/source divergence, and the ERROR/WARN/OK
 * severity policy).
 *
 * @param blocks - forwarded unchanged to `buildDraftSystemPrompt`.
 * @returns {string} the combined system prompt.
 */
function buildDriftSystemPrompt(blocks) {
	return `${buildDraftSystemPrompt(blocks)}

## Drift mode

You are running non-interactively in CI. The user will not see or apply the patch — only the \`issues\` array.

- Always set \`patch\` to "" in your response.
- Focus issue messages on what is **out of sync** between the spec and the current codebase: missing aria-labels, renamed routes, removed buttons, placeholders that no longer exist, include references that point to non-existent blocks.
- Do NOT raise issues about stylistic preferences in the spec wording.
- Treat \`category: unimplemented\` as the primary signal for drift: anything the spec asserts that you cannot find in code is a drift finding.

## Drift severity policy (STRICT)

The CLI exits non-zero when any issue has \`severity: "ERROR"\` (default) or — with \`--severity warn\` — when any \`WARN\` is present. Pick severity by **whether a deterministic replay of this spec would fail today**, not by how confident you are in your own analysis.

### CRITICAL: spec ↔ source mismatch is ERROR, not "vague phrasing" WARN

The most common false negative is treating a concrete spec/source mismatch as a WARN about "expected phrasing." It is not. Apply this decision rule **before** picking severity:

1. **Pick the concrete strings the spec asserts** in each step's \`expected\` (visible text, aria-labels, button labels, route paths). For \`expected\` like "the Dashboard page is visible", the spec is asserting that the literal string "Dashboard" — or the page conceptually identified by that label — is rendered.
2. **Search the source** for those exact strings (\`Grep\` / \`Read\`) at the location the step references (the relevant page/component/route).
3. Classify:
- **ERROR** — the source instead renders a *different* string in that location (e.g. spec says "Dashboard", the breadcrumb in \`DashboardPage.tsx\` now renders "Overview"). A replay against the current source would fail; a replay against a stale staging environment would pass and *hide* the drift — exactly the case drift CI exists to catch. Cite both sides in \`detail\`: the spec line and the file:line of the source mismatch.
- **WARN (vague phrasing)** — the source's actual string IS present somewhere relevant; the \`expected\` just paraphrases it more loosely (e.g. spec says "the Save button is visible" and the source has both visible "Save" text and \`aria-label="Save"\`). Replay still passes; the spec could just be tightened.
- **OK** — the spec's exact string appears in source at the relevant location.

Use **ERROR** when the spec would break on replay:
- A selector the spec relies on (\`aria-label\`, \`placeholder\`, \`data-testid\`, button text) **does not exist anywhere in the source**.
- A URL / route the spec navigates to is no longer defined.
- An \`expected\` asserts a string or visible text that is no longer rendered by the relevant component.
- The source renders a *different* string in the place the spec describes (per the decision rule above).
- An \`include\` step references a block that does not exist under \`.ccqa/blocks/<name>/spec.yaml\`, or a \`params\` key is not declared on that block.
- The spec references a feature/page that has been removed from the codebase.

Use **WARN** when the spec is still likely to work, but quality could improve:
- The \`expected\` paraphrases a string that **still exists** in source (the literal target is findable, just imprecisely worded).
- A step bundles multiple actions, or a needed intermediate verification step is missing.
- Stable signals exist that the spec could leverage but currently doesn't.
- You are unsure whether a referenced string exists (give the user the benefit of the doubt; do not hard-fail CI on uncertainty).

Use **OK** for axes you actively verified and found no issue.

If you cannot decide between ERROR and WARN, choose WARN. Reserve ERROR for findings you can back up with a specific file path or grep result that proves the drift.

Conversely: when you DO have a citation showing a concrete spec/source mismatch (per the decision rule above), you MUST use ERROR — "vague phrasing" WARN is not a safe fallback for an actual drift.
`;
}
|
|
3159
|
+
/**
 * Build the user prompt for a drift check: a "refine" draft prompt over the
 * existing spec with an empty user instruction.
 *
 * @param {string} existing - current spec.yaml text.
 * @returns {string} the prompt text.
 */
function buildDriftUserPrompt(existing) {
	const driftInput = { mode: "refine", existing, userInput: "" };
	return buildDraftPrompt(driftInput);
}
|
|
3166
|
+
//#endregion
|
|
3167
|
+
//#region src/types.ts
|
|
3168
|
+
// One recorded step: what was done, what was observed, and its outcome.
const RouteStepSchema = z.object({
	title: z.string(),
	action: z.string(),
	observation: z.string(),
	status: z.enum([
		"PASSED",
		"FAILED",
		"SKIPPED"
	]),
	// Optional explanation for the status.
	reason: z.string().optional()
});
// Unnamed schema expression (not bound to any identifier in this bundle):
// a per-spec record with an overall status and its steps.
z.object({
	specName: z.string(),
	timestamp: z.string(),
	status: z.enum(["passed", "failed"]),
	steps: z.array(RouteStepSchema)
});
// A single finding emitted by the draft / drift agent.
const DraftIssueSchema = z.object({
	severity: z.enum([
		"OK",
		"WARN",
		"ERROR"
	]),
	// The four validation axes named in the draft prompt.
	category: z.enum([
		"assertable",
		"blocks",
		"granularity",
		"unimplemented"
	]),
	// null when the finding is not tied to a specific step.
	stepId: z.string().nullable(),
	message: z.string(),
	detail: z.string().optional()
});
// The agent's JSON reply: findings plus a full replacement spec
// (an empty string means "no changes").
const DraftReportSchema = z.object({
	issues: z.array(DraftIssueSchema),
	patch: z.string()
});
// Human-readable label for each issue category, used by the text renderer.
const DRAFT_CATEGORY_LABEL = {
	assertable: "Assertability",
	blocks: "Block references",
	granularity: "Step granularity",
	unimplemented: "Unimplemented checks"
};
// A feature/spec name pair with an optional rationale; both names must be non-empty.
const DraftNamingSchema = z.object({
	featureName: z.string().min(1),
	specName: z.string().min(1),
	reason: z.string().optional()
});
|
|
3216
|
+
//#endregion
|
|
3217
|
+
//#region src/drift/analyze.ts
|
|
3218
|
+
const DEFAULT_CONCURRENCY$1 = 3;
/**
 * Run drift checks against a list of pre-collected targets. Pure library
 * function: no commander, no process.exit, no stdout writes. Callers handle
 * presentation. `cli/drift` does the full sweep with `--changed` scoping;
 * `cli/run` calls this with just the failing specs after vitest.
 *
 * @param {object} input
 * @param {Array<object>} input.targets - specs to check; results keep this order.
 * @param {string} input.cwd - project root passed to each check.
 * @param {Array<object>} input.blocks - available blocks, forwarded to the prompt.
 * @param {number} [input.concurrency=3] - maximum parallel checks. Values
 *   below 1 (or NaN) are treated as 1 so every target is still processed.
 * @param {string} [input.model] - optional model override.
 * @param {(target: object) => void} [input.onSpecStart] - called right
 *   before each target's check begins.
 * @returns {Promise<Array<object>>} one result per target, index-aligned.
 */
async function analyzeDrift(input) {
	const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, onSpecStart } = input;
	const results = new Array(targets.length);
	// Clamp the pool to [1, targets.length]. Without the lower bound, a
	// concurrency of 0 / negative / NaN starts no workers and the function
	// returns an array of holes instead of results.
	const requested = Math.floor(Number(concurrency));
	const poolSize = Math.min(targets.length, requested >= 1 ? requested : 1);
	let cursor = 0;
	// Each worker pulls the next unclaimed index until the list is exhausted.
	const worker = async () => {
		while (true) {
			const idx = cursor++;
			if (idx >= targets.length) return;
			const target = targets[idx];
			onSpecStart?.(target);
			results[idx] = await checkSpec(target, {
				cwd,
				blocks,
				model
			});
		}
	};
	const pool = Array.from({ length: poolSize }, () => worker());
	await Promise.all(pool);
	return results;
}
|
|
3246
|
+
/**
 * Drift-check one spec: read its spec file, ask Claude (read-only tools) to
 * validate it against the codebase, and parse the JSON report.
 *
 * @param {{featureName: string, specName: string}} target
 * @param {{cwd: string, blocks: Array<object>, model?: string}} opts
 * @returns {Promise<object>} `{ target, ok: true, issues }` on success, or
 *   `{ target, ok: false, issues: [], error }` when the spec is missing,
 *   Claude reports an error, no JSON block is returned, or the report fails
 *   to parse / validate.
 */
async function checkSpec(target, opts) {
	// Every failure path shares the same result shape.
	const failure = (error) => ({
		target,
		ok: false,
		issues: [],
		error
	});
	const { featureName, specName } = target;
	const existing = await tryReadSpecFile(featureName, specName, opts.cwd);
	if (existing === null) {
		return failure(`spec file disappeared after enumeration: ${featureName}/${specName}`);
	}
	const request = {
		prompt: buildDriftUserPrompt(existing),
		systemPrompt: buildDriftSystemPrompt(opts.blocks),
		allowedTools: ["Read", "Grep", "Glob"],
		silenceBashLog: true,
		cwd: opts.cwd
	};
	if (opts.model) request.model = opts.model;
	// Streamed messages are deliberately ignored; only the final result matters.
	const response = await invokeClaudeStreaming(request, (_msg) => {});
	if (response.isError) return failure("Claude returned an error result");
	const json = extractJsonBlock(response.result);
	if (!json) return failure("Claude did not return a json block");
	try {
		const report = DraftReportSchema.parse(JSON.parse(json));
		return {
			target,
			ok: true,
			issues: report.issues
		};
	} catch (e) {
		return failure(`failed to parse drift report: ${e.message}`);
	}
}
|
|
3297
|
+
//#endregion
|
|
3298
|
+
//#region src/drift/format.ts
|
|
3299
|
+
/**
 * Format drift results and return them as a string (nothing is printed
 * here), so callers can prefix, interleave or pipe the output.
 *
 * @param {Array<object>} results - drift results to render.
 * @param {string} format - "json" or "github"; any other value renders text.
 * @param {string} cwd - project root, used by the github renderer only.
 * @returns {string}
 */
function renderDrift(results, format, cwd) {
	switch (format) {
		case "json": return renderJson(results);
		case "github": return renderGithub(results, cwd);
		default: return renderText(results);
	}
}
|
|
3309
|
+
const HEAVY_RULE = "═".repeat(72);
/**
 * Render drift results as a human-readable text report: one section per
 * spec (errors first, then warnings, then a summary of passed axes),
 * followed by a totals footer.
 *
 * @param {Array<object>} results
 * @returns {string}
 */
function renderText(results) {
	const lines = [];
	const ofSeverity = (issues, severity) => issues.filter((issue) => issue.severity === severity);
	for (const result of results) {
		const heading = `══ ${result.target.featureName}/${result.target.specName} `;
		// Pad the heading to the rule width, always leaving at least three bars.
		const padding = "═".repeat(Math.max(3, 72 - heading.length));
		lines.push("", `${heading}${padding}`);
		if (result.error) {
			lines.push(` ERROR ${result.error}`);
			continue;
		}
		const errors = ofSeverity(result.issues, "ERROR");
		const warnings = ofSeverity(result.issues, "WARN");
		const passed = ofSeverity(result.issues, "OK");
		const clean = errors.length === 0 && warnings.length === 0;
		if (clean) {
			let detail = "no issues";
			if (passed.length > 0) {
				const noun = passed.length === 1 ? "check" : "checks";
				detail = `all ${passed.length} ${noun} passed`;
			}
			lines.push(` ✓ ${detail}`);
			continue;
		}
		errors.forEach((issue) => appendFinding(lines, "ERROR", issue));
		warnings.forEach((issue) => appendFinding(lines, "WARN", issue));
		if (passed.length > 0) {
			const names = passed.map((issue) => DRAFT_CATEGORY_LABEL[issue.category]).join(", ");
			lines.push("", ` ✓ passed (${passed.length}): ${names}`);
		}
	}
	const totals = summarize(results);
	lines.push(
		"",
		HEAVY_RULE,
		` specs ${results.length} (${totals.errored} errored)`,
		` findings ${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`,
		""
	);
	return lines.join("\n");
}
|
|
3346
|
+
/**
 * Append one finding to the text report: a blank line, a header with the
 * level, category label and optional step id, the message, and the detail
 * (continuation lines indented) when present.
 *
 * @param {string[]} out - report lines; mutated in place.
 * @param {string} level - label printed in the header ("ERROR" / "WARN").
 * @param {{category: string, stepId: string|null, message: string, detail?: string}} issue
 */
function appendFinding(out, level, issue) {
	const categoryLabel = DRAFT_CATEGORY_LABEL[issue.category];
	const stepSuffix = issue.stepId ? ` ${issue.stepId}` : "";
	out.push("", ` ${level} ${categoryLabel}${stepSuffix}`, ` ${issue.message}`);
	if (issue.detail) {
		const indentedDetail = issue.detail.split("\n").join("\n ");
		out.push(` └ ${indentedDetail}`);
	}
}
|
|
3353
|
+
/**
 * Render drift results as pretty-printed JSON (`{ specs: [...] }`) with a
 * trailing newline. `error` and `detail` keys are emitted only when truthy.
 *
 * @param {Array<object>} results
 * @returns {string}
 */
function renderJson(results) {
	const toIssue = (issue) => {
		const entry = {
			severity: issue.severity,
			category: issue.category,
			stepId: issue.stepId,
			message: issue.message
		};
		if (issue.detail) entry.detail = issue.detail;
		return entry;
	};
	const specs = [];
	for (const result of results) {
		const entry = {
			feature: result.target.featureName,
			spec: result.target.specName,
			ok: result.ok
		};
		if (result.error) entry.error = result.error;
		// Assigned last so the key order stays feature, spec, ok, [error], issues.
		entry.issues = result.issues.map((issue) => toIssue(issue));
		specs.push(entry);
	}
	return `${JSON.stringify({ specs }, null, 2)}\n`;
}
|
|
3369
|
+
/**
 * Render drift results as GitHub Actions workflow commands
 * (`::error …::` / `::warning …::`), one per line. Spec-level errors become
 * a single `::error`; OK findings are omitted.
 *
 * @param {Array<object>} results
 * @param {string} cwd - project root containing `.ccqa/`.
 * @returns {string} the annotation lines with a trailing newline, or "" when
 *   there is nothing to report.
 */
function renderGithub(results, cwd) {
	const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
	const lines = [];
	for (const r of results) {
		// `file` is a command property, so it needs property escaping just like
		// `title`: an unescaped "," or ":" in the path (feature names are
		// directory names; absolute Windows paths contain ":") would end the
		// property early and corrupt the annotation.
		const file = escapeGhProp(githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName));
		if (r.error) {
			lines.push(`::error file=${file}::${escapeGhMessage(r.error)}`);
			continue;
		}
		for (const issue of r.issues) {
			if (issue.severity === "OK") continue;
			const level = issue.severity === "ERROR" ? "error" : "warning";
			const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
			const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
			lines.push(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}`);
		}
	}
	return lines.length === 0 ? "" : `${lines.join("\n")}\n`;
}
/**
 * Path of a spec's `spec.yaml` relative to the repository root, so GitHub
 * can attach the annotation to the file. Falls back to the absolute path
 * when the spec lies outside `repoRoot`.
 */
function githubRelPath(cwd, repoRoot, featureName, specName) {
	const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "spec.yaml");
	const rel = relative(repoRoot, abs);
	return rel.startsWith("..") ? abs : rel;
}
/** Escape a workflow-command message: `%`, CR and LF are percent-encoded. */
function escapeGhMessage(s) {
	return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A");
}
/** Escape a workflow-command property value: as for messages, plus `,` and `:`. */
function escapeGhProp(s) {
	return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A").replace(/,/g, "%2C").replace(/:/g, "%3A");
}
|
|
3399
|
+
/**
 * Count findings across all results.
 *
 * @param {Array<{error?: string, issues: Array<{severity: string}>}>} results
 * @returns {{error: number, warn: number, ok: number, errored: number}}
 *   `error` / `warn` / `ok` count issues by severity (anything that is not
 *   ERROR or WARN counts as ok); `errored` counts results carrying a
 *   spec-level error.
 */
function summarize(results) {
	const totals = {
		error: 0,
		warn: 0,
		ok: 0,
		errored: 0
	};
	for (const { error: specError, issues } of results) {
		if (specError) totals.errored += 1;
		for (const { severity } of issues) {
			switch (severity) {
				case "ERROR":
					totals.error += 1;
					break;
				case "WARN":
					totals.warn += 1;
					break;
				default:
					totals.ok += 1;
			}
		}
	}
	return totals;
}
|
|
3417
|
+
//#endregion
|
|
3418
|
+
//#region src/drift/exit-code.ts
|
|
3419
|
+
/**
 * Translate drift results into a process exit code.
 *
 * @param {Array<{error?: string, issues: Array<{severity: string}>}>} results
 * @param {string} threshold - when "warn", WARN findings fail as well.
 * @returns {0|1} 1 when any result has a spec-level error or a finding at or
 *   above the threshold (ERROR always fails); otherwise 0.
 */
function determineExitCode(results, threshold) {
	const failingSeverities = threshold === "warn" ? ["ERROR", "WARN"] : ["ERROR"];
	const specFails = (result) => {
		if (result.error) return true;
		return result.issues.some((issue) => failingSeverities.includes(issue.severity));
	};
	return results.some(specFails) ? 1 : 0;
}
|
|
3434
|
+
//#endregion
|
|
3435
|
+
//#region src/drift/auth.ts
|
|
3436
|
+
/**
 * Check whether a credential usable for drift analysis is present:
 *   1. a non-empty ANTHROPIC_API_KEY environment variable, or
 *   2. a `~/.claude/.credentials.json` file.
 *
 * Never throws; missing auth is reported through the return value so the
 * caller can skip drift analysis.
 *
 * @returns {{ok: true} | {ok: false, reason: string}}
 */
function driftAuthAvailable() {
	const apiKey = process.env["ANTHROPIC_API_KEY"];
	const hasApiKey = typeof apiKey === "string" && apiKey.length > 0;
	// The filesystem is only probed when no API key is set.
	const authenticated = hasApiKey || existsSync(join(homedir(), ".claude", ".credentials.json"));
	if (!authenticated) {
		return {
			ok: false,
			reason: "no ANTHROPIC_API_KEY / claude login"
		};
	}
	return { ok: true };
}
|
|
3454
|
+
//#endregion
|
|
3455
|
+
//#region src/cli/run.ts
|
|
3456
|
+
// User-provided vitest config, resolved against the working directory at module load.
const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
/**
 * Pick the vitest config to run with: the user's `.ccqa/vitest.config.ts`
 * when it is accessible, otherwise the config bundled with the package.
 *
 * @returns {Promise<string>} path to the config file.
 */
async function resolveVitestConfig() {
	let userConfigAccessible = true;
	try {
		await access(USER_VITEST_CONFIG);
	} catch {
		userConfigAccessible = false;
	}
	return userConfigAccessible ? USER_VITEST_CONFIG : bundledVitestConfigPath();
}
|
|
3465
|
+
// `ccqa run [target]`: runs the generated test scripts, optionally followed by drift analysis.
const runCommand = new Command("run")
	.argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all")
	.description("Run generated agent-browser test scripts. Pass --drift to invoke a Claude-driven drift analysis on each failing spec (skipped silently when no test fails). Requires ANTHROPIC_API_KEY or a local Claude login.")
	.option("--drift", "On vitest failure, run drift analysis on the failing specs")
	.option("--drift-strict", "Treat drift ERROR findings as a run failure (exit 1 even if vitest passed). Implies --drift.")
	.option("--format <fmt>", "Output format for the drift block: text | json | github", "text")
	.option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Used by --drift only. Overrides CCQA_MODEL.")
	.action(async (target, opts) => {
		await runTests(target, opts);
	});
|
|
3468
|
+
/**
 * Run the generated test script of every resolved spec through vitest, print
 * a summary, optionally run drift analysis, then exit the process with the
 * resulting code.
 *
 * @param {string|undefined} target - '<feature>/<spec>', '<feature>', or
 *   undefined for all specs.
 * @param {object} opts - parsed CLI options (drift, driftStrict, format, model).
 * @returns {Promise<never>} always ends in `process.exit`.
 */
async function runTests(target, opts) {
	header("run", target);
	const specs = await resolveSpecs(target);
	if (specs.length === 0) {
		error("no test scripts found");
		hint("run 'ccqa generate <feature>/<spec>' first to generate tests");
		process.exit(1);
	}
	// Resolve the config BEFORE creating the temp dir: previously a rejection
	// here happened after mkdtemp but outside the try/finally, leaking the
	// directory.
	const vitestConfig = await resolveVitestConfig();
	const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
	const summaries = [];
	let overallExitCode = 0;
	try {
		for (let i = 0; i < specs.length; i++) {
			const { featureName, specName } = specs[i];
			const scriptFile = await getTestScript(featureName, specName);
			if (!scriptFile) {
				// Specs without a generated script are skipped and not summarized.
				warn(`${featureName}/${specName}: no test.spec.ts found`);
				continue;
			}
			run(`${featureName}/${specName}`);
			meta("test", scriptFile);
			blank();
			// One JSON report per spec; indexed so the files never collide.
			const reportFile = join(tmpDir, `report-${i}.json`);
			const proc = spawnVitestStreaming([
				"run",
				"--config",
				vitestConfig,
				scriptFile,
				"--reporter=json",
				`--outputFile.json=${reportFile}`
			]);
			// Drain both streams before awaiting the exit status.
			await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]);
			const exitCode = await proc.exited;
			// The last non-zero vitest exit code becomes the overall code.
			if (exitCode !== 0) overallExitCode = exitCode;
			const report = await readReport(reportFile);
			summaries.push({
				featureName,
				specName,
				scriptFile,
				report,
				exitCode
			});
			blank();
		}
		printSummary(summaries);
		overallExitCode = await maybeRunDrift(summaries, opts, overallExitCode);
	} finally {
		// Always remove the report directory, even when a spec run throws.
		await rm(tmpDir, {
			recursive: true,
			force: true
		});
	}
	process.exit(overallExitCode);
}
|
|
3523
|
+
/**
 * Whether a spec run counts as failed: a non-zero exit code, or a report
 * listing at least one failed test.
 *
 * @param {{exitCode: number, report?: {numFailedTests?: number}|null}} s
 * @returns {boolean}
 */
function failedSpec(s) {
	const failedTests = s.report?.numFailedTests ?? 0;
	return s.exitCode !== 0 || failedTests > 0;
}
|
|
3527
|
+
/**
 * Validate the `--format` option.
 *
 * @param {string|undefined|null} raw - option value; nullish means "text".
 * @returns {string} "text", "json" or "github". Any other value logs an
 *   error and exits the process with code 2.
 */
function parseDriftFormat(raw) {
	const requested = raw ?? "text";
	const supported = ["text", "json", "github"];
	if (supported.some((name) => name === requested)) return requested;
	error(`invalid --format: ${requested} (expected text|json|github)`);
	process.exit(2);
}
|
|
3533
|
+
/**
 * Decide which spec runs get a drift check.
 *
 * - `--drift-strict`: every spec, passing or not (an audit; a spec can lag
 *   behind the source while vitest is still green against a stale
 *   environment).
 * - `--drift`: only specs that failed (the analysis explains the failure).
 * - neither: none.
 *
 * @param {Array<object>} summaries - per-spec run summaries.
 * @param {{drift?: boolean, driftStrict?: boolean}} opts
 * @returns {Array<object>}
 */
function selectDriftTargets(summaries, opts) {
	const { driftStrict, drift } = opts;
	if (!driftStrict && !drift) return [];
	return driftStrict ? summaries : summaries.filter(failedSpec);
}
|
|
3546
|
+
/**
 * Opt-in drift hook that runs after vitest. With `--drift` it analyses only
 * failed specs; with `--drift-strict` it analyses every spec and turns an
 * ERROR-level drift result into a failing exit code. When no credentials are
 * available the analysis is skipped with an info message and the vitest exit
 * code is returned untouched.
 *
 * @param {Array<object>} summaries - per-spec run summaries.
 * @param {object} opts - parsed CLI options (drift, driftStrict, format, model).
 * @param {number} currentExitCode - exit code determined by vitest.
 * @returns {Promise<number>} the exit code to finish the run with.
 */
async function maybeRunDrift(summaries, opts, currentExitCode) {
	const candidates = selectDriftTargets(summaries, opts);
	if (candidates.length === 0) return currentExitCode;
	const auth = driftAuthAvailable();
	if (!auth.ok) {
		info(`drift analysis skipped (${auth.reason})`);
		return currentExitCode;
	}
	const format = parseDriftFormat(opts.format);
	const textMode = format === "text";
	const cwd = process.cwd();
	const tree = await listFeatureTree(cwd);
	// Keep only candidates whose spec is present in the feature tree, carrying
	// over the optional metadata recorded there.
	const targets = [];
	for (const { featureName, specName } of candidates) {
		const feature = tree.find((f) => f.featureName === featureName);
		const spec = feature?.specs.find((sp) => sp.specName === specName);
		if (!spec) continue;
		const entry = {
			featureName,
			specName
		};
		if (spec.relatedPaths) entry.relatedPaths = spec.relatedPaths;
		if (spec.includedBlocks) entry.includedBlocks = spec.includedBlocks;
		targets.push(entry);
	}
	if (targets.length === 0) {
		info("drift analysis skipped (no spec.yaml found for failing specs)");
		return currentExitCode;
	}
	const analysisInput = {
		targets,
		cwd,
		blocks: await loadAvailableBlocks(cwd),
		concurrency: Math.min(3, targets.length),
		onSpecStart: (t) => {
			// Progress lines are limited to text mode.
			if (textMode) info(`drift: checking ${t.featureName}/${t.specName}`);
		}
	};
	if (opts.model) analysisInput.model = opts.model;
	const results = await analyzeDrift(analysisInput);
	if (textMode) process.stdout.write(`\n${C.cyan}${C.bold}──────── drift analysis ────────${C.reset}\n`);
	process.stdout.write(renderDrift(results, format, cwd));
	// Only strict mode lets drift findings affect the exit code; an already
	// failing vitest code is preserved.
	const driftFailed = opts.driftStrict && determineExitCode(results, "error") !== 0;
	return driftFailed ? currentExitCode || 1 : currentExitCode;
}
|
|
3594
|
+
/**
 * Read and parse a vitest JSON report. Best-effort by design: an unreadable
 * file or invalid JSON yields `null` instead of throwing.
 *
 * @param {string} path - report file path.
 * @returns {Promise<object|null>}
 */
async function readReport(path) {
	let report = null;
	try {
		const text = await readFile(path, "utf8");
		report = JSON.parse(text);
	} catch {
		// Deliberately ignored: a missing or broken report is reported as null.
	}
	return report;
}
|
|
3602
|
+
// Colors are used only when stdout is a TTY and NO_COLOR is unset.
const useColor = process.stdout.isTTY && process.env.NO_COLOR == null;
// ANSI escape per style name; every entry is "" when colors are disabled.
const C = Object.fromEntries(Object.entries({
	reset: "\x1B[0m",
	bold: "\x1B[1m",
	dim: "\x1B[2m",
	green: "\x1B[32m",
	red: "\x1B[31m",
	yellow: "\x1B[33m",
	cyan: "\x1B[36m",
	gray: "\x1B[90m"
}).map(([styleName, escapeCode]) => [styleName, useColor ? escapeCode : ""]));
|
|
3613
|
+
/**
 * Print the run summary to stdout: one line per spec (with per-assertion
 * lines and the first line of each failure message when a report exists),
 * followed by spec and test totals.
 *
 * @param {Array<{featureName: string, specName: string, exitCode: number, report: object|null}>} summaries
 */
function printSummary(summaries) {
	const emit = (text) => process.stdout.write(text);
	const pass = `${C.green}✔${C.reset}`;
	const fail = `${C.red}✖${C.reset}`;
	const tests = {
		total: 0,
		passed: 0,
		failed: 0,
		skipped: 0
	};
	emit(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
	for (const summary of summaries) {
		const { report } = summary;
		const specLabel = `${C.bold}${summary.featureName}/${summary.specName}${C.reset}`;
		if (!report) {
			// Without a report only the exit code tells whether the spec passed.
			emit(`${summary.exitCode === 0 ? pass : fail} ${specLabel} ${C.dim}(no report)${C.reset}\n`);
			continue;
		}
		tests.total += report.numTotalTests;
		tests.passed += report.numPassedTests;
		tests.failed += report.numFailedTests;
		tests.skipped += report.numPendingTests;
		const succeeded = report.success;
		const tint = succeeded ? C.green : C.red;
		emit(`${succeeded ? pass : fail} ${specLabel} ${tint}${report.numPassedTests}/${report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
		const assertions = report.testResults.flatMap((file) => file.assertionResults);
		for (const assertion of assertions) {
			const duration = assertion.duration != null ? ` ${C.gray}${formatDuration(assertion.duration)}${C.reset}` : "";
			emit(` ${assertionIcon(assertion.status)} ${assertion.fullName}${duration}\n`);
			if (assertion.status !== "failed" || !assertion.failureMessages?.length) continue;
			for (const message of assertion.failureMessages) {
				// Only the first line of each failure message is shown.
				const firstLine = message.split("\n")[0] ?? message;
				emit(` ${C.red}${firstLine}${C.reset}\n`);
			}
		}
	}
	// Spec totals go by exit code, not by report contents.
	const specsPassed = summaries.filter((s) => s.exitCode === 0).length;
	const specsFailed = summaries.length - specsPassed;
	emit("\n");
	emit(` ${C.bold}Specs${C.reset} ${summaries.length} (${C.green}${specsPassed} passed${C.reset}, ${specsFailed > 0 ? C.red : C.dim}${specsFailed} failed${C.reset})\n`);
	emit(` ${C.bold}Tests${C.reset} ${tests.total} (${C.green}${tests.passed} passed${C.reset}, ${tests.failed > 0 ? C.red : C.dim}${tests.failed} failed${C.reset}, ${C.yellow}${tests.skipped} skipped${C.reset})\n`);
	emit("\n");
}
|
|
3651
|
+
/**
 * Colored status glyph for one assertion.
 *
 * @param {string} status - vitest assertion status.
 * @returns {string|undefined} ✔ for "passed", ✖ for "failed", ◌ for
 *   "skipped" / "pending" / "todo"; undefined for any other status.
 */
function assertionIcon(status) {
	if (status === "passed") return `${C.green}✔${C.reset}`;
	if (status === "failed") return `${C.red}✖${C.reset}`;
	if (status === "skipped" || status === "pending" || status === "todo") return `${C.yellow}◌${C.reset}`;
}
|
|
3660
|
+
/**
 * Format a duration for display.
 *
 * @param {number} ms - duration in milliseconds.
 * @returns {string} whole milliseconds ("123ms") below one second, otherwise
 *   seconds with two decimals ("1.50s").
 */
function formatDuration(ms) {
	return ms < 1e3 ? `${Math.round(ms)}ms` : `${(ms / 1e3).toFixed(2)}s`;
}
|
|
3664
|
+
// Lines matching any of these patterns are dropped from the forwarded output.
const NOISE_LINE_PATTERNS = [/^JSON report written to /];
/**
 * Forward `source` to `sink` line by line, dropping noise lines. Chunks may
 * split lines anywhere, so an incomplete trailing line is carried over until
 * its newline arrives; what is left at end of stream is written without a
 * newline (unless it is noise).
 *
 * @param source - readable stream; decoded as UTF-8.
 * @param sink - object with a `write(string)` method.
 * @returns {Promise<void>} resolves once the source has ended.
 */
async function streamFiltered(source, sink) {
	const isNoise = (text) => NOISE_LINE_PATTERNS.some((pattern) => pattern.test(text));
	source.setEncoding("utf8");
	let pending = "";
	for await (const chunk of source) {
		// Everything before the last "\n" is a complete line; the remainder
		// (possibly empty) waits for the next chunk.
		const segments = (pending + chunk).split("\n");
		pending = segments.pop();
		for (const line of segments) {
			if (!isNoise(line)) sink.write(line + "\n");
		}
	}
	if (pending.length > 0 && !isNoise(pending)) sink.write(pending);
}
|
|
3680
|
+
/**
 * Expand the optional CLI target into a list of `{ featureName, specName }`.
 *
 * - no target: whatever `listAllSpecs()` returns;
 * - target containing "/": the single spec parsed by `parseSpecPath`;
 * - otherwise the target is taken as a feature name and every spec name from
 *   `listSpecsForFeature(target)` is paired with it.
 */
async function resolveSpecs(target) {
	if (!target) return listAllSpecs();
	if (!target.includes("/")) {
		const specNames = await listSpecsForFeature(target);
		return specNames.map((specName) => ({
			featureName: target,
			specName
		}));
	}
	const parsed = parseSpecPath(target);
	return [{
		featureName: parsed.featureName,
		specName: parsed.specName
	}];
}
|
|
2972
|
-
//#endregion
|
|
2973
|
-
//#region src/types.ts
|
|
2974
|
-
const TestStepSchema = z.object({
|
|
2975
|
-
id: z.string(),
|
|
2976
|
-
title: z.string(),
|
|
2977
|
-
instruction: z.string(),
|
|
2978
|
-
expected: z.string()
|
|
2979
|
-
});
|
|
2980
|
-
const SetupRefSchema = z.object({
|
|
2981
|
-
name: z.string(),
|
|
2982
|
-
params: z.record(z.string(), z.string()).optional()
|
|
2983
|
-
});
|
|
2984
|
-
z.object({
|
|
2985
|
-
title: z.string(),
|
|
2986
|
-
baseUrl: z.string(),
|
|
2987
|
-
prerequisites: z.string().optional(),
|
|
2988
|
-
setups: z.array(SetupRefSchema).optional(),
|
|
2989
|
-
steps: z.array(TestStepSchema)
|
|
2990
|
-
});
|
|
2991
|
-
const PlaceholderDefSchema = z.object({
|
|
2992
|
-
dummy: z.string(),
|
|
2993
|
-
description: z.string().optional()
|
|
2994
|
-
});
|
|
2995
|
-
z.object({
|
|
2996
|
-
title: z.string(),
|
|
2997
|
-
placeholders: z.record(z.string(), PlaceholderDefSchema).optional(),
|
|
2998
|
-
steps: z.array(TestStepSchema)
|
|
2999
|
-
});
|
|
3000
|
-
const RouteStepSchema = z.object({
|
|
3001
|
-
title: z.string(),
|
|
3002
|
-
action: z.string(),
|
|
3003
|
-
observation: z.string(),
|
|
3004
|
-
status: z.enum([
|
|
3005
|
-
"PASSED",
|
|
3006
|
-
"FAILED",
|
|
3007
|
-
"SKIPPED"
|
|
3008
|
-
]),
|
|
3009
|
-
reason: z.string().optional()
|
|
3010
|
-
});
|
|
3011
|
-
z.object({
|
|
3012
|
-
specName: z.string(),
|
|
3013
|
-
timestamp: z.string(),
|
|
3014
|
-
status: z.enum(["passed", "failed"]),
|
|
3015
|
-
steps: z.array(RouteStepSchema)
|
|
3016
|
-
});
|
|
3017
|
-
const DraftIssueSchema = z.object({
|
|
3018
|
-
severity: z.enum([
|
|
3019
|
-
"OK",
|
|
3020
|
-
"WARN",
|
|
3021
|
-
"ERROR"
|
|
3022
|
-
]),
|
|
3023
|
-
category: z.enum([
|
|
3024
|
-
"assertable",
|
|
3025
|
-
"setups",
|
|
3026
|
-
"granularity",
|
|
3027
|
-
"unimplemented"
|
|
3028
|
-
]),
|
|
3029
|
-
stepId: z.string().nullable(),
|
|
3030
|
-
message: z.string(),
|
|
3031
|
-
detail: z.string().optional()
|
|
3032
|
-
});
|
|
3033
|
-
const DraftReportSchema = z.object({
|
|
3034
|
-
issues: z.array(DraftIssueSchema),
|
|
3035
|
-
patch: z.string()
|
|
3036
|
-
});
|
|
3037
|
-
const DraftNamingSchema = z.object({
|
|
3038
|
-
featureName: z.string().min(1),
|
|
3039
|
-
specName: z.string().min(1),
|
|
3040
|
-
reason: z.string().optional()
|
|
3041
|
-
});
|
|
3042
3694
|
//#endregion
|
|
3043
3695
|
//#region src/cli/draft.ts
|
|
3044
|
-
const CATEGORY_LABEL =
|
|
3045
|
-
|
|
3046
|
-
setups: "Setup references",
|
|
3047
|
-
granularity: "Step granularity",
|
|
3048
|
-
unimplemented: "Unimplemented checks"
|
|
3049
|
-
};
|
|
3050
|
-
const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a test-spec.md with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
|
|
3696
|
+
const CATEGORY_LABEL = DRAFT_CATEGORY_LABEL;
|
|
3697
|
+
const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a spec.yaml with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
|
|
3051
3698
|
await ensureCcqaDir();
|
|
3052
3699
|
let featureName;
|
|
3053
3700
|
let specName;
|
|
@@ -3097,7 +3744,7 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
|
|
|
3097
3744
|
async function runOneTurn(input) {
|
|
3098
3745
|
const { featureName, specName, existing, userInput, autoApply } = input;
|
|
3099
3746
|
const isFirstRun = existing === null;
|
|
3100
|
-
const systemPrompt = buildDraftSystemPrompt();
|
|
3747
|
+
const systemPrompt = buildDraftSystemPrompt(await loadAvailableBlocks());
|
|
3101
3748
|
const userPrompt = buildDraftPrompt({
|
|
3102
3749
|
mode: isFirstRun ? "create" : "refine",
|
|
3103
3750
|
existing: existing ?? "",
|
|
@@ -3261,10 +3908,7 @@ async function proposeNaming(opts) {
|
|
|
3261
3908
|
const tree = await listFeatureTree();
|
|
3262
3909
|
const treeForPrompt = tree.map((f) => ({
|
|
3263
3910
|
featureName: f.featureName,
|
|
3264
|
-
specs: f.specs.map((s) => ({
|
|
3265
|
-
specName: s.specName,
|
|
3266
|
-
...s.title ? { title: s.title } : {}
|
|
3267
|
-
}))
|
|
3911
|
+
specs: f.specs.map((s) => ({ specName: s.specName }))
|
|
3268
3912
|
}));
|
|
3269
3913
|
info("Proposing a feature/spec name based on your intent...");
|
|
3270
3914
|
const { result, isError } = await invokeClaudeStreaming({
|
|
@@ -3367,13 +4011,6 @@ function ensureUnique(tree, featureName, specName) {
|
|
|
3367
4011
|
specName: `${specName}-${Date.now()}`
|
|
3368
4012
|
};
|
|
3369
4013
|
}
|
|
3370
|
-
function extractJsonBlock(text) {
|
|
3371
|
-
const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
|
|
3372
|
-
if (fenced && fenced[1]) return fenced[1].trim();
|
|
3373
|
-
const trimmed = text.trim();
|
|
3374
|
-
if (trimmed.startsWith("{") && trimmed.endsWith("}")) return trimmed;
|
|
3375
|
-
return null;
|
|
3376
|
-
}
|
|
3377
4014
|
function printUnifiedDiff(before, after) {
|
|
3378
4015
|
const lines = computeLineDiff(before.split("\n"), after.split("\n"));
|
|
3379
4016
|
for (const line of lines) process.stdout.write(line + "\n");
|
|
@@ -3421,6 +4058,426 @@ function truncate(s, n) {
|
|
|
3421
4058
|
return s.slice(s.length - n);
|
|
3422
4059
|
}
|
|
3423
4060
|
//#endregion
|
|
4061
|
+
//#region src/drift/affected.ts
|
|
4062
|
+
const execFileP = promisify(execFile);
|
|
4063
|
+
/**
 * Pick the git ref that `ccqa drift --changed` diffs against.
 *
 * Order of precedence:
 *   1. a non-empty `explicit` value, returned untouched;
 *   2. a non-empty `GITHUB_BASE_REF` environment variable, prefixed with
 *      "origin/" unless it already starts with it;
 *   3. the fallback "origin/main".
 */
function resolveBaseRef(explicit) {
	const hasText = (value) => Boolean(value) && value.length > 0;
	if (hasText(explicit)) return explicit;
	const fromCi = process.env["GITHUB_BASE_REF"];
	if (!hasText(fromCi)) return "origin/main";
	if (fromCi.startsWith("origin/")) return fromCi;
	return `origin/${fromCi}`;
}
|
|
4073
|
+
/**
 * List the files changed between `base` and HEAD, with paths relative to `cwd`.
 *
 * Two git commands are started concurrently from `cwd`:
 *   - `git rev-parse --show-toplevel` to learn the repository root;
 *   - `git diff --name-status -M <base>...HEAD` (32 MiB output buffer).
 *
 * The diff output is parsed by `parseGitDiffOutput` and then re-rooted by
 * `rerootChangedFiles`, which is what turns repo-root-relative paths into
 * `cwd`-relative ones and discards changes outside `cwd` (relevant when `cwd`
 * is a sub-package of a monorepo).
 *
 * Rejects with the underlying error if either git command fails.
 */
async function getChangedFiles(base, cwd) {
	const toplevelCall = execFileP("git", ["rev-parse", "--show-toplevel"], { cwd });
	const diffCall = execFileP("git", [
		"diff",
		"--name-status",
		"-M",
		`${base}...HEAD`
	], {
		cwd,
		maxBuffer: 32 * 1024 * 1024
	});
	const [toplevel, diff] = await Promise.all([toplevelCall, diffCall]);
	const entries = parseGitDiffOutput(diff.stdout);
	return rerootChangedFiles(entries, toplevel.stdout.trim(), cwd);
}
|
|
4097
|
+
/**
 * Convert the paths in `entries` from repo-root-relative to `cwd`-relative and
 * drop every entry that lies outside `cwd`.
 *
 * @param entries Parsed diff entries (`{ path, status }`), paths relative to `repoRoot`.
 * @param repoRoot Absolute path of the git work tree root.
 * @param cwd Absolute path of the directory the specs are rooted at.
 * @returns The same array when `cwd` is the repo root; otherwise a new array of
 *   copied entries with rewritten `path`.
 */
function rerootChangedFiles(entries, repoRoot, cwd) {
	const prefix = relative(repoRoot, cwd);
	if (!prefix) return entries;
	const out = [];
	for (const e of entries) {
		const rel = relative(prefix, e.path);
		if (rel === "") continue;
		// Only a leading ".." path SEGMENT means "outside cwd". A plain
		// startsWith("..") would also discard in-package files whose name merely
		// begins with two dots (e.g. "..eslintrc").
		const firstSegment = rel.split(/[\\/]/, 1)[0];
		if (firstSegment === "..") continue;
		out.push({
			...e,
			path: rel
		});
	}
	return out;
}
|
|
4115
|
+
/**
 * Parse the output of `git diff --name-status -M` into `{ path, status }` entries.
 *
 * Each non-blank line is `<code>TAB<path>` or, for renames and copies,
 * `<code>TAB<old path>TAB<new path>`. Fields are split on TAB (the separator
 * git emits), so paths containing spaces stay intact.
 *
 * Status mapping (by first letter of the code):
 *   R -> "renamed" (new path), C -> "added" (new path), A -> "added",
 *   D -> "deleted", M / T / anything else -> "modified".
 *
 * @param {string} stdout Raw stdout of the git command.
 * @returns {{ path: string, status: string }[]} Entries in output order; lines
 *   without the expected path field are skipped.
 */
function parseGitDiffOutput(stdout) {
	const STATUS_BY_CODE = {
		A: "added",
		M: "modified",
		T: "modified",
		D: "deleted"
	};
	const out = [];
	for (const line of stdout.split("\n")) {
		if (!line.trim()) continue;
		const [code, firstPath, secondPath] = line.split("\t");
		if (!code) continue;
		const kind = code[0];
		if (kind === "R" || kind === "C") {
			// Rename/copy codes carry a similarity score (e.g. "R100") and two
			// paths; only the destination matters for spec matching.
			if (secondPath) out.push({
				path: secondPath,
				status: kind === "R" ? "renamed" : "added"
			});
			continue;
		}
		if (!firstPath) continue;
		out.push({
			path: firstPath,
			// Unknown codes are treated as a modification so the file is never
			// silently ignored.
			status: STATUS_BY_CODE[kind] ?? "modified"
		});
	}
	return out;
}
|
|
4168
|
+
/** Remove one leading "./" from `s`, if present; otherwise return `s` as is. */
function stripLeadingDotSlash(s) {
	if (!s.startsWith("./")) return s;
	return s.slice(2);
}
|
|
4171
|
+
/** Memo of compiled globs, keyed by the pattern exactly as the caller passed it. */
const REGEX_CACHE = /* @__PURE__ */ new Map();
/**
 * Return the RegExp for `pattern`, compiling it at most once.
 * A leading "./" is stripped from the pattern before compilation.
 */
function compileGlob(pattern) {
	let regex = REGEX_CACHE.get(pattern);
	if (!regex) {
		regex = globToRegExp(stripLeadingDotSlash(pattern));
		REGEX_CACHE.set(pattern, regex);
	}
	return regex;
}
|
|
4180
|
+
// Translate a path glob into an anchored RegExp.
//
// Supported syntax:
//   ?        exactly one character other than "/"
//   *        any run of characters other than "/"
//   **       any run of characters, "/" included
//   x/**     "x" itself and everything below it
//   **/x     "x" at any depth, the top level included
//   x/**/y   "y" below "x" with zero or more directories in between
// Every other character matches literally (regex metacharacters are escaped);
// brace and bracket expressions are not supported.
//
// When "**" touches a slash, that slash stays a mandatory directory boundary.
// Making it optional would let "src/**" match the sibling "srcfoo/a.ts" and
// "**/foo.js" match "barfoo.js".
function globToRegExp(pattern) {
	let re = "^";
	let i = 0;
	while (i < pattern.length) {
		const ch = pattern[i];
		if (ch === "?") {
			re += "[^/]";
			i++;
			continue;
		}
		if (ch !== "*") {
			re += /[.+^${}()|[\]\\]/.test(ch) ? "\\" + ch : ch;
			i++;
			continue;
		}
		if (pattern[i + 1] !== "*") {
			re += "[^/]*";
			i++;
			continue;
		}
		const afterSlash = re.endsWith("/");
		const beforeSlash = pattern[i + 2] === "/";
		const consumed = beforeSlash ? 3 : 2;
		const isTail = i + consumed >= pattern.length;
		if (beforeSlash && !isTail) {
			// "**/" followed by more pattern: zero or more whole directories.
			re += "(?:.*/)?";
		} else if (afterSlash) {
			// "x/**" (optionally with a trailing "/"): x itself or anything under it.
			re = re.slice(0, -1) + "(?:/.*)?";
		} else {
			re += ".*";
		}
		i += consumed;
	}
	return new RegExp(re + "$");
}
|
|
4209
|
+
/**
 * Tell whether `changedPath` matches at least one glob in `relatedPaths`.
 *
 * A leading "./" on the changed path is ignored. With an empty `relatedPaths`
 * the answer is always false; treating a spec without declared paths as
 * "always affected" is the caller's decision, not this function's.
 */
function isPathAffectedBy(changedPath, relatedPaths) {
	const candidate = stripLeadingDotSlash(changedPath);
	return relatedPaths.some((glob) => compileGlob(glob).test(candidate));
}
|
|
4219
|
+
//#endregion
|
|
4220
|
+
//#region src/drift/route-new-files.ts
|
|
4221
|
+
/**
 * Ask Claude which existing specs the newly added files might affect.
 *
 * The prompt contains a short preview of every new file (see `readHead`) and
 * every candidate spec with its relatedPaths globs. The answer is expected to
 * be a JSON object `{ "affectedSpecs": ["<feature>/<spec>", ...] }`.
 *
 * The router is an additional signal on top of glob matching, so every failure
 * mode (Claude error, missing JSON block, unparsable JSON) only logs a warning
 * and yields an empty set instead of aborting the drift run.
 *
 * @param input `{ newFiles, specs, cwd, model }`: paths relative to `cwd`,
 *   candidate specs, working directory, and optional model override.
 * @returns {Promise<Set<string>>} Keys taken from the answer that correspond to
 *   one of the given specs; unknown keys and non-string items are ignored.
 */
async function routeNewFilesToSpecs(input) {
	const { newFiles, specs, cwd, model } = input;
	const none = /* @__PURE__ */ new Set();
	if (newFiles.length === 0 || specs.length === 0) return none;
	const skip = (why) => {
		warn(`new-file router: ${why}; skipping router signal`);
		return none;
	};
	const previews = await Promise.all(newFiles.map(async (path) => ({
		path,
		head: await readHead(join(cwd, path))
	})));
	const request = {
		prompt: buildRouterPrompt(previews, specs),
		systemPrompt: buildRouterSystemPrompt(),
		allowedTools: [
			"Read",
			"Grep",
			"Glob"
		],
		silenceBashLog: true,
		cwd
	};
	if (model) request.model = model;
	const { result, isError } = await invokeClaudeStreaming(request, (_msg) => {});
	if (isError) return skip("Claude returned an error");
	const json = extractJsonBlock(result);
	if (!json) return skip("no JSON block in response");
	let parsed;
	try {
		parsed = JSON.parse(json);
	} catch (e) {
		return skip(`failed to parse JSON (${e.message})`);
	}
	const hasList = typeof parsed === "object" && parsed !== null && "affectedSpecs" in parsed;
	const listed = hasList ? parsed.affectedSpecs : void 0;
	const validKeys = new Set(specs.map((s) => `${s.featureName}/${s.specName}`));
	const out = /* @__PURE__ */ new Set();
	if (!Array.isArray(listed)) return out;
	for (const item of listed) {
		// Only accept keys we actually offered; the model may invent others.
		if (typeof item === "string" && validKeys.has(item)) out.add(item);
	}
	return out;
}
|
|
4277
|
+
/**
 * Read the first 40 lines of a UTF-8 text file.
 *
 * @param {string} absPath File to preview.
 * @returns {Promise<string>} The leading lines joined with "\n", or "" when the
 *   file is empty or cannot be read (read errors are deliberately swallowed:
 *   the preview is best-effort).
 */
async function readHead(absPath) {
	let text;
	try {
		text = await readFile(absPath, "utf-8");
	} catch {
		text = "";
	}
	return text ? text.split("\n", 40).join("\n") : "";
}
|
|
4282
|
+
/**
 * System prompt for the new-file router call.
 *
 * It instructs the model to triage conservatively and to reply with exactly one
 * fenced JSON block of the form `{ "affectedSpecs": [...] }`, which is the shape
 * the caller parses.
 *
 * @returns {string} The constant prompt text.
 */
function buildRouterSystemPrompt() {
	const systemPrompt = `You triage which ccqa test specs are potentially affected by NEW source files added in a pull request.

You will receive:
- A list of new files (path + first ~40 lines of each)
- A list of existing specs with their declared relatedPaths globs

Your job: return the spec keys (in "<feature>/<spec>" form) whose behaviour might depend on any of the new files.

## Rules

- Be **conservative**: when in doubt, include the spec. A spurious inclusion costs one extra drift check; a missed spec lets real drift slip through CI.
- Use \`Read\`, \`Grep\`, \`Glob\` if you need to inspect the spec body or related code, but stay focused — this is a triage step, not a full review.
- Ignore specs whose relatedPaths clearly point to a different area than every new file (e.g. \`src/auth/**\` specs vs new files only under \`src/billing/**\`).
- Files like tests, generated code, build artifacts, vendor dirs typically do not affect any spec. Skip them.

## Output (STRICT)

Output ONE fenced \`\`\`json block, nothing else:

\`\`\`json
{
"affectedSpecs": ["feature/spec", "feature/spec"]
}
\`\`\`

Use exactly the keys you saw in the input ("<feature>/<spec>"). Return an empty array if no spec is affected.
`;
	return systemPrompt;
}
|
|
4311
|
+
/**
 * User prompt for the new-file router call.
 *
 * @param previews `{ path, head }` per new file; an empty `head` is rendered as
 *   "(empty or unreadable)" instead of a fenced preview.
 * @param specs `{ featureName, specName, relatedPaths }` per candidate spec; an
 *   empty `relatedPaths` array is rendered as "(no relatedPaths declared)".
 * @returns {string} Markdown with "New files", "Existing specs" and "Task" sections.
 */
function buildRouterPrompt(previews, specs) {
	const fileSections = previews.map(({ path, head }) => {
		const preview = head ? `\n\`\`\`\n${head}\n\`\`\`` : "\n(empty or unreadable)";
		return `### ${path}${preview}`;
	});
	const specEntries = specs.map((spec) => {
		const globs = spec.relatedPaths.length === 0 ? " (no relatedPaths declared)" : spec.relatedPaths.map((glob) => ` - ${glob}`).join("\n");
		return `- ${spec.featureName}/${spec.specName}\n${globs}`;
	});
	return `## New files

${fileSections.join("\n\n")}

## Existing specs

${specEntries.join("\n")}

## Task

Return the spec keys that might be affected by any of the new files. Conservative inclusion is preferred over missing real drift.
`;
}
|
|
4331
|
+
//#endregion
|
|
4332
|
+
//#region src/cli/drift.ts
|
|
4333
|
+
/** Number of specs checked in parallel when --concurrency is not given. */
const DEFAULT_CONCURRENCY = 3;
/**
 * `ccqa drift [feature/spec]`: report whether specs are still in sync with the
 * codebase.
 *
 * Flow of the action handler:
 *   1. validate --format / --severity / --concurrency (invalid values exit 2);
 *   2. resolve the working directory and make sure the .ccqa dir exists;
 *   3. reject the combination of --changed with an explicit spec id (exit 2);
 *   4. collect target specs; with --changed, narrow them to the ones touched by
 *      the git diff (exit 0 when nothing is left to check);
 *   5. run the drift analysis, print the rendered report, and exit with the
 *      code derived from the results and the severity threshold.
 */
const driftCommand = new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each spec.yaml is still in sync with the current codebase (CI-friendly, no patches applied).").option("--format <fmt>", "Output format: text | json | github", "text").option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error").option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`).option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().").option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.").option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.").action(async (specPath, opts) => {
	const format = parseFormat(opts.format);
	const threshold = parseSeverity(opts.severity);
	const concurrency = parseConcurrency(opts.concurrency);
	const cwd = opts.cwd ? resolve(opts.cwd) : process.cwd();
	await ensureCcqaDir(cwd);
	if (opts.changed && specPath) {
		error("--changed and an explicit spec id cannot be combined; --changed only applies to a full sweep");
		process.exit(2);
	}
	let targets = await collectTargets(specPath, cwd);
	if (targets.length === 0) exitWithNoSpecs(format, "no test specs found under .ccqa/features/");
	if (format === "text") {
		header("drift", specPath ?? `${targets.length} spec${targets.length > 1 ? "s" : ""}`);
		if (opts.cwd) meta("cwd", cwd);
	}
	if (opts.changed) {
		const total = targets.length;
		targets = await filterByChanged({
			targets,
			cwd,
			baseOverride: opts.base,
			format,
			model: opts.model
		});
		if (format === "text") meta("scoped", `${targets.length} of ${total} spec${total > 1 ? "s" : ""}`);
		if (targets.length === 0) exitWithNoSpecs(format, "no specs intersect the changed file set; nothing to check");
	}
	const blocks = await loadAvailableBlocks(cwd);
	const results = await analyzeDrift({
		targets,
		cwd,
		blocks,
		concurrency,
		...opts.model ? { model: opts.model } : {},
		onSpecStart: (t) => {
			if (format === "text") info(`checking ${t.featureName}/${t.specName}`);
		}
	});
	const report = renderDrift(results, format, cwd);
	// Exit only after the report has been handed off: process.exit() does not
	// wait for pending stdout writes, so exiting right after write() can
	// truncate a large report where stdout is asynchronous (e.g. a pipe on
	// Windows).
	process.stdout.write(report, () => {
		process.exit(determineExitCode(results, threshold));
	});
});
|
|
4376
|
+
/**
 * Terminate successfully (exit code 0) when there is nothing to check.
 *
 * - "json": prints an empty result document so consumers still get valid JSON;
 * - "text": logs `message` via `info`;
 * - any other format: prints nothing.
 *
 * Does not return: callers rely on `process.exit` ending the process.
 */
function exitWithNoSpecs(format, message) {
	switch (format) {
		case "json":
			process.stdout.write(`${JSON.stringify({ specs: [] }, null, 2)}\n`);
			break;
		case "text":
			info(message);
			break;
	}
	process.exit(0);
}
|
|
4381
|
+
/**
 * Narrow `targets` to the specs touched by the git diff against the base ref.
 *
 * A spec is selected when any of the following holds:
 *   - it declares no `relatedPaths` (unscoped specs are always checked);
 *   - one of its `relatedPaths` globs matches a changed file;
 *   - one of its `includedBlocks` is a block whose file changed
 *     (as recognised by `parseBlockPath`);
 *   - the Claude router attributes one of the newly added files to it.
 *
 * The router is only consulted for specs that are still undecided after the
 * cheap checks, and not at all when none remain: its result can only add
 * specs, so offering already-selected specs would spend a model call (and
 * prompt tokens) without being able to change the outcome.
 *
 * Exits the process with code 2 when the git diff cannot be computed.
 *
 * @param input `{ targets, cwd, baseOverride, format, model }`.
 * @returns The selected targets, in their original order ([] for an empty diff).
 */
async function filterByChanged(input) {
	const { targets, cwd, baseOverride, format, model } = input;
	const base = resolveBaseRef(baseOverride);
	let changed;
	try {
		changed = await getChangedFiles(base, cwd);
	} catch (e) {
		error(`failed to run 'git diff' against ${base}: ${e.message}`);
		process.exit(2);
	}
	if (format === "text") {
		meta("changed-base", base);
		meta("changed-files", changed.length);
	}
	if (changed.length === 0) return [];
	const touchedBlockNames = /* @__PURE__ */ new Set();
	for (const f of changed) {
		const blockName = parseBlockPath(f.path);
		if (blockName) touchedBlockNames.add(blockName);
	}
	const affected = /* @__PURE__ */ new Set();
	for (const t of targets) {
		const isUnscoped = !t.relatedPaths;
		const isSelected = isUnscoped || changed.some((f) => isPathAffectedBy(f.path, t.relatedPaths)) || Boolean(t.includedBlocks?.some((name) => touchedBlockNames.has(name)));
		if (isSelected) affected.add(specKey(t));
	}
	const newFiles = changed.filter((f) => f.status === "added");
	const undecided = targets.filter((t) => t.relatedPaths && !affected.has(specKey(t)));
	if (newFiles.length > 0 && undecided.length > 0) {
		if (format === "text") info(`routing ${newFiles.length} new file(s) to specs via Claude...`);
		const routed = await routeNewFilesToSpecs({
			newFiles: newFiles.map((f) => f.path),
			specs: undecided.map((t) => ({
				featureName: t.featureName,
				specName: t.specName,
				relatedPaths: t.relatedPaths
			})),
			cwd,
			model
		});
		for (const key of routed) affected.add(key);
	}
	return targets.filter((t) => affected.has(specKey(t)));
}
|
|
4431
|
+
/**
 * Build the list of drift targets from the feature tree under `cwd`.
 *
 * With `specPath`, the result is that single spec (with `includedBlocks`
 * defaulting to []); the process exits with code 1 when it has no spec file.
 * Without `specPath`, every spec that has a spec file is returned, carrying
 * `relatedPaths` / `includedBlocks` only when the tree entry defines them.
 */
async function collectTargets(specPath, cwd) {
	const tree = await listFeatureTree(cwd);
	if (!specPath) {
		return tree.flatMap((feature) => feature.specs.filter((spec) => spec.hasSpecFile).map((spec) => {
			const target = {
				featureName: feature.featureName,
				specName: spec.specName
			};
			if (spec.relatedPaths) target.relatedPaths = spec.relatedPaths;
			if (spec.includedBlocks) target.includedBlocks = spec.includedBlocks;
			return target;
		}));
	}
	const { featureName, specName } = parseSpecPath(specPath);
	const owner = tree.find((f) => f.featureName === featureName);
	const spec = owner?.specs.find((s) => s.specName === specName);
	if (!spec?.hasSpecFile) {
		error(`spec not found: ${featureName}/${specName} (under ${cwd})`);
		process.exit(1);
	}
	return [{
		featureName,
		specName,
		includedBlocks: spec.includedBlocks ?? []
	}];
}
|
|
4459
|
+
/**
 * Validate the --format option.
 *
 * @param raw Option value; null/undefined means "text".
 * @returns "text", "json" or "github". Any other value is reported via `error`
 *   and the process exits with code 2.
 */
function parseFormat(raw) {
	const value = raw ?? "text";
	switch (value) {
		case "text":
		case "json":
		case "github": return value;
	}
	error(`invalid --format: ${value} (expected text|json|github)`);
	process.exit(2);
}
|
|
4465
|
+
/**
 * Validate the --severity option.
 *
 * @param raw Option value; null/undefined means "error".
 * @returns "warn" or "error". Any other value is reported via `error` and the
 *   process exits with code 2.
 */
function parseSeverity(raw) {
	const value = raw ?? "error";
	const isKnown = value === "warn" || value === "error";
	if (!isKnown) {
		error(`invalid --severity: ${value} (expected warn|error)`);
		process.exit(2);
		return;
	}
	return value;
}
|
|
4471
|
+
/**
 * Validate the --concurrency option.
 *
 * @param raw Option value as given on the command line, or undefined.
 * @returns {number} `DEFAULT_CONCURRENCY` when the option is absent, otherwise
 *   the parsed positive integer. Anything that is not a plain positive decimal
 *   integer is reported via `error` and the process exits with code 2.
 */
function parseConcurrency(raw) {
	if (raw === void 0) return DEFAULT_CONCURRENCY;
	const text = String(raw).trim();
	// parseInt alone stops at the first non-digit and would silently accept
	// "3abc" as 3, "2.5" as 2 or "1e3" as 1; require the whole value to be digits.
	const n = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : NaN;
	if (!Number.isSafeInteger(n) || n < 1) {
		error(`invalid --concurrency: ${raw} (expected positive integer)`);
		process.exit(2);
	}
	return n;
}
|
|
4480
|
+
//#endregion
|
|
3424
4481
|
//#region src/cli/index.ts
|
|
3425
4482
|
const packageJsonPath = resolvePackageJson();
|
|
3426
4483
|
const { version } = JSON.parse(readFileSync(packageJsonPath, "utf8"));
|
|
@@ -3437,11 +4494,10 @@ function resolvePackageJson() {
|
|
|
3437
4494
|
const program = new Command();
|
|
3438
4495
|
program.name("ccqa").description("E2E test CLI using Claude Code + agent-browser").version(version);
|
|
3439
4496
|
program.addCommand(draftCommand);
|
|
4497
|
+
program.addCommand(driftCommand);
|
|
3440
4498
|
program.addCommand(traceCommand);
|
|
3441
4499
|
program.addCommand(generateCommand);
|
|
3442
4500
|
program.addCommand(runCommand);
|
|
3443
|
-
program.addCommand(traceSetupCommand);
|
|
3444
|
-
program.addCommand(generateSetupCommand);
|
|
3445
4501
|
program.parse();
|
|
3446
4502
|
//#endregion
|
|
3447
4503
|
export {};
|