ccqa 0.3.9 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/ccqa.mjs CHANGED
@@ -1,28 +1,41 @@
1
1
  #!/usr/bin/env node
2
+ import { n as spawnAB } from "../spawn-ab-BxjEhA5e.mjs";
2
3
  import { createRequire } from "node:module";
3
4
  import { Command } from "commander";
4
- import { accessSync, readFileSync, statSync } from "node:fs";
5
+ import { accessSync, existsSync, readFileSync, statSync } from "node:fs";
5
6
  import { fileURLToPath } from "node:url";
6
- import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, unlink, writeFile } from "node:fs/promises";
7
- import { delimiter, dirname, join, resolve } from "node:path";
7
+ import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
8
8
  import { query } from "@anthropic-ai/claude-agent-sdk";
9
- import matter from "gray-matter";
10
- import { spawn } from "node:child_process";
9
+ import { delimiter, dirname, join, relative, resolve } from "node:path";
10
+ import { parse, stringify } from "yaml";
11
+ import { ZodError, z } from "zod";
12
+ import { execFile, spawn } from "node:child_process";
11
13
  import { createInterface } from "node:readline";
12
- import { tmpdir } from "node:os";
14
+ import { homedir, tmpdir } from "node:os";
13
15
  import { createInterface as createInterface$1 } from "node:readline/promises";
14
- import { z } from "zod";
16
+ import { promisify } from "node:util";
15
17
  //#region src/prompts/trace.ts
16
18
  function generateSessionName() {
17
19
  return `ccqa-trace-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`;
18
20
  }
19
- function buildTraceSystemPrompt(spec, options) {
20
- const sessionName = options?.sessionName ?? generateSessionName();
21
- const skipCookiesClear = options?.skipCookiesClear ?? false;
22
- const stepsText = spec.steps.map((step) => `### ${step.id}: ${step.title}
21
+ /**
22
+ * Build the trace system prompt. `input.steps` is a flat list with includes
23
+ * already expanded (each step carries id / source / instruction / expected).
24
+ * The spec opens URLs via explicit step instructions (e.g.
25
+ * `instruction: "${APP_URL}/articles を開く"`).
26
+ *
27
+ * In v0.4 every spec is traced from scratch — block contents are inlined
28
+ * into the spec's own step list at expand time, so the prompt has no
29
+ * special "this is a block" mode. The `source` tag on each step still
30
+ * distinguishes spec-native steps from inlined block steps for the
31
+ * `// step:` comments in the eventual codegen output.
32
+ */
33
+ function buildTraceSystemPrompt(input) {
34
+ const sessionName = input.sessionName ?? generateSessionName();
35
+ const stepsText = input.steps.map((step) => `### ${step.id} [${step.source}]
23
36
  - **Instruction**: ${step.instruction}
24
37
  - **Expected**: ${step.expected}`).join("\n\n");
25
- const prereqText = spec.prerequisites ? `## Prerequisites\n${spec.prerequisites}\n\n` : "";
38
+ const relatedPathsBlock = buildRelatedPathsInstruction();
26
39
  return `You are an expert QA engineer executing a browser E2E test. Execute each step precisely and record every browser action as a structured log line.
27
40
 
28
41
  ## Session
@@ -43,7 +56,8 @@ agent-browser --session SESSION uncheck "<selector>"
43
56
  agent-browser --session SESSION press <Key>
44
57
  agent-browser --session SESSION select "<selector>" "<value>"
45
58
  agent-browser --session SESSION hover "<selector>"
46
- agent-browser --session SESSION wait --text "<text>"
59
+ agent-browser --session SESSION wait --text "<text>" [--timeout <ms>]
60
+ agent-browser --session SESSION wait "<selector>" [--timeout <ms>] [--state visible|hidden]
47
61
  agent-browser --session SESSION cookies clear
48
62
  \`\`\`
49
63
 
@@ -78,17 +92,18 @@ agent-browser --session SESSION cookies clear
78
92
 
79
93
  ## Test Specification
80
94
 
81
- Title: ${spec.title}
82
- Base URL: ${spec.baseUrl}
95
+ Title: ${input.title}
96
+
97
+ Each step's instruction names the URL to open directly (or via \`\${ENV_VAR}\`). Open exactly the URL the step says to open.
83
98
 
84
- ${prereqText}## Steps
99
+ ## Steps
85
100
 
86
101
  ${stepsText}
87
102
 
88
103
  ## Execution Workflow
89
104
 
90
105
  For each step:
91
- 1. Emit \`STEP_START|<step-id>|<step-title>\`
106
+ 1. Emit \`STEP_START|<step-id>|<short description of what this step does>\`
92
107
  2. Run \`snapshot\` and identify selectors from the ARIA tree
93
108
  3. Execute the action using an ALLOWED selector
94
109
  4. Emit \`AB_ACTION|...\` for every browser action (see below)
@@ -175,6 +190,15 @@ AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
175
190
 
176
191
  The selector in AB_ACTION must be one of the ALLOWED formats above.
177
192
 
193
+ **CRITICAL — record only successful actions.** The AB_ACTION stream is the
194
+ canonical replay sequence: every line in it must be reproducible on a fresh
195
+ browser session. Therefore:
196
+
197
+ - If you tried a selector and \`agent-browser\` returned a non-zero exit (selector not found, element not interactable, timeout): **do NOT emit \`AB_ACTION|...\`** for that attempt. Take a fresh snapshot, switch selector, and only emit the AB_ACTION for the call that finally succeeded.
198
+ - If you explored multiple selectors for the same logical action (e.g. tried \`[aria-label='Email']\`, it failed, then \`[placeholder='Email']\` worked): emit AB_ACTION for the **working selector only**. The failed attempt must not appear in the trace.
199
+ - The same rule applies to \`AB_ACTION|assert|...\` lines: only emit them for assertions you actually verified on the current page in the current snapshot. Never declare an assertion against a selector you have not just confirmed visible — even if you intended to use it earlier.
200
+ - If a step ultimately fails after retries: emit \`ASSERTION_FAILED\` and STOP. Do NOT leave half-recorded actions for the failed step in the AB_ACTION stream.
201
+
178
202
  ## Assertion Protocol
179
203
 
180
204
  After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you confirmed.
@@ -206,9 +230,36 @@ After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you c
206
230
  **Selector rules for assert actions — CRITICAL:**
207
231
  - Use the **same ALLOWED formats** as browser actions — never invent aria-label values
208
232
  - Only use \`[aria-label='...']\` if that **exact** aria-label string appears in the current ARIA snapshot output
209
- - When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector
233
+ - When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector — but still pre-verify with \`wait --text\` per the MUST-VERIFY rule below; \`alt\`-attribute "text" will not match.
210
234
  - For \`element_disabled\`/\`element_enabled\`: use a CSS class selector if no aria-label is confirmed in the snapshot
211
235
 
236
+ **MUST-VERIFY rule — STRICT (applies to every assert except \`url_contains\`):**
237
+
238
+ The \`snapshot\` output is the **accessibility tree**: a semantic view. \`agent-browser\` queries the **real DOM**. They DO NOT always match. Two known traps:
239
+
240
+ 1. *Selector trap*: a snapshot row like \`textbox "Email address"\` is reachable via \`[placeholder='...']\` but **NOT** via \`[aria-label='...']\` if no \`aria-label\` attribute is actually set — the browser inferred the label from \`<label for=>\` / surrounding text / \`placeholder\`.
241
+ 2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes via \`wait --text\`) will NOT find it.
242
+
243
+ Before emitting an \`AB_ACTION|assert|...\` line, **verify the assertion form actually resolves on the live page**:
244
+
245
+ \`\`\`bash
246
+ # element_visible / element_enabled / element_disabled / element_checked / element_unchecked
247
+ agent-browser --session SESSION wait "<selector>" --timeout 3000
248
+
249
+ # element_not_visible
250
+ agent-browser --session SESSION wait "<selector>" --state hidden --timeout 3000
251
+
252
+ # text_visible
253
+ agent-browser --session SESSION wait --text "<text>" --timeout 3000
254
+
255
+ # text_not_visible
256
+ agent-browser --session SESSION wait --text "<text>" --state hidden --timeout 3000
257
+ \`\`\`
258
+
259
+ Apply the "record only successful actions" rule from the AB_ACTION section above. **Additionally**, when *no* form verifies — e.g. you tried \`[aria-label='X']\`, \`[placeholder='X']\`, and \`text=X\` and they all timed out, or the "text" turned out to be an \`alt\` / aria-label — **DROP the assertion entirely**. Fewer, real assertions beat invented ones that fail at replay. Prefer swapping a failed \`text_visible\` for an \`element_visible\` against the link/button selector when the visible label came from \`alt\` / aria-label.
260
+
261
+ \`url_contains\` is exempt — it checks the current URL string, not the DOM/accessibility tree.
262
+
212
263
  **Examples:**
213
264
  \`\`\`
214
265
  AB_ACTION|assert|url_contains|||/dashboard|Navigated to dashboard
@@ -224,7 +275,7 @@ AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
224
275
  Emit exactly one status line per step (outside any code block):
225
276
 
226
277
  \`\`\`
227
- STEP_START|<step-id>|<step-title>
278
+ STEP_START|<step-id>|<short description of what this step does>
228
279
  STEP_DONE|<step-id>|<what was verified>
229
280
  ASSERTION_FAILED|<step-id>|<category: app-bug|env-issue|auth-blocked|missing-test-data|selector-drift|agent-misread>: <reason>
230
281
  STEP_SKIPPED|<step-id>|<reason>
@@ -237,45 +288,61 @@ RUN_COMPLETED|failed|<summary>
237
288
  After each step (outside any code block):
238
289
 
239
290
  \`\`\`
240
- ROUTE_STEP|<step-id>|<step-title>|ACTION:<what you did>|OBSERVATION:<what you verified>|STATUS:<PASSED|FAILED|SKIPPED>
291
+ ROUTE_STEP|<step-id>|<short description>|ACTION:<what you did>|OBSERVATION:<what you verified>|STATUS:<PASSED|FAILED|SKIPPED>
241
292
  \`\`\`
242
293
 
243
- ## Start
294
+ ${relatedPathsBlock}## Start
244
295
 
245
- ${skipCookiesClear ? `A setup procedure has already been executed in this session. Do NOT clear cookies — keep the existing session state.
296
+ Begin by clearing cookies, then proceed straight to the first step's instruction.
246
297
 
247
298
  \`\`\`bash
248
- agent-browser --session ${sessionName} open ${spec.baseUrl}
249
- \`\`\`
250
-
251
- Emit:
252
- \`\`\`
253
- AB_ACTION|open|${spec.baseUrl}
254
- \`\`\`` : `\`\`\`bash
255
299
  agent-browser --session ${sessionName} cookies clear
256
- agent-browser --session ${sessionName} open ${spec.baseUrl}
257
300
  \`\`\`
258
301
 
259
302
  Emit:
260
303
  \`\`\`
261
304
  AB_ACTION|cookies_clear
262
- AB_ACTION|open|${spec.baseUrl}
263
- \`\`\``}
305
+ \`\`\`
264
306
 
265
- Then emit \`STEP_START|step-01|...\` and begin.`;
266
- }
267
- function buildTracePrompt(spec) {
268
- return `Execute the test for "${spec.title}" at ${spec.baseUrl}.`;
307
+ Then emit \`STEP_START|step-01|...\` and execute the first step. The first step is responsible for opening the initial URL.
308
+ `;
269
309
  }
270
- function buildSetupTraceSystemPrompt(spec) {
271
- return buildTraceSystemPrompt({
272
- title: spec.title,
273
- baseUrl: "about:blank",
274
- steps: spec.steps
275
- });
310
/**
 * Return the fixed prompt section that tells the tracing agent to emit a
 * `RELATED_PATHS_BEGIN` / `RELATED_PATHS_END` block before `RUN_COMPLETED`.
 *
 * The text is static (no interpolation) and ends with a blank line so it can
 * be placed directly in front of the next `##` heading.
 *
 * @returns {string} Markdown prompt fragment.
 */
function buildRelatedPathsInstruction() {
  const instruction = `## Post-run: emit \`relatedPaths\` block

After all steps are complete (regardless of pass/fail) and **before** \`RUN_COMPLETED\`, you MUST emit a single \`RELATED_PATHS\` block. The host (not you) writes these paths into the spec — your only job is to emit the block.

\`relatedPaths\` is a list of glob patterns identifying the source files this spec depends on. CI uses them to decide whether a code change should trigger a drift check for this spec.

**Do NOT modify any source files.** You have only \`Read\`, \`Grep\`, and \`Glob\` for source inspection. The block you emit is the only output the host uses to update the spec.

**Inputs to consider:**
- The URLs you opened (\`AB_ACTION|open|...\`)
- The aria-labels, placeholders, and visible texts you clicked / filled / waited on
- The component / page / route files that render those strings (find them with \`Grep\`/\`Read\`/\`Glob\`)

**How to choose paths:**
1. For each URL the test navigates to, locate the route/page file and include it (e.g. \`src/app/tasks/page.tsx\`, \`src/pages/tasks/index.tsx\`).
2. For each unique aria-label / placeholder / visible text you interacted with, \`Grep\` the codebase, find the defining component, and include either the file or its parent feature directory.
3. Prefer **directory globs** (e.g. \`src/features/tasks/**\`) over individual files when several related components live in the same area. Otherwise list specific files.
4. Skip third-party files (\`node_modules/\`), build output (\`dist/\`, \`.next/\`), and generated code.
5. Be conservative — false positives (extra paths) are fine; false negatives (missing paths) cause drift to be missed in CI. When unsure whether a path is relevant, include it.

**Output format (STRICT — one line per path, no leading dashes, no commentary inside the block):**

\`\`\`
RELATED_PATHS_BEGIN
src/features/tasks/**
src/app/tasks/page.tsx
RELATED_PATHS_END
\`\`\`

Emit the block outside any other code block, on its own lines. If the test could not exercise the feature at all (e.g. blocked early), emit the block anyway with whatever paths you can identify; emit \`RELATED_PATHS_BEGIN\` immediately followed by \`RELATED_PATHS_END\` only if you genuinely could not identify any related file.

`;
  return instruction;
}
277
- function buildSetupTracePrompt(spec) {
278
- return `Execute the setup procedure "${spec.title}". Follow each step precisely.`;
344
/**
 * Build the user-turn prompt that starts a trace run.
 *
 * @param {string} title - Spec title; interpolated verbatim inside double quotes.
 * @returns {string} One-line prompt text.
 */
function buildTracePrompt(title) {
  const opener = `Execute the test for "${title}".`;
  const hint = "Each step's instruction includes the URL or selector context it needs.";
  return `${opener} ${hint}`;
}
280
347
  //#endregion
281
348
  //#region src/cli/logger.ts
@@ -348,9 +415,14 @@ function resolveModel(explicit) {
348
415
  return envModel && envModel.length > 0 ? envModel : void 0;
349
416
  }
350
417
  async function invokeClaudeStreaming(options, onEvent) {
351
- const { prompt, systemPrompt, allowedTools, disableBuiltinTools = false, maxTurns, env, model, onAbAction, onAbActionFailed, silenceBashLog = false } = options;
418
+ const { prompt, systemPrompt, allowedTools, disableBuiltinTools = false, maxTurns, env, model, cwd, onAbAction, onAbActionFailed, silenceBashLog = false } = options;
352
419
  const resolvedModel = resolveModel(model);
353
420
  let lastAbToolUseId = null;
421
+ const claimAbToolUse = (toolUseId) => {
422
+ if (toolUseId !== lastAbToolUseId) return false;
423
+ lastAbToolUseId = null;
424
+ return true;
425
+ };
354
426
  const sdkOptions = {
355
427
  systemPrompt,
356
428
  maxTurns,
@@ -358,6 +430,7 @@ async function invokeClaudeStreaming(options, onEvent) {
358
430
  permissionMode: "bypassPermissions",
359
431
  allowDangerouslySkipPermissions: true,
360
432
  ...resolvedModel ? { model: resolvedModel } : {},
433
+ ...cwd ? { cwd } : {},
361
434
  ...env ? { env: {
362
435
  ...process.env,
363
436
  ...env
@@ -384,13 +457,17 @@ async function invokeClaudeStreaming(options, onEvent) {
384
457
  } else lastAbToolUseId = null;
385
458
  return {};
386
459
  }] }],
460
+ PostToolUse: [{ hooks: [async (input) => {
461
+ if (input.hook_event_name !== "PostToolUse") return {};
462
+ if (input.tool_name !== "Bash") return {};
463
+ if (!isBashToolResponseError(input.tool_response)) return {};
464
+ if (claimAbToolUse(input.tool_use_id) && onAbActionFailed) onAbActionFailed();
465
+ return {};
466
+ }] }],
387
467
  PostToolUseFailure: [{ hooks: [async (input) => {
388
468
  if (input.hook_event_name !== "PostToolUseFailure") return {};
389
469
  if (input.tool_name !== "Bash") return {};
390
- if (input.tool_use_id === lastAbToolUseId && onAbActionFailed) {
391
- onAbActionFailed();
392
- lastAbToolUseId = null;
393
- }
470
+ if (claimAbToolUse(input.tool_use_id) && onAbActionFailed) onAbActionFailed();
394
471
  return {};
395
472
  }] }]
396
473
  } : void 0
@@ -460,6 +537,26 @@ function isBlockedAbSubcommand(cmd) {
460
537
  const sub = extractAbSubcommand(cmd);
461
538
  return sub !== null && BLOCKED_AB_SUBCOMMANDS.has(sub);
462
539
  }
540
/**
 * Decide whether a PostToolUse `tool_response` for the Bash tool represents a
 * failure. Three signals are recognised on an object response:
 *
 *  - `is_error === true`
 *  - a numeric `exitCode` other than 0
 *  - `killed === true`
 *
 * Anything else — `null`, primitives, objects with none of those fields — is
 * treated as success, so an unrecognised shape never reports a failure.
 *
 * @param {unknown} tool_response - Raw hook payload field.
 * @returns {boolean} True when the response signals a Bash error.
 */
function isBashToolResponseError(tool_response) {
  const isObject = typeof tool_response === "object" && tool_response !== null;
  if (!isObject) return false;
  const { is_error: flaggedError, exitCode, killed } = tool_response;
  const nonZeroExit = typeof exitCode === "number" && exitCode !== 0;
  return flaggedError === true || nonZeroExit || killed === true;
}
463
560
  /** Returns true if any argument to an agent-browser command uses a @ref selector (e.g. @e14). */
464
561
  function hasRefSelector(cmd) {
465
562
  const abIdx = cmd.indexOf("agent-browser");
@@ -521,20 +618,261 @@ async function* replayMockMessages(path) {
521
618
  }
522
619
  }
523
620
  //#endregion
621
+ //#region src/runtime/env-vars.ts
622
/** Matches `${NAME}` / `$NAME` where NAME is upper-case letters, digits and `_`. */
const ENV_VAR_RE = /\$\{([A-Z_][A-Z0-9_]*)\}|\$([A-Z_][A-Z0-9_]*)/g;
/** Same shape as ENV_VAR_RE but NAME may also contain lower-case letters. */
const ANY_VAR_RE = /\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g;
/**
 * Swap each `$NAME` / `${NAME}` reference in `value` for whatever `lookup`
 * returns for NAME.
 *
 * A reference whose lookup yields `undefined` is left exactly as written;
 * callers wanting empty-string substitution can return `""` from `lookup`.
 *
 * @param {string} value - Text that may contain variable references.
 * @param {(name: string) => string | undefined} lookup - Resolver for a name.
 * @returns {string} `value` with resolved references replaced.
 */
function substituteVars(value, lookup) {
  // ANY_VAR_RE is a shared /g regex; clear any position left by another user.
  ANY_VAR_RE.lastIndex = 0;
  const swap = (reference, braced, plain) => {
    const resolved = lookup(braced ?? plain ?? "");
    if (resolved === void 0) return reference;
    return resolved;
  };
  return value.replace(ANY_VAR_RE, swap);
}
636
/**
 * Expand `$VAR` / `${VAR}` references from `process.env`.
 *
 * Only names matched by `ENV_VAR_RE` (upper-case letters, digits, `_`) are
 * expanded; other `$text` is left untouched. An unset variable expands to the
 * empty string, mirroring `sh`, so a missing value surfaces later as a test
 * failure instead of forcing every variable to be set up front.
 *
 * @param {string} value - Text that may contain env references.
 * @returns {string} `value` with each matched reference replaced.
 */
function resolveEnvRefs(value) {
  const readFromEnv = (_reference, braced, plain) => {
    const variable = braced ?? plain ?? "";
    const found = process.env[variable];
    return found ?? "";
  };
  return value.replace(ENV_VAR_RE, readFromEnv);
}
650
/**
 * Render `value` as a JavaScript string expression in which every
 * `$VAR` / `${VAR}` reference becomes a runtime `process.env.VAR ?? ""` read,
 * so the generated script never contains the variable's value.
 *
 * Delegates to `refsToJsExpression` with a mapper that always answers `null`,
 * i.e. every reference is treated as an environment variable.
 *
 * Examples:
 *   "${PASSWORD}"          -> '`${process.env.PASSWORD ?? ""}`'
 *   "user-${SUFFIX}@x.com" -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
 *   "literal value"        -> '"literal value"'
 *
 * @param {string} value - Source text.
 * @returns {string} JS source for a string literal or template literal.
 */
function envRefsToJsExpression(value) {
  const treatAsEnvVar = () => null;
  return refsToJsExpression(value, treatAsEnvVar);
}
666
/**
 * Render `value` as JavaScript source for a string expression, turning each
 * `$NAME` / `${NAME}` reference into a template-literal interpolation.
 *
 * For every reference, `nameToExpr(name)` is consulted:
 *  - a non-null result is interpolated as-is (`${<result>}`), with no quoting
 *    and no `?? ""` wrap — the caller decides the shape;
 *  - `null` makes the reference read `process.env.<NAME> ?? ""` at runtime.
 *
 * Text without any reference is returned as a plain JSON string literal.
 * Otherwise the literal text around the references is escaped for a template
 * literal (backslash, backtick, and any `${` that is not a reference).
 *
 * The string is scanned once with a private copy of the pattern, so the cost
 * is linear and the shared regex's `lastIndex` is never read or written.
 *
 * @param {string} value - Source text.
 * @param {(name: string) => string | null} nameToExpr - Maps a reference name
 *   to a JS expression, or `null` to fall back to `process.env`.
 * @returns {string} JS source for a string literal or template literal.
 */
function refsToJsExpression(value, nameToExpr) {
  // Escape text that sits between references. Any `${` found here cannot start
  // a reference (references are consumed separately), so it is always escaped.
  const escapeLiteral = (text) =>
    text
      .replace(/\\/g, "\\\\")
      .replace(/`/g, "\\`")
      .replace(/\$\{/g, () => "\\${");
  const refPattern = new RegExp(ANY_VAR_RE.source, "g");
  let body = "";
  let cursor = 0;
  let sawReference = false;
  for (const match of value.matchAll(refPattern)) {
    sawReference = true;
    const name = match[1] ?? match[2] ?? "";
    const expr = nameToExpr(name);
    body += escapeLiteral(value.slice(cursor, match.index));
    body += expr !== null ? `\${${expr}}` : `\${process.env.${name} ?? ""}`;
    cursor = match.index + match[0].length;
  }
  if (!sawReference) return JSON.stringify(value);
  return `\`${body}${escapeLiteral(value.slice(cursor))}\``;
}
694
+ //#endregion
695
+ //#region src/spec/yaml-schema.ts
696
/**
 * Action step: a single user-facing browser interaction, described in natural
 * language. Both fields must be non-empty; unknown keys are rejected.
 * URLs are written inside `instruction`, verbatim or as `${ENV_VAR}`.
 */
const ActionStepSchema = z
  .object({
    instruction: z.string().min(1),
    expected: z.string().min(1)
  })
  .strict();
/**
 * Include step: references a reusable block by name, with optional
 * string-to-string `params`. Unknown keys are rejected.
 */
const IncludeStepSchema = z
  .object({
    include: z.string().min(1),
    params: z.record(z.string(), z.string()).optional()
  })
  .strict();
/**
 * A spec step is an action step or an include step; the `include` key tells
 * them apart (see `isIncludeStep`).
 */
const StepSchema = z.union([ActionStepSchema, IncludeStepSchema]);
/** Top-level spec: non-empty title, optional glob list, at least one step. */
const TestSpecSchema = z
  .object({
    title: z.string().min(1),
    relatedPaths: z.array(z.string().min(1)).optional(),
    steps: z.array(StepSchema).min(1)
  })
  .strict();
/**
 * Declaration of one block parameter. Only `name` is mandatory.
 *  - `required`: treated as true unless explicitly `false` (see `isParamRequired`).
 *  - `secret`: marks the value as sensitive.
 *  - `dummy`: placeholder value for prompts that see the block in isolation.
 *  - `description`: the parameter's semantic role.
 */
const BlockParamSchema = z
  .object({
    name: z.string().min(1),
    required: z.boolean().optional(),
    secret: z.boolean().optional(),
    dummy: z.string().optional(),
    description: z.string().optional()
  })
  .strict();
/**
 * Block spec. Steps are action steps only, so an `include` key inside a block
 * step fails validation as an unrecognised key — nested blocks are forbidden.
 */
const BlockSpecSchema = z
  .object({
    title: z.string().min(1),
    params: z.array(BlockParamSchema).optional(),
    steps: z.array(ActionStepSchema).min(1)
  })
  .strict();
754
/**
 * Tell include steps apart from action steps.
 *
 * @param {object} step - A parsed spec step.
 * @returns {boolean} True when the step has an `include` key.
 */
function isIncludeStep(step) {
  const hasIncludeKey = "include" in step;
  return hasIncludeKey;
}
758
/**
 * Report whether a block param must be supplied. A param is required unless
 * it is declared with an explicit `required: false`.
 *
 * @param {{ required?: boolean }} param - Block param declaration.
 * @returns {boolean}
 */
function isParamRequired(param) {
  const explicitlyOptional = param.required === false;
  return !explicitlyOptional;
}
762
+ //#endregion
763
+ //#region src/spec/parser.ts
764
/**
 * Parse and validate a spec.yaml document.
 *
 * @param {string} content - Raw YAML text.
 * @param {string} [source="spec.yaml"] - Label used in error messages.
 * @returns {object} The validated spec.
 * @throws {Error} On YAML syntax errors, or on schema rejections (rewritten
 *   by `enrichZodError` in non-block mode).
 */
function parseTestSpec(content, source = "spec.yaml") {
  const document = parseYamlOrThrow(content, source);
  let spec;
  try {
    spec = TestSpecSchema.parse(document);
  } catch (schemaError) {
    throw enrichZodError(schemaError, source, false);
  }
  return spec;
}
773
/**
 * Parse and validate a block's spec.yaml document.
 *
 * Schema failures are rewritten by `enrichZodError` in block mode, which
 * turns the unrecognised-`include` failure into a nested-block message.
 *
 * @param {string} content - Raw YAML text.
 * @param {string} [source="block spec.yaml"] - Label used in error messages.
 * @returns {object} The validated block spec.
 * @throws {Error} On YAML syntax errors or schema rejections.
 */
function parseBlockSpec(content, source = "block spec.yaml") {
  const document = parseYamlOrThrow(content, source);
  let blockSpec;
  try {
    blockSpec = BlockSpecSchema.parse(document);
  } catch (schemaError) {
    throw enrichZodError(schemaError, source, true);
  }
  return blockSpec;
}
786
/**
 * Parse YAML text, converting any parser failure into an Error whose message
 * names the file being parsed.
 *
 * The original failure is kept as `cause` so parser diagnostics remain
 * reachable from the wrapped error.
 *
 * @param {string} content - Raw YAML text.
 * @param {string} source - Label (typically a path) used in the message.
 * @returns {unknown} The parsed document.
 * @throws {Error} `Failed to parse YAML (<source>): <reason>`.
 */
function parseYamlOrThrow(content, source) {
  try {
    return parse(content);
  } catch (e) {
    // Anything can be thrown in JS; only read `.message` off real Errors.
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error(`Failed to parse YAML (${source}): ${reason}`, { cause: e });
  }
}
793
/**
 * Convert a ZodError into a single readable Error listing one line per issue
 * (`<dotted.path>: <message>`, with `(root)` for an empty path).
 *
 * Values that are not ZodErrors are returned unchanged. The ZodError is
 * attached as `cause` so the structured issues are not lost.
 *
 * @param {unknown} error - Whatever the schema parse threw.
 * @param {string} source - Label used in the heading line.
 * @param {boolean} isBlock - Forwarded to `humanizeIssue`.
 * @returns {unknown} A new Error for ZodErrors, otherwise `error` itself.
 */
function enrichZodError(error, source, isBlock) {
  if (!(error instanceof ZodError)) return error;
  const issueLines = error.issues.map((issue) => {
    const path = issue.path.join(".") || "(root)";
    return ` - ${path}: ${humanizeIssue(issue, isBlock)}`;
  });
  return new Error([`Invalid ${source}:`, ...issueLines].join("\n"), { cause: error });
}
803
/**
 * Produce the message for one schema issue.
 *
 * Unrecognised-key issues are rewritten: inside a block, an unknown `include`
 * key yields the nested-block explanation; any other unknown keys are listed.
 * All other issue codes keep their own message.
 *
 * @param {{ code: string, message: string, keys?: unknown }} issue
 * @param {boolean} isBlock - True when validating a block spec.
 * @returns {string}
 */
function humanizeIssue(issue, isBlock) {
  if (issue.code !== "unrecognized_keys") return issue.message;
  const unknownKeys = Array.isArray(issue.keys) ? issue.keys : [];
  const nestedInclude = isBlock && unknownKeys.includes("include");
  if (nestedInclude) {
    return `Nested blocks are not supported — flatten by inlining the included block's steps into this block.`;
  }
  return `Unknown keys: ${unknownKeys.join(", ")}`;
}
811
+ //#endregion
812
+ //#region src/spec/expand.ts
813
/**
 * Flatten a spec's steps, replacing each include step with the steps of the
 * referenced block (in order, with block params substituted).
 *
 * Every emitted step gets a sequential `step-NN` id (zero-padded to two
 * digits) and a `source` tag: `"spec"` for the spec's own steps, the block
 * name for inlined ones.
 *
 * @param {{ steps: object[] }} spec - Parsed spec.
 * @param {{ blocks: Map<string, object> }} options - Loaded blocks by name.
 * @returns {{ id: string, source: string, instruction: string, expected: string }[]}
 * @throws {Error} Propagated from `resolveBlock` for unknown blocks or bad params.
 */
function expandSpec(spec, options) {
  let allocated = 0;
  const nextId = () => {
    allocated += 1;
    return `step-${String(allocated).padStart(2, "0")}`;
  };
  return spec.steps.flatMap((step) => {
    if (!isIncludeStep(step)) {
      return [{
        id: nextId(),
        source: "spec",
        instruction: step.instruction,
        expected: step.expected
      }];
    }
    const block = resolveBlock(step.include, step.params ?? {}, options.blocks);
    return block.steps.map((blockStep) => ({
      id: nextId(),
      source: step.include,
      instruction: substituteVars(blockStep.instruction, block.lookup),
      expected: substituteVars(blockStep.expected, block.lookup)
    }));
  });
}
842
/**
 * Look up a block by name and validate the params passed at the include site.
 *
 * Presence of a param is always decided by an own-property check on
 * `rawParams`, for both the required-param validation and `lookup`. Inherited
 * keys such as `constructor` or `toString` therefore never count as supplied.
 *
 * @param {string} blockName - Name used in `include:`.
 * @param {Record<string, string>} rawParams - Params given at the include site.
 * @param {Map<string, object>} blocks - Loaded blocks by name.
 * @returns {{ steps: object[], lookup: (name: string) => string | undefined }}
 *   The block's steps and a resolver that yields a supplied param value, or
 *   `undefined` for any name that was not supplied.
 * @throws {Error} If the block is unknown, an undeclared param is passed, or
 *   a required param is missing.
 */
function resolveBlock(blockName, rawParams, blocks) {
  const block = blocks.get(blockName);
  if (!block) throw new Error(`Unknown block: "${blockName}". Define it under .ccqa/blocks/${blockName}/spec.yaml.`);
  const declaredParams = new Map((block.params ?? []).map((p) => [p.name, p]));
  const isSupplied = (name) => Object.prototype.hasOwnProperty.call(rawParams, name);
  for (const key of Object.keys(rawParams)) {
    if (!declaredParams.has(key)) throw new Error(`Block "${blockName}" received unknown param "${key}". Declared params: ${[...declaredParams.keys()].join(", ") || "(none)"}.`);
  }
  for (const [pname, def] of declaredParams) {
    // `in` would also accept keys inherited from Object.prototype.
    if (isParamRequired(def) && !isSupplied(pname)) throw new Error(`Block "${blockName}" is missing required param "${pname}".`);
  }
  const lookup = (name) => {
    if (isSupplied(name)) return rawParams[name];
  };
  return {
    steps: block.steps,
    lookup
  };
}
856
/**
 * List the distinct block names a spec includes, in first-appearance order.
 * Only the spec's own step list is inspected.
 *
 * @param {{ steps: object[] }} spec - Parsed spec.
 * @returns {string[]} Unique block names.
 */
function collectIncludedBlockNames(spec) {
  const included = spec.steps
    .filter((step) => isIncludeStep(step))
    .map((step) => step.include);
  return Array.from(new Set(included));
}
866
+ //#endregion
524
867
  //#region src/store/index.ts
525
868
  const CCQA_DIR = ".ccqa";
869
+ const SPEC_FILE = "spec.yaml";
526
870
  function getCcqaDir(cwd = process.cwd()) {
527
871
  return join(cwd, CCQA_DIR);
528
872
  }
529
- /**
530
- * Accepts both the canonical 2-segment alias and the on-disk 4-segment path
531
- * (which is what shell tab-completion produces):
532
- * - "tasks/create-and-complete"
533
- * - "features/tasks/test-cases/create-and-complete"
534
- * - ".ccqa/features/tasks/test-cases/create-and-complete"
535
- * All forms resolve to { featureName: "tasks", specName: "create-and-complete" }.
536
- * Trailing slashes are tolerated.
537
- */
873
/**
 * Build the `<feature>/<spec>` key for a spec reference.
 *
 * @param {{ featureName: string, specName: string }} ref
 * @returns {string}
 */
function specKey(ref) {
  const { featureName, specName } = ref;
  return `${featureName}/${specName}`;
}
538
876
  function parseSpecPath(specPath) {
539
877
  const parts = specPath.replace(/^\.\/+/, "").replace(/\/+$/, "").split("/").filter((p) => p.length > 0);
540
878
  if (parts[0] === ".ccqa") parts.shift();
@@ -556,23 +894,44 @@ function getSpecDir(featureName, specName, cwd) {
556
894
  }
557
895
  async function ensureCcqaDir(cwd) {
558
896
  await mkdir(join(getCcqaDir(cwd), "features"), { recursive: true });
897
+ await mkdir(join(getCcqaDir(cwd), "blocks"), { recursive: true });
559
898
  }
560
899
  async function readSpecFile(featureName, specName, cwd) {
561
- const specPath = join(getSpecDir(featureName, specName, cwd), "test-spec.md");
900
+ const specPath = join(getSpecDir(featureName, specName, cwd), SPEC_FILE);
562
901
  return readFile(specPath, "utf-8").catch(() => {
563
902
  throw new Error(`Spec file not found: ${specPath}`);
564
903
  });
565
904
  }
566
905
  async function tryReadSpecFile(featureName, specName, cwd) {
567
- return readFile(join(getSpecDir(featureName, specName, cwd), "test-spec.md"), "utf-8").catch(() => null);
906
+ return readFile(join(getSpecDir(featureName, specName, cwd), SPEC_FILE), "utf-8").catch(() => null);
568
907
  }
569
908
  async function saveSpecFile(featureName, specName, content, cwd) {
570
909
  const specDir = getSpecDir(featureName, specName, cwd);
571
910
  await mkdir(specDir, { recursive: true });
572
- const specPath = join(specDir, "test-spec.md");
911
+ const specPath = join(specDir, SPEC_FILE);
573
912
  await writeFile(specPath, content.endsWith("\n") ? content : content + "\n", "utf-8");
574
913
  return specPath;
575
914
  }
915
/**
 * Set (or remove) the `relatedPaths` key of an existing spec file.
 *
 * The file is parsed with `parseTestSpec`, the key is replaced, and the
 * result is serialised back with `stringify`. An empty `relatedPaths` list
 * removes the key instead of writing an empty array.
 * NOTE(review): the file is re-serialised from the parsed object, so YAML
 * comments and formatting are presumably not preserved — confirm.
 *
 * @param {string} featureName
 * @param {string} specName
 * @param {string[]} relatedPaths - Glob patterns to store.
 * @param {string} [cwd] - Project root.
 * @returns {Promise<string | null>} Path written, or `null` when the spec
 *   file could not be read.
 * @throws {Error} When the existing file fails spec validation.
 */
async function updateSpecRelatedPaths(featureName, specName, relatedPaths, cwd) {
  const specPath = join(getSpecDir(featureName, specName, cwd), SPEC_FILE);
  let existing;
  try {
    existing = await readFile(specPath, "utf-8");
  } catch {
    return null;
  }
  const currentSpec = parseTestSpec(existing, specPath);
  const nextSpec = stripUndefined({
    ...currentSpec,
    relatedPaths: relatedPaths.length > 0 ? relatedPaths : void 0
  });
  const serialized = stringify(nextSpec, { lineWidth: 0 });
  await writeFile(specPath, serialized, "utf-8");
  return specPath;
}
930
/**
 * Shallow-copy an object, leaving out own enumerable keys whose value is
 * `undefined`. Other falsy values (`null`, `0`, `""`, `false`) are kept.
 *
 * @param {object} obj - Source object (not modified).
 * @returns {object} New object without the undefined-valued keys.
 */
function stripUndefined(obj) {
  const out = {};
  Object.keys(obj).forEach((key) => {
    const value = obj[key];
    if (value !== void 0) out[key] = value;
  });
  return out;
}
576
935
  async function saveRoute(featureName, specName, route, cwd) {
577
936
  const specDir = getSpecDir(featureName, specName, cwd);
578
937
  await mkdir(specDir, { recursive: true });
@@ -587,38 +946,72 @@ async function saveTraceActions(featureName, specName, actions, cwd) {
587
946
  await writeFile(actionsPath, JSON.stringify(actions, null, 2), "utf-8");
588
947
  return actionsPath;
589
948
  }
590
- function getSetupDir(name, cwd) {
591
- return join(getCcqaDir(cwd), "setups", name);
949
+ function getBlocksDir(cwd) {
950
+ return join(getCcqaDir(cwd), "blocks");
592
951
  }
593
- async function readSetupSpecFile(name, cwd) {
594
- const specPath = join(getSetupDir(name, cwd), "setup-spec.md");
595
- return readFile(specPath, "utf-8").catch(() => {
596
- throw new Error(`Setup spec not found: ${specPath}`);
597
- });
952
+ /**
953
+ * Inverse of `getBlockDir`. Given a file path that appears in a git diff,
954
+ * return the block name if the path points at the block's spec.yaml, else
955
+ * null. Used by `drift --changed` to invalidate specs whose included blocks
956
+ * were edited. (v0.4 inlines blocks into every spec's own trace, so the
957
+ * block directory holds only spec.yaml — no per-block actions.json / route
958
+ * lives here anymore.)
959
+ */
960
+ function parseBlockPath(path) {
961
+ return path.match(/(?:^|\/)\.ccqa\/blocks\/([^/]+)\/spec\.yaml$/)?.[1] ?? null;
598
962
  }
599
- async function saveSetupActions(name, actions, cwd) {
600
- const dir = getSetupDir(name, cwd);
601
- await mkdir(dir, { recursive: true });
602
- const path = join(dir, "actions.json");
603
- await writeFile(path, JSON.stringify(actions, null, 2), "utf-8");
604
- return path;
963
+ /**
964
+ * Load every block under `.ccqa/blocks/<name>/spec.yaml`. Used by the trace /
965
+ * generate / drift entry points to validate include references at parse time.
966
+ *
967
+ * A malformed block is fatal — surfaces as a thrown Error with the path that
968
+ * failed. Missing block directories (no `spec.yaml`) are silently skipped so
969
+ * stray files don't break the loader.
970
+ */
971
+ async function loadAllBlocks(cwd) {
972
+ const dir = getBlocksDir(cwd);
973
+ const names = await readdir(dir).catch(() => []);
974
+ const entries = await Promise.all(names.map(async (name) => {
975
+ const path = join(dir, name, SPEC_FILE);
976
+ const content = await readFile(path, "utf-8").catch(() => null);
977
+ return content === null ? null : [name, parseBlockSpec(content, path)];
978
+ }));
979
+ return new Map(entries.filter((e) => e !== null));
605
980
  }
606
- async function getSetupActions(name, cwd) {
607
- const path = join(getSetupDir(name, cwd), "actions.json");
608
- const content = await readFile(path, "utf-8").catch(() => {
609
- throw new Error(`No setup actions found for: ${name}. Run \`ccqa trace-setup ${name}\` first.`);
610
- });
611
- return {
612
- path,
613
- actions: JSON.parse(content)
614
- };
981
+ /**
982
+ * Project the parsed blocks into the shape the draft / drift prompts consume.
983
+ * Co-located with `loadAllBlocks` so callers don't have to remember the
984
+ * isParamRequired / secret-default mapping.
985
+ */
986
+ async function loadAvailableBlocks(cwd) {
987
+ return [...(await loadAllBlocks(cwd)).entries()].map(([name, block]) => ({
988
+ name,
989
+ title: block.title,
990
+ params: (block.params ?? []).map((p) => ({
991
+ name: p.name,
992
+ required: isParamRequired(p),
993
+ secret: p.secret === true
994
+ }))
995
+ }));
615
996
  }
616
- async function saveSetupRoute(name, route, cwd) {
617
- const dir = getSetupDir(name, cwd);
618
- await mkdir(dir, { recursive: true });
619
- const routePath = join(dir, "route.md");
620
- await writeFile(routePath, routeToMarkdown(route), "utf-8");
621
- return routePath;
997
+ /**
998
+ * Probe for orphaned files left over from earlier ccqa versions inside
999
+ * `.ccqa/blocks/<name>/`. Both pre-v0.4 `test.spec.ts` (function-export
1000
+ * blocks) and the short-lived `actions.json` / `route.md` (recorded-block
1001
+ * variant) are dead in the new "blocks are pure spec templates" model and
1002
+ * should be deleted manually. Returns the absolute paths.
1003
+ */
1004
+ async function findStaleBlockArtifacts(cwd) {
1005
+ const dir = getBlocksDir(cwd);
1006
+ const names = await readdir(dir).catch(() => []);
1007
+ return (await Promise.all(names.flatMap((name) => [
1008
+ "test.spec.ts",
1009
+ "actions.json",
1010
+ "route.md"
1011
+ ].map(async (f) => {
1012
+ const path = join(dir, name, f);
1013
+ return await stat(path).then(() => true).catch(() => false) ? path : null;
1014
+ })))).filter((p) => p !== null);
622
1015
  }
623
1016
  async function getTraceActions(featureName, specName, cwd) {
624
1017
  const path = join(getSpecDir(featureName, specName, cwd), "actions.json");
@@ -660,8 +1053,7 @@ async function listSpecsForFeature(featureName, cwd) {
660
1053
  }
661
1054
  /**
662
1055
  * Lists every feature/spec dir under .ccqa/features/, regardless of whether
663
- * the spec is fully drafted yet. Used by `ccqa draft` to suggest non-colliding
664
- * feature/spec names that fit the existing structure.
1056
+ * the spec is fully drafted yet. Each spec file is read at most once.
665
1057
  */
666
1058
  async function listFeatureTree(cwd) {
667
1059
  const featuresDir = join(getCcqaDir(cwd), "features");
@@ -672,16 +1064,27 @@ async function listFeatureTree(cwd) {
672
1064
  return {
673
1065
  featureName,
674
1066
  specs: await Promise.all(specDirs.map(async (specName) => {
675
- const content = await readFile(join(testCasesDir, specName, "test-spec.md"), "utf-8").catch(() => null);
1067
+ const specFile = join(testCasesDir, specName, SPEC_FILE);
1068
+ const content = await readFile(specFile, "utf-8").catch(() => null);
676
1069
  if (content === null) return {
677
1070
  specName,
678
1071
  hasSpecFile: false
679
1072
  };
680
- return {
681
- specName,
682
- hasSpecFile: true,
683
- title: content.match(/^title:\s*"?([^"\n]+)"?/m)?.[1]?.trim()
684
- };
1073
+ try {
1074
+ const spec = parseTestSpec(content, specFile);
1075
+ const entry = {
1076
+ specName,
1077
+ hasSpecFile: true,
1078
+ includedBlocks: collectIncludedBlockNames(spec)
1079
+ };
1080
+ if (spec.relatedPaths) entry.relatedPaths = spec.relatedPaths;
1081
+ return entry;
1082
+ } catch {
1083
+ return {
1084
+ specName,
1085
+ hasSpecFile: true
1086
+ };
1087
+ }
685
1088
  }))
686
1089
  };
687
1090
  }));
@@ -706,196 +1109,67 @@ function routeToMarkdown(route) {
706
1109
  return lines.join("\n");
707
1110
  }
708
1111
  //#endregion
709
- //#region src/spec/parser.ts
710
- function parseTestSpec(content) {
711
- const { data, content: body } = matter(content);
712
- const steps = parseSteps(body);
713
- const prerequisites = parsePrerequisites(body);
714
- return {
715
- title: String(data["title"] ?? "Untitled"),
716
- baseUrl: String(data["baseUrl"] ?? "http://localhost:3000"),
717
- prerequisites: prerequisites || void 0,
718
- setups: parseSetupRefs(data["setups"]),
719
- steps
720
- };
721
- }
722
- function parseSetupSpec(content) {
723
- const { data, content: body } = matter(content);
724
- const steps = parseSteps(body);
725
- const placeholders = parsePlaceholders(data["placeholders"]);
726
- return {
727
- title: String(data["title"] ?? "Untitled"),
728
- placeholders: Object.keys(placeholders).length > 0 ? placeholders : void 0,
729
- steps
730
- };
731
- }
732
- function parsePlaceholders(raw) {
733
- if (!raw || typeof raw !== "object") return {};
734
- const result = {};
735
- for (const [key, val] of Object.entries(raw)) if (val && typeof val === "object" && "dummy" in val) {
736
- const v = val;
737
- result[key] = {
738
- dummy: String(v["dummy"]),
739
- description: v["description"] ? String(v["description"]) : void 0
740
- };
741
- }
742
- return result;
743
- }
744
- function parseSetupRefs(raw) {
745
- if (!Array.isArray(raw)) return void 0;
746
- const refs = [];
747
- for (const item of raw) if (typeof item === "object" && item !== null && "name" in item) {
748
- const i = item;
749
- refs.push({
750
- name: String(i["name"]),
751
- params: i["params"] && typeof i["params"] === "object" ? Object.fromEntries(Object.entries(i["params"]).map(([k, v]) => [k, String(v)])) : void 0
752
- });
753
- }
754
- return refs.length > 0 ? refs : void 0;
755
- }
756
- function parsePrerequisites(body) {
757
- const match = body.match(/##\s+Prerequisites\s+([\s\S]*?)(?=##|$)/);
758
- if (!match || !match[1]) return null;
759
- return match[1].trim();
760
- }
761
- function parseSteps(body) {
762
- const stepBlocks = body.split(/###\s+Step\s+\d+:/);
763
- const steps = [];
764
- for (let i = 1; i < stepBlocks.length; i++) {
765
- const block = stepBlocks[i];
766
- if (!block) continue;
767
- const titleMatch = block.match(/^(.+)/);
768
- const instructionMatch = block.match(/\*\*Instruction\*\*:\s*(.+)/);
769
- const expectedMatch = block.match(/\*\*Expected\*\*:\s*(.+)/);
770
- if (!titleMatch || !instructionMatch || !expectedMatch) continue;
771
- steps.push({
772
- id: `step-${String(i).padStart(2, "0")}`,
773
- title: titleMatch[1]?.trim() ?? "",
774
- instruction: instructionMatch[1]?.trim() ?? "",
775
- expected: expectedMatch[1]?.trim() ?? ""
776
- });
777
- }
778
- return steps;
1112
+ //#region src/cli/stale-blocks.ts
1113
+ /**
1114
+ * Hint when stale per-block artifacts (`test.spec.ts`, `actions.json`,
1115
+ * `route.md`) from earlier ccqa versions are still present. v0.4 treats
1116
+ * blocks as pure spec templates — they no longer have their own executable
1117
+ * or recorded artifacts, so these files are dead code and should be deleted
1118
+ * manually. Shared by `trace` and `generate`.
1119
+ */
1120
+ async function warnStaleBlockArtifacts() {
1121
+ const stale = await findStaleBlockArtifacts();
1122
+ if (stale.length === 0) return;
1123
+ for (const p of stale) hint(`stale block artifact detected: ${p} — v0.4 no longer uses these; delete it manually.`);
779
1124
  }
780
1125
  //#endregion
781
- //#region src/runtime/bundled-config.ts
782
- const CANDIDATES = [
783
- "../runtime/vitest.config.mjs",
784
- "./vitest.config.mjs",
785
- "./vitest.config.ts"
786
- ];
787
- function bundledVitestConfigPath() {
788
- for (const rel of CANDIDATES) {
789
- const candidate = fileURLToPath(new URL(rel, import.meta.url));
790
- try {
791
- accessSync(candidate);
792
- return candidate;
793
- } catch {}
1126
+ //#region src/drift/parse-related-paths.ts
1127
+ /**
1128
+ * Pull a `RELATED_PATHS_BEGIN ... RELATED_PATHS_END` block out of the trace
1129
+ * agent's combined text output. Lines inside the block become entries; blank
1130
+ * lines, bullet markers, and code fences are tolerated. Returns null when the
1131
+ * agent did not emit a block at all so the caller can warn instead of silently
1132
+ * clearing the spec's existing relatedPaths.
1133
+ */
1134
+ function parseRelatedPathsBlock(text) {
1135
+ const match = text.match(/RELATED_PATHS_BEGIN\s*\n?([\s\S]*?)\n?RELATED_PATHS_END/);
1136
+ if (!match || match[1] === void 0) return null;
1137
+ const seen = /* @__PURE__ */ new Set();
1138
+ const out = [];
1139
+ for (const raw of match[1].split("\n")) {
1140
+ const line = raw.replace(/^```.*$/, "").trim();
1141
+ if (!line) continue;
1142
+ const cleaned = line.replace(/^[-*]\s+/, "").trim();
1143
+ if (!cleaned || seen.has(cleaned)) continue;
1144
+ seen.add(cleaned);
1145
+ out.push(cleaned);
794
1146
  }
795
- return fileURLToPath(new URL("./vitest.config.ts", import.meta.url));
1147
+ return out;
796
1148
  }
797
1149
  //#endregion
798
- //#region src/runtime/spawn-vitest.ts
1150
+ //#region src/runtime/agent-browser-bin.ts
799
1151
  const require$2 = createRequire(import.meta.url);
800
- function resolveVitestBin() {
801
- const pkgPath = require$2.resolve("vitest/package.json");
802
- const pkg = require$2(pkgPath);
803
- const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.vitest;
804
- if (!binRel) throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
805
- return resolve(dirname(pkgPath), binRel);
1152
+ function hasAgentBrowserShim(dir) {
1153
+ try {
1154
+ statSync(join(dir, "agent-browser"));
1155
+ return true;
1156
+ } catch {
1157
+ return false;
1158
+ }
806
1159
  }
807
- async function spawnVitestCaptured(args, opts = {}) {
808
- const child = spawnVitestChild(args, opts, "pipe");
809
- const [stdout, stderr, exitCode] = await Promise.all([
810
- drain(child.stdout),
811
- drain(child.stderr),
812
- waitExit(child)
813
- ]);
814
- return {
815
- exitCode,
816
- stdout,
817
- stderr
818
- };
819
- }
820
- async function spawnVitestTeed(args, opts = {}) {
821
- const child = spawnVitestChild(args, opts, "pipe");
822
- const [stdout, stderr, exitCode] = await Promise.all([
823
- teeDrain(child.stdout, process.stdout),
824
- teeDrain(child.stderr, process.stderr),
825
- waitExit(child)
826
- ]);
827
- return {
828
- exitCode,
829
- stdout,
830
- stderr
831
- };
832
- }
833
- function spawnVitestStreaming(args, opts = {}) {
834
- const child = spawnVitestChild(args, opts, "pipe");
835
- return {
836
- child,
837
- stdout: child.stdout,
838
- stderr: child.stderr,
839
- exited: waitExit(child)
840
- };
841
- }
842
- function spawnVitestChild(args, opts, stdio) {
843
- const vitestBin = resolveVitestBin();
844
- return spawn(process.execPath, [vitestBin, ...args], {
845
- cwd: opts.cwd,
846
- env: opts.env ?? process.env,
847
- stdio: [
848
- "ignore",
849
- stdio,
850
- stdio
851
- ]
852
- });
853
- }
854
- async function drain(stream) {
855
- stream.setEncoding("utf8");
856
- let buf = "";
857
- for await (const chunk of stream) buf += chunk;
858
- return buf;
859
- }
860
- async function teeDrain(stream, sink) {
861
- stream.setEncoding("utf8");
862
- let buf = "";
863
- for await (const chunk of stream) {
864
- buf += chunk;
865
- sink.write(chunk);
866
- }
867
- return buf;
868
- }
869
- function waitExit(child) {
870
- return new Promise((resolvePromise, rejectPromise) => {
871
- child.once("exit", (code) => resolvePromise(code ?? 0));
872
- child.once("error", rejectPromise);
873
- });
874
- }
875
- //#endregion
876
- //#region src/runtime/agent-browser-bin.ts
877
- const require$1 = createRequire(import.meta.url);
878
- function hasAgentBrowserShim(dir) {
879
- try {
880
- statSync(join(dir, "agent-browser"));
881
- return true;
882
- } catch {
883
- return false;
884
- }
885
- }
886
- /**
887
- * Walks up from `start` looking for a `node_modules/.bin/agent-browser` shim.
888
- * Returns the .bin directory containing the shim, or null if none is found.
889
- */
890
- function findNodeModulesBin(start) {
891
- let cur = start;
892
- while (true) {
893
- const candidate = join(cur, "node_modules", ".bin");
894
- if (hasAgentBrowserShim(candidate)) return candidate;
895
- const parent = dirname(cur);
896
- if (parent === cur) return null;
897
- cur = parent;
898
- }
1160
+ /**
1161
+ * Walks up from `start` looking for a `node_modules/.bin/agent-browser` shim.
1162
+ * Returns the .bin directory containing the shim, or null if none is found.
1163
+ */
1164
+ function findNodeModulesBin(start) {
1165
+ let cur = start;
1166
+ while (true) {
1167
+ const candidate = join(cur, "node_modules", ".bin");
1168
+ if (hasAgentBrowserShim(candidate)) return candidate;
1169
+ const parent = dirname(cur);
1170
+ if (parent === cur) return null;
1171
+ cur = parent;
1172
+ }
899
1173
  }
900
1174
  /**
901
1175
  * Resolves the directory containing the `agent-browser` shim that npm/pnpm
@@ -908,10 +1182,10 @@ function findNodeModulesBin(start) {
908
1182
  function resolveAgentBrowserBinDir() {
909
1183
  const fromCwd = findNodeModulesBin(process.cwd());
910
1184
  if (fromCwd) return fromCwd;
911
- const fromSelf = findNodeModulesBin(dirname(require$1.resolve("agent-browser/package.json")));
1185
+ const fromSelf = findNodeModulesBin(dirname(require$2.resolve("agent-browser/package.json")));
912
1186
  if (fromSelf) return fromSelf;
913
1187
  try {
914
- const candidate = join(dirname(require$1.resolve("agent-browser/package.json")), "node_modules", ".bin");
1188
+ const candidate = join(dirname(require$2.resolve("agent-browser/package.json")), "node_modules", ".bin");
915
1189
  if (hasAgentBrowserShim(candidate)) return candidate;
916
1190
  } catch {}
917
1191
  return null;
@@ -971,54 +1245,197 @@ function formatAgentBrowserUnavailableMessage() {
971
1245
  ].join("\n");
972
1246
  }
973
1247
  //#endregion
974
- //#region src/runtime/env-vars.ts
975
- const ENV_VAR_RE = /\$\{([A-Z_][A-Z0-9_]*)\}|\$([A-Z_][A-Z0-9_]*)/g;
1248
+ //#region src/runtime/replay-validate.ts
1249
+ const SHORT_TIMEOUT_MS = 5e3;
1250
+ const ASSERT_TIMEOUT_MS = 1e4;
976
1251
  /**
977
- * Returns true if the value contains at least one `$VAR` or `${VAR}` reference.
1252
+ * Convert one recorded action into the `agent-browser` arg list that would
1253
+ * exercise it. Returns `null` for actions that should not be validated
1254
+ * (snapshot has no side effect; assert types whose codegen forms aren't
1255
+ * directly verifiable here fall through to the caller's `unverifiable`
1256
+ * fallback).
978
1257
  */
979
- function hasEnvRef(value) {
980
- ENV_VAR_RE.lastIndex = 0;
981
- return ENV_VAR_RE.test(value);
1258
+ function actionToAbArgs(action, sessionName) {
1259
+ const base = ["--session", sessionName];
1260
+ const sub = (s) => s === void 0 ? "" : resolveEnvRefs(s);
1261
+ switch (action.command) {
1262
+ case "cookies_clear": return [
1263
+ ...base,
1264
+ "cookies",
1265
+ "clear"
1266
+ ];
1267
+ case "open": return [
1268
+ ...base,
1269
+ "open",
1270
+ sub(action.value).replace(/^["']|["']$/g, "")
1271
+ ];
1272
+ case "click": return [
1273
+ ...base,
1274
+ "click",
1275
+ sub(action.selector)
1276
+ ];
1277
+ case "dblclick": return [
1278
+ ...base,
1279
+ "dblclick",
1280
+ sub(action.selector)
1281
+ ];
1282
+ case "fill":
1283
+ case "type": return [
1284
+ ...base,
1285
+ "fill",
1286
+ sub(action.selector),
1287
+ sub(action.value)
1288
+ ];
1289
+ case "check": return [
1290
+ ...base,
1291
+ "check",
1292
+ sub(action.selector)
1293
+ ];
1294
+ case "uncheck": return [
1295
+ ...base,
1296
+ "uncheck",
1297
+ sub(action.selector)
1298
+ ];
1299
+ case "press": return [
1300
+ ...base,
1301
+ "press",
1302
+ sub(action.value)
1303
+ ];
1304
+ case "select": return [
1305
+ ...base,
1306
+ "select",
1307
+ sub(action.selector),
1308
+ sub(action.value)
1309
+ ];
1310
+ case "hover": return [
1311
+ ...base,
1312
+ "hover",
1313
+ sub(action.selector)
1314
+ ];
1315
+ case "scroll": {
1316
+ const args = [action.direction ?? "down", ...action.pixels ? [action.pixels] : []];
1317
+ return [
1318
+ ...base,
1319
+ "scroll",
1320
+ ...args
1321
+ ];
1322
+ }
1323
+ case "drag": return [
1324
+ ...base,
1325
+ "drag",
1326
+ sub(action.selector),
1327
+ sub(action.target)
1328
+ ];
1329
+ case "wait": {
1330
+ const raw = sub(action.selector);
1331
+ if (!raw) return null;
1332
+ if (/^\d+$/.test(raw)) return null;
1333
+ if (raw.startsWith("text=")) return [
1334
+ ...base,
1335
+ "wait",
1336
+ "--text",
1337
+ raw.slice(5),
1338
+ "--timeout",
1339
+ String(SHORT_TIMEOUT_MS)
1340
+ ];
1341
+ return [
1342
+ ...base,
1343
+ "wait",
1344
+ raw,
1345
+ "--timeout",
1346
+ String(SHORT_TIMEOUT_MS)
1347
+ ];
1348
+ }
1349
+ case "snapshot": return null;
1350
+ case "assert": return assertToAbArgs(action, sub, sessionName);
1351
+ }
982
1352
  }
983
- /**
984
- * Resolve every `$VAR` / `${VAR}` reference against the current process env.
985
- *
986
- * Missing variables expand to the empty string, mirroring `sh` behaviour.
987
- * Throwing would force ccqa to be invoked with every var set even for
988
- * unused setups, which is more user-hostile than letting the test fail
989
- * downstream with a clearer message ("login form rejected: empty password").
990
- */
991
- function resolveEnvRefs(value) {
992
- return value.replace(ENV_VAR_RE, (_, braced, plain) => {
993
- const name = braced ?? plain ?? "";
994
- return process.env[name] ?? "";
995
- });
1353
+ function assertToAbArgs(action, sub, sessionName) {
1354
+ const base = ["--session", sessionName];
1355
+ const val = sub(action.value ?? action.observation);
1356
+ const sel = sub(action.selector ?? action.observation);
1357
+ switch (action.assertType) {
1358
+ case "text_visible":
1359
+ if (!val) return null;
1360
+ return [
1361
+ ...base,
1362
+ "wait",
1363
+ "--text",
1364
+ val,
1365
+ "--timeout",
1366
+ String(ASSERT_TIMEOUT_MS)
1367
+ ];
1368
+ case "text_not_visible": return null;
1369
+ case "element_visible":
1370
+ if (!sel) return null;
1371
+ return [
1372
+ ...base,
1373
+ "wait",
1374
+ sel,
1375
+ "--timeout",
1376
+ String(ASSERT_TIMEOUT_MS)
1377
+ ];
1378
+ case "element_not_visible": return null;
1379
+ case "url_contains": return null;
1380
+ case "element_enabled":
1381
+ case "element_disabled":
1382
+ case "element_checked":
1383
+ case "element_unchecked":
1384
+ if (!sel || sel.startsWith("text=") || sel.startsWith("[aria-label=")) return null;
1385
+ return [
1386
+ ...base,
1387
+ "wait",
1388
+ sel,
1389
+ "--timeout",
1390
+ String(ASSERT_TIMEOUT_MS)
1391
+ ];
1392
+ default: return null;
1393
+ }
1394
+ }
1395
+ function validateActions(actions, opts) {
1396
+ const kept = [];
1397
+ const dropped = [];
1398
+ let skipUntilSideEffect = false;
1399
+ for (let i = 0; i < actions.length; i++) {
1400
+ const action = actions[i];
1401
+ if (skipUntilSideEffect && isPassiveCommand(action.command)) {
1402
+ dropped.push({
1403
+ index: i,
1404
+ action,
1405
+ reason: "skipped after a preceding action failed"
1406
+ });
1407
+ continue;
1408
+ }
1409
+ skipUntilSideEffect = false;
1410
+ const args = actionToAbArgs(action, opts.sessionName);
1411
+ if (args === null) {
1412
+ kept.push(action);
1413
+ continue;
1414
+ }
1415
+ const result = spawnAB(args);
1416
+ if (result.status === 0) {
1417
+ kept.push(action);
1418
+ continue;
1419
+ }
1420
+ dropped.push({
1421
+ index: i,
1422
+ action,
1423
+ reason: (result.stderr.trim() || result.stdout.trim() || `agent-browser exit ${result.status ?? "?"}`).slice(0, 200)
1424
+ });
1425
+ skipUntilSideEffect = true;
1426
+ }
1427
+ return {
1428
+ kept,
1429
+ dropped
1430
+ };
996
1431
  }
997
1432
  /**
998
- * Embed `$VAR` / `${VAR}` as a JS template-literal expression that reads
999
- * `process.env.VAR ?? ""` at runtime. Used by `ccqa generate` so the test
1000
- * script never bakes in the secret value.
1001
- *
1002
- * Returns a JavaScript string-literal expression (template literal when env
1003
- * refs are present, plain string literal otherwise).
1004
- *
1005
- * Examples:
1006
- * "${PASSWORD}" -> '`${process.env.PASSWORD ?? ""}`'
1007
- * "user-${SUFFIX}@x.com" -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
1008
- * "literal value" -> '"literal value"'
1433
+ * Passive (read-only) commands whose only effect is observation. When a
1434
+ * preceding action fails, dropping these too is the right move because
1435
+ * they were trying to observe state the failed action would have set up.
1009
1436
  */
1010
- function envRefsToJsExpression(value) {
1011
- if (!hasEnvRef(value)) return JSON.stringify(value);
1012
- const escaped = value.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, (match, offset, source) => {
1013
- ENV_VAR_RE.lastIndex = 0;
1014
- let m;
1015
- while ((m = ENV_VAR_RE.exec(source)) !== null) if (m.index === offset) return "${";
1016
- return "\\${";
1017
- });
1018
- ENV_VAR_RE.lastIndex = 0;
1019
- return `\`${escaped.replace(ENV_VAR_RE, (_, braced, plain) => {
1020
- return `\${process.env.${braced ?? plain ?? ""} ?? ""}`;
1021
- })}\``;
1437
+ function isPassiveCommand(cmd) {
1438
+ return cmd === "snapshot" || cmd === "wait" || cmd === "assert";
1022
1439
  }
1023
1440
  //#endregion
1024
1441
  //#region src/cli/trace.ts
@@ -1038,29 +1455,35 @@ async function runTrace(featureName, specName, model) {
1038
1455
  throw e;
1039
1456
  }
1040
1457
  await ensureCcqaDir();
1458
+ await warnStaleBlockArtifacts();
1041
1459
  const spec = parseTestSpec(await readSpecFile(featureName, specName));
1042
- const hasSetups = (spec.setups?.length ?? 0) > 0;
1460
+ const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
1043
1461
  meta("spec", spec.title);
1044
- meta("url", spec.baseUrl);
1045
- if (hasSetups) meta("setups", spec.setups.map((s) => s.name).join(", "));
1046
- meta("steps", spec.steps.length);
1462
+ meta("steps", expanded.length);
1463
+ const includes = collectIncludedBlockNames(spec);
1464
+ if (includes.length > 0) meta("blocks", includes.join(", "));
1047
1465
  blank();
1048
1466
  const sessionName = generateSessionName();
1049
- if (hasSetups) {
1050
- info("Running setup procedures...");
1051
- await runSetups(spec.setups, sessionName);
1052
- blank();
1053
- }
1054
- const systemPrompt = buildTraceSystemPrompt(spec, {
1055
- sessionName,
1056
- skipCookiesClear: hasSetups
1467
+ const systemPrompt = buildTraceSystemPrompt({
1468
+ title: spec.title,
1469
+ steps: expanded,
1470
+ sessionName
1057
1471
  });
1058
- const prompt = buildTracePrompt(spec);
1472
+ const prompt = buildTracePrompt(spec.title);
1059
1473
  info("Running agent-browser session...");
1060
1474
  blank();
1061
1475
  const routeSteps = [];
1062
1476
  let overallStatus = "passed";
1063
1477
  const traceActions = [];
1478
+ let currentStepId;
1479
+ let relatedPathsBuffer = null;
1480
+ const withStepId = (action) => {
1481
+ if (!action) return null;
1482
+ return currentStepId ? {
1483
+ ...action,
1484
+ stepId: currentStepId
1485
+ } : action;
1486
+ };
1064
1487
  const { isError } = await invokeClaudeStreaming({
1065
1488
  prompt,
1066
1489
  systemPrompt,
@@ -1076,7 +1499,7 @@ async function runTrace(featureName, specName, model) {
1076
1499
  },
1077
1500
  model,
1078
1501
  onAbAction: (abAction) => {
1079
- const action = parseAbAction(abAction);
1502
+ const action = withStepId(parseAbAction(abAction));
1080
1503
  if (action) traceActions.push(action);
1081
1504
  },
1082
1505
  onAbActionFailed: () => {
@@ -1087,10 +1510,19 @@ async function runTrace(featureName, specName, model) {
1087
1510
  for (const block of msg.message.content ?? []) {
1088
1511
  if (block.type !== "text" || !block.text) continue;
1089
1512
  const text = block.text;
1090
- const statusLine = parseStatusLine(text);
1091
- if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
1513
+ if (relatedPathsBuffer !== null) relatedPathsBuffer += text + "\n";
1514
+ else {
1515
+ const idx = text.indexOf("RELATED_PATHS_BEGIN");
1516
+ if (idx !== -1) relatedPathsBuffer = text.slice(idx) + "\n";
1517
+ }
1092
1518
  for (const line of text.split("\n")) {
1093
1519
  const trimmed = line.trim();
1520
+ const status = parseStatusLine(line);
1521
+ if (status) {
1522
+ if (status.type === "STEP_START" && status.stepId) currentStepId = status.stepId;
1523
+ step(status.type, status.stepId, status.detail);
1524
+ continue;
1525
+ }
1094
1526
  if (trimmed.startsWith("ROUTE_STEP|")) {
1095
1527
  const routeStep = parseRouteStep(trimmed);
1096
1528
  if (routeStep) {
@@ -1098,56 +1530,51 @@ async function runTrace(featureName, specName, model) {
1098
1530
  if (routeStep.status === "FAILED") overallStatus = "failed";
1099
1531
  }
1100
1532
  } else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
1101
- const action = parseAbAction(trimmed);
1533
+ const action = withStepId(parseAbAction(trimmed));
1102
1534
  if (action) traceActions.push(action);
1103
1535
  }
1104
1536
  }
1105
1537
  }
1106
1538
  });
1107
1539
  if (isError) overallStatus = "failed";
1540
+ const validatedActions = validateAndReport(traceActions);
1108
1541
  const route = {
1109
1542
  specName,
1110
1543
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
1111
1544
  status: overallStatus,
1112
1545
  steps: routeSteps
1113
1546
  };
1114
- const [routePath, actionsPath] = await Promise.all([saveRoute(featureName, specName, route), saveTraceActions(featureName, specName, traceActions)]);
1547
+ const [routePath, actionsPath] = await Promise.all([saveRoute(featureName, specName, route), saveTraceActions(featureName, specName, validatedActions)]);
1115
1548
  blank();
1116
1549
  meta("route", routePath);
1117
1550
  meta("saved", actionsPath);
1118
- meta("actions", traceActions.length);
1551
+ meta("actions", validatedActions.length);
1119
1552
  meta("status", overallStatus.toUpperCase());
1553
+ const relatedPaths = relatedPathsBuffer !== null ? parseRelatedPathsBlock(relatedPathsBuffer) : null;
1554
+ if (relatedPaths !== null) {
1555
+ const written = await updateSpecRelatedPaths(featureName, specName, relatedPaths);
1556
+ if (written) meta("relatedPaths", `${relatedPaths.length} path(s) written to ${written}`);
1557
+ } else warn("trace did not emit a RELATED_PATHS block; drift --changed cannot scope this spec");
1120
1558
  hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
1121
1559
  }
1122
1560
  /**
1123
- * Execute setup procedures by running their test.spec.ts via vitest with a fixed session name.
1124
- * Creates a temporary runner script that sets the session and imports each setup's test body.
1561
+ * Run the post-trace replay validation and emit user-visible drop reports.
1562
+ * Splitting this out keeps `runTrace` readable; the function is pure aside
1563
+ * from `log.*` and the agent-browser invocations inside `validateActions`.
1125
1564
  */
1126
- async function runSetups(setups, sessionName) {
1127
- for (const ref of setups) {
1128
- info(` setup: ${ref.name}`);
1129
- const scriptPath = join(getSetupDir(ref.name), "test.spec.ts");
1130
- let script = await readFile(scriptPath, "utf-8").catch(() => {
1131
- throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
1132
- });
1133
- for (const [key, value] of Object.entries(ref.params ?? {})) script = script.replaceAll(`{{${key}}}`, resolveEnvRefs(value));
1134
- script = script.replace(/process\.env\.AGENT_BROWSER_SESSION\s*\|?\|?=\s*`.+`;/, `process.env.AGENT_BROWSER_SESSION = ${JSON.stringify(sessionName)};`);
1135
- const tmpPath = join(getSetupDir(ref.name), `_run.spec.ts`);
1136
- await writeFile(tmpPath, script, "utf-8");
1137
- try {
1138
- const { exitCode, stdout, stderr } = await spawnVitestCaptured([
1139
- "run",
1140
- "--config",
1141
- bundledVitestConfigPath(),
1142
- tmpPath
1143
- ]);
1144
- process.stdout.write(stdout);
1145
- if (stderr) process.stderr.write(stderr);
1146
- if (exitCode !== 0) throw new Error(`Setup '${ref.name}' failed (exit ${exitCode})`);
1147
- } finally {
1148
- await unlink(tmpPath).catch(() => {});
1149
- }
1565
+ function validateAndReport(actions) {
1566
+ if (actions.length === 0) return actions;
1567
+ const sessionName = `${generateSessionName()}-validate`;
1568
+ blank();
1569
+ info("post-trace validation (replaying recorded actions)...");
1570
+ const { kept, dropped } = validateActions(actions, { sessionName });
1571
+ if (dropped.length === 0) {
1572
+ meta("validated", `${kept.length}/${actions.length} kept`);
1573
+ return kept;
1150
1574
  }
1575
+ for (const d of dropped) warn(`dropped action #${d.index + 1} (${d.action.command}${d.action.selector ? " " + d.action.selector : ""}): ${d.reason}`);
1576
+ meta("validated", `${kept.length}/${actions.length} kept (${dropped.length} dropped)`);
1577
+ return kept;
1151
1578
  }
1152
1579
  function parseStatusLine(text) {
1153
1580
  for (const line of text.split("\n")) {
@@ -1244,21 +1671,32 @@ function parseAbAction(line) {
1244
1671
  }
1245
1672
  //#endregion
1246
1673
  //#region src/codegen/actions-to-script.ts
1247
- function actionsToScript(actions, title, setupScripts) {
1674
+ function actionsToScript(input) {
1675
+ const { actions, testName, stepMarkers = [] } = input;
1248
1676
  const parts = [...[
1249
1677
  `import { test } from "vitest";`,
1250
1678
  `import { spawnSync } from "node:child_process";`,
1251
- `import { ab, abWait, abAssertTextVisible, abAssertVisible, abAssertNotVisible, abAssertUrl, abAssertEnabled, abAssertDisabled, abAssertChecked, abAssertUnchecked } from "ccqa/test-helpers";`,
1679
+ `import { ${[
1680
+ "ab",
1681
+ "abWait",
1682
+ "abAssertTextVisible",
1683
+ "abAssertVisible",
1684
+ "abAssertNotVisible",
1685
+ "abAssertUrl",
1686
+ "abAssertEnabled",
1687
+ "abAssertDisabled",
1688
+ "abAssertChecked",
1689
+ "abAssertUnchecked"
1690
+ ].join(", ")} } from "ccqa/test-helpers";`,
1252
1691
  "",
1253
- `// Single session shared across all tests — reset per run via cookies clear in first test.`,
1254
- `// Use ||= so an outer harness (e.g. ccqa generate's auto-fix loop) can pre-set the session`,
1255
- `// name and inspect the same session after the run finishes.`,
1692
+ `// Single session shared across the run. Use ||= so an outer harness`,
1693
+ `// (e.g. ccqa generate's auto-fix loop) can pre-set the session name`,
1694
+ `// and inspect the same session after the run finishes.`,
1256
1695
  `process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
1257
1696
  ""
1258
1697
  ]];
1259
- if (setupScripts?.length) for (const setup of setupScripts) parts.push(`test("setup: ${setup.name}", () => {`, setup.body, "}, 3 * 60 * 1000);", "");
1260
- const body = actionsToLines(actions).map((l) => ` ${l}`).join("\n");
1261
- parts.push(`test(${JSON.stringify(title)}, () => {`, body, "}, 5 * 60 * 1000);", "");
1698
+ const body = actionsToLines(actions, stepMarkers).map((l) => ` ${l}`).join("\n");
1699
+ parts.push(`test(${JSON.stringify(testName)}, () => {`, body, "}, 5 * 60 * 1000);", "");
1262
1700
  return parts.join("\n");
1263
1701
  }
1264
1702
  /** Commands that interact with page elements and need the page to be loaded */
@@ -1273,11 +1711,18 @@ const ELEMENT_COMMANDS = new Set([
1273
1711
  "hover",
1274
1712
  "drag"
1275
1713
  ]);
1276
- function actionsToLines(actions) {
1714
+ function actionsToLines(actions, stepMarkers) {
1277
1715
  const lines = [];
1278
1716
  let prevLine = null;
1279
1717
  let prevCommand = null;
1280
- for (const action of actions) {
1718
+ const markerByIndex = new Map(stepMarkers.map((m) => [m.actionIndex, m]));
1719
+ for (let i = 0; i < actions.length; i++) {
1720
+ const marker = markerByIndex.get(i);
1721
+ if (marker) {
1722
+ if (lines.length > 0) lines.push("");
1723
+ lines.push(`// step: ${marker.stepId} [${marker.source}]`);
1724
+ }
1725
+ const action = actions[i];
1281
1726
  const line = actionToLine(action);
1282
1727
  if (line === null) continue;
1283
1728
  if (line === prevLine) continue;
@@ -1296,16 +1741,16 @@ function actionToLine(action) {
1296
1741
  if ("selector" in action && isRefSelector(action.selector)) return null;
1297
1742
  switch (action.command) {
1298
1743
  case "cookies_clear": return `ab("cookies", "clear");`;
1299
- case "open": return `ab("open", ${j((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
1744
+ case "open": return `ab("open", ${jExpr((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
1300
1745
  case "snapshot": return action.observation ? `// ${action.observation}` : null;
1301
1746
  case "click": return `ab("click", ${j(action.selector)});`;
1302
1747
  case "dblclick": return `ab("dblclick", ${j(action.selector)});`;
1303
- case "fill": return `ab("fill", ${j(action.selector)}, ${j(action.value)});`;
1304
- case "type": return `ab("fill", ${j(action.selector)}, ${j(action.value)});`;
1748
+ case "fill": return `ab("fill", ${j(action.selector)}, ${jExpr(action.value)});`;
1749
+ case "type": return `ab("fill", ${j(action.selector)}, ${jExpr(action.value)});`;
1305
1750
  case "check": return `ab("check", ${j(action.selector)});`;
1306
1751
  case "uncheck": return `ab("uncheck", ${j(action.selector)});`;
1307
- case "press": return `ab("press", ${j(action.value)});`;
1308
- case "select": return `ab("select", ${j(action.selector)}, ${j(action.value)});`;
1752
+ case "press": return `ab("press", ${jExpr(action.value)});`;
1753
+ case "select": return `ab("select", ${j(action.selector)}, ${jExpr(action.value)});`;
1309
1754
  case "hover": return `ab("hover", ${j(action.selector)});`;
1310
1755
  case "scroll": return `ab("scroll", ${[action.direction ?? "down", ...action.pixels ? [action.pixels] : []].map(j).join(", ")});`;
1311
1756
  case "drag": return `ab("drag", ${j(action.selector)}, ${j(action.target)});`;
@@ -1321,10 +1766,10 @@ function actionToLine(action) {
1321
1766
  let assertLine = null;
1322
1767
  switch (action.assertType) {
1323
1768
  case "text_visible":
1324
- if (val) assertLine = `abAssertTextVisible(${j(val)});`;
1769
+ if (val) assertLine = `abAssertTextVisible(${jExpr(val)});`;
1325
1770
  break;
1326
1771
  case "text_not_visible":
1327
- if (val) assertLine = `abAssertNotVisible(${j("text=" + val)}, 180_000);`;
1772
+ if (val) assertLine = `abAssertNotVisible(${jExpr("text=" + val)}, 180_000);`;
1328
1773
  break;
1329
1774
  case "element_visible":
1330
1775
  if (sel) assertLine = `abAssertVisible(${j(sel)});`;
@@ -1333,7 +1778,7 @@ function actionToLine(action) {
1333
1778
  if (sel) assertLine = `abAssertNotVisible(${j(sel)});`;
1334
1779
  break;
1335
1780
  case "url_contains":
1336
- if (val) assertLine = `abAssertUrl(${j(val)});`;
1781
+ if (val) assertLine = `abAssertUrl(${jExpr(val)});`;
1337
1782
  break;
1338
1783
  case "element_enabled":
1339
1784
  if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertEnabled(${j(sel)});`;
@@ -1356,6 +1801,14 @@ function actionToLine(action) {
1356
1801
  }
1357
1802
  /** JSON.stringify — produces a quoted string literal safe for embedding in TS source. */
1358
1803
  const j = (s) => JSON.stringify(s);
1804
+ /**
1805
+ * Like `j`, but recognises `$VAR` / `${VAR}` env-ref forms in the value and
1806
+ * emits them as `${process.env.VAR ?? ""}` template-literal substitutions
1807
+ * instead of baking the literal `$VAR` string into the script. Used for
1808
+ * values that came from a spec or block param: form fills, opened URLs,
1809
+ * assertion texts/URLs.
1810
+ */
1811
+ const jExpr = (s) => envRefsToJsExpression(s);
1359
1812
  //#endregion
1360
1813
  //#region src/prompts/codegen.ts
1361
1814
  function buildCleanupPrompt(actions) {
@@ -1388,6 +1841,109 @@ ${actions.map((a, i) => {
1388
1841
  }).join("\n")}`;
1389
1842
  }
1390
1843
  //#endregion
1844
+ //#region src/codegen/cleanup.ts
1845
+ /**
1846
+ * Best-effort cleanup of a recorded action list. Hands the actions to
1847
+ * Claude with the cleanup prompt and parses the returned JSON array; on
1848
+ * any failure (Claude error, malformed JSON, empty array) falls back to
1849
+ * the original input so the caller can always proceed.
1850
+ *
1851
+ * Note: the prompt deliberately does not surface the `stepId` field.
1852
+ * Callers that need to preserve stepIds across cleanup (only `ccqa generate`
1853
+ * today) must re-attach them after this returns.
1854
+ */
1855
+ async function cleanupActions$1(actions, model) {
1856
+ try {
1857
+ const { result, isError } = await invokeClaudeStreaming({
1858
+ prompt: buildCleanupPrompt(actions),
1859
+ disableBuiltinTools: true,
1860
+ maxTurns: 1,
1861
+ model
1862
+ }, () => {});
1863
+ if (isError || !result) return actions;
1864
+ const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
1865
+ const parsed = JSON.parse(json);
1866
+ if (Array.isArray(parsed) && parsed.length > 0) return parsed;
1867
+ } catch {}
1868
+ return actions;
1869
+ }
1870
+ //#endregion
1871
+ //#region src/runtime/bundled-config.ts
1872
+ const CANDIDATES = [
1873
+ "../runtime/vitest.config.mjs",
1874
+ "./vitest.config.mjs",
1875
+ "./vitest.config.ts"
1876
+ ];
1877
+ function bundledVitestConfigPath() {
1878
+ for (const rel of CANDIDATES) {
1879
+ const candidate = fileURLToPath(new URL(rel, import.meta.url));
1880
+ try {
1881
+ accessSync(candidate);
1882
+ return candidate;
1883
+ } catch {}
1884
+ }
1885
+ return fileURLToPath(new URL("./vitest.config.ts", import.meta.url));
1886
+ }
1887
+ //#endregion
1888
+ //#region src/runtime/spawn-vitest.ts
1889
+ const require$1 = createRequire(import.meta.url);
1890
+ function resolveVitestBin() {
1891
+ const pkgPath = require$1.resolve("vitest/package.json");
1892
+ const pkg = require$1(pkgPath);
1893
+ const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.vitest;
1894
+ if (!binRel) throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
1895
+ return resolve(dirname(pkgPath), binRel);
1896
+ }
1897
+ async function spawnVitestTeed(args, opts = {}) {
1898
+ const child = spawnVitestChild(args, opts, "pipe");
1899
+ const [stdout, stderr, exitCode] = await Promise.all([
1900
+ teeDrain(child.stdout, process.stdout),
1901
+ teeDrain(child.stderr, process.stderr),
1902
+ waitExit(child)
1903
+ ]);
1904
+ return {
1905
+ exitCode,
1906
+ stdout,
1907
+ stderr
1908
+ };
1909
+ }
1910
+ function spawnVitestStreaming(args, opts = {}) {
1911
+ const child = spawnVitestChild(args, opts, "pipe");
1912
+ return {
1913
+ child,
1914
+ stdout: child.stdout,
1915
+ stderr: child.stderr,
1916
+ exited: waitExit(child)
1917
+ };
1918
+ }
1919
+ function spawnVitestChild(args, opts, stdio) {
1920
+ const vitestBin = resolveVitestBin();
1921
+ return spawn(process.execPath, [vitestBin, ...args], {
1922
+ cwd: opts.cwd,
1923
+ env: opts.env ?? process.env,
1924
+ stdio: [
1925
+ "ignore",
1926
+ stdio,
1927
+ stdio
1928
+ ]
1929
+ });
1930
+ }
1931
+ async function teeDrain(stream, sink) {
1932
+ stream.setEncoding("utf8");
1933
+ let buf = "";
1934
+ for await (const chunk of stream) {
1935
+ buf += chunk;
1936
+ sink.write(chunk);
1937
+ }
1938
+ return buf;
1939
+ }
1940
+ function waitExit(child) {
1941
+ return new Promise((resolvePromise, rejectPromise) => {
1942
+ child.once("exit", (code) => resolvePromise(code ?? 0));
1943
+ child.once("error", rejectPromise);
1944
+ });
1945
+ }
1946
+ //#endregion
1391
1947
  //#region src/diagnose/apply.ts
1392
1948
  function applyDiagnosis(script, diagnosis) {
1393
1949
  switch (diagnosis.type) {
@@ -1438,6 +1994,7 @@ function applyTiming(script, fixes) {
1438
1994
  summary: summary.join("; ")
1439
1995
  };
1440
1996
  }
1997
+ const REMOVABLE_ASSERT_RE = /\b(?:abAssert\w*|abWait)\b/;
1441
1998
  function applyOverAssertion(script, lineNumbers) {
1442
1999
  if (lineNumbers.length === 0) return {
1443
2000
  applied: false,
@@ -1450,13 +2007,13 @@ function applyOverAssertion(script, lineNumbers) {
1450
2007
  const idx = line - 1;
1451
2008
  if (idx < 0 || idx >= lines.length) continue;
1452
2009
  const content = lines[idx];
1453
- if (!/abAssert/.test(content)) continue;
2010
+ if (!REMOVABLE_ASSERT_RE.test(content)) continue;
1454
2011
  removed.push(`line ${line}: ${content.trim()}`);
1455
2012
  lines.splice(idx, 1);
1456
2013
  }
1457
2014
  if (removed.length === 0) return {
1458
2015
  applied: false,
1459
- reason: "no abAssert lines matched the proposed line numbers"
2016
+ reason: "no abAssert/abWait lines matched the proposed line numbers"
1460
2017
  };
1461
2018
  return {
1462
2019
  applied: true,
@@ -1502,7 +2059,7 @@ function previewDiff(before, after) {
1502
2059
  //#endregion
1503
2060
  //#region src/diagnose/prompt.ts
1504
2061
  function buildDiagnosePrompt(input) {
1505
- const { script, specMarkdown, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
2062
+ const { script, specYaml, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
1506
2063
  const numbered = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
1507
2064
  return `You are diagnosing a failing E2E test. The test was generated from a recorded trace of the original interaction. Compare the failing run against the original spec and recorded actions to determine WHY the test failed and what the right fix is.
1508
2065
 
@@ -1593,11 +2150,11 @@ Pick exactly ONE category. The output JSON must follow the shape for that catego
1593
2150
  - Your **final** assistant message must start with \`{\` and end with \`}\` — a single JSON object, nothing before or after. No prose preamble like "Confirmed: ...", no markdown fences, no commentary, no tool calls in the same turn. If you have an analysis sentence, put it in the \`reasoning\` field.
1594
2151
  - Line numbers refer to the numbered test script below (1-based).
1595
2152
  - For SELECTOR_DRIFT, \`oldSelector\` must match a substring of the script at that line; \`newSelector\` must be backed by a concrete file:line you read with Grep/Read (do not invent). Cite the evidence in \`reasoning\`.
1596
- - For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`).
1597
- - Cross-check assertions against the spec markdown. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.
2153
+ - For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`) or existence-checking waits (\`abWait\`); a recorded \`abWait("[selector]")\` is an implicit existence assertion and a valid removal candidate when the spec never required that element to be present.
2154
+ - Cross-check assertions against the spec YAML. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.
1598
2155
 
1599
- ## Test Spec (test-spec.md)
1600
- ${specMarkdown}
2156
+ ## Test Spec (spec.yaml)
2157
+ ${specYaml}
1601
2158
 
1602
2159
  ## Recorded Actions (actions.json summary)
1603
2160
  ${actions.map((a, i) => {
@@ -1808,8 +2365,7 @@ function normaliseSleepFixes(raw) {
1808
2365
  const line = typeof item["line"] === "number" ? item["line"] : null;
1809
2366
  if (line === null) continue;
1810
2367
  const reason = typeof item["reason"] === "string" ? item["reason"] : "";
1811
- const kind = item["kind"];
1812
- if (kind === "insert" || typeof item["seconds"] === "number" && item["increase_to"] === void 0) {
2368
+ if (item["kind"] === "insert") {
1813
2369
  const seconds = typeof item["seconds"] === "number" ? item["seconds"] : null;
1814
2370
  if (seconds === null) continue;
1815
2371
  out.push({
@@ -1818,9 +2374,7 @@ function normaliseSleepFixes(raw) {
1818
2374
  seconds,
1819
2375
  reason
1820
2376
  });
1821
- continue;
1822
- }
1823
- if (kind === "increase" || typeof item["increase_to"] === "number") {
2377
+ } else if (item["kind"] === "increase") {
1824
2378
  const increaseTo = typeof item["increase_to"] === "number" ? item["increase_to"] : null;
1825
2379
  if (increaseTo === null) continue;
1826
2380
  out.push({
@@ -1829,7 +2383,6 @@ function normaliseSleepFixes(raw) {
1829
2383
  increase_to: increaseTo,
1830
2384
  reason
1831
2385
  });
1832
- continue;
1833
2386
  }
1834
2387
  }
1835
2388
  return out;
@@ -2014,7 +2567,7 @@ const DEFAULT_CONFIDENCE_THRESHOLD = .8;
2014
2567
  * or the diagnose loop chose to bail out early.
2015
2568
  */
2016
2569
  async function runAutoFixLoop(input) {
2017
- const { scriptPath, initialRun, specMarkdown, actions, maxRetries, mode, runVitest, agentBrowserSession, outputLanguage, model } = input;
2570
+ const { scriptPath, initialRun, specYaml, actions, maxRetries, mode, runVitest, agentBrowserSession, outputLanguage, model } = input;
2018
2571
  let { exitCode, output, currentScript } = initialRun;
2019
2572
  if (exitCode === 0) return true;
2020
2573
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
@@ -2025,7 +2578,7 @@ async function runAutoFixLoop(input) {
2025
2578
  else fix("page snapshot unavailable; continuing without it");
2026
2579
  const fixed = await diagnoseAndFix({
2027
2580
  script: currentScript,
2028
- specMarkdown,
2581
+ specYaml,
2029
2582
  actions,
2030
2583
  failureLog: output,
2031
2584
  pageSnapshot: pageSnapshot ?? void 0,
@@ -2046,10 +2599,10 @@ async function runAutoFixLoop(input) {
2046
2599
  return false;
2047
2600
  }
2048
2601
  async function diagnoseAndFix(input) {
2049
- const { script, specMarkdown, actions, failureLog, pageSnapshot, mode, outputLanguage, model } = input;
2602
+ const { script, specYaml, actions, failureLog, pageSnapshot, mode, outputLanguage, model } = input;
2050
2603
  const outcome = await timedPhase("diagnose", () => diagnose({
2051
2604
  script,
2052
- specMarkdown,
2605
+ specYaml,
2053
2606
  actions,
2054
2607
  failureLog,
2055
2608
  pageSnapshot,
@@ -2084,7 +2637,7 @@ async function diagnoseAndFix(input) {
2084
2637
  return apply.script;
2085
2638
  }
2086
2639
  if (decision === "skip-low-confidence") {
2087
- fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (--no-interactive)`);
2640
+ fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (mode: ${mode})`);
2088
2641
  handoffToUser(result, outcome.raw, outputLanguage);
2089
2642
  return null;
2090
2643
  }
@@ -2108,10 +2661,15 @@ async function diagnoseAndFix(input) {
2108
2661
  process.exit(1);
2109
2662
  }
2110
2663
  }
2664
+ /**
2665
+ * Map a diagnosis to one of three actions. `auto` previously bypassed the
2666
+ * confidence threshold; it no longer does — a low-confidence guess can
2667
+ * corrupt working code, and CI wants "apply obvious fixes, fail loudly on
2668
+ * the rest" rather than "apply every guess".
2669
+ */
2111
2670
  function decide(result, mode) {
2112
- if (mode === "auto") return "apply-auto";
2113
2671
  const highConfidence = result.confidence >= DEFAULT_CONFIDENCE_THRESHOLD;
2114
- if (mode === "non-interactive") return highConfidence ? "apply-auto" : "skip-low-confidence";
2672
+ if (mode === "auto" || mode === "non-interactive") return highConfidence ? "apply-auto" : "skip-low-confidence";
2115
2673
  return highConfidence ? "apply-auto" : "interactive";
2116
2674
  }
2117
2675
  function reportDiagnosis(result) {
@@ -2148,27 +2706,27 @@ function handoffMessage(diagnosis, language) {
2148
2706
  }
2149
2707
  function handoffEn(diagnosis) {
2150
2708
  switch (diagnosis.type) {
2151
- case "DATA_MISSING": return [`application-side issue: required data is missing. ${diagnosis.reason}`, "next step: seed the data (or update test-spec.md prerequisites), then re-run trace + generate."];
2709
+ case "DATA_MISSING": return [`application-side issue: required data is missing. ${diagnosis.reason}`, "next step: seed the data (or update spec.yaml prerequisites), then re-run trace + generate."];
2152
2710
  case "UNKNOWN": return [`could not classify the failure. ${diagnosis.reason}`, "next step: read the failure log above, decide whether the test or the app is wrong, and fix manually. consider re-running ccqa trace if the recorded flow no longer matches the live app."];
2153
2711
  case "SELECTOR_DRIFT": return [
2154
2712
  `selector likely drifted but auto-apply was not safe.`,
2155
2713
  `proposed: line ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason}).`,
2156
2714
  "next step: confirm in the live app and either accept the proposal manually, or re-run ccqa trace to recapture the new selector."
2157
2715
  ];
2158
- case "OVER_ASSERTION": return [`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`, "next step: cross-check test-spec.md. either delete the assertion from the test, or tighten the spec to require it."];
2716
+ case "OVER_ASSERTION": return [`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`, "next step: cross-check spec.yaml. either delete the assertion from the test, or tighten the spec to require it."];
2159
2717
  case "TIMING_ISSUE": return [`timing fix proposed but couldn't be applied automatically.`, "next step: insert a sleep manually before the failing line, or re-run with a higher confidence trace."];
2160
2718
  }
2161
2719
  }
2162
2720
  function handoffJa(diagnosis) {
2163
2721
  switch (diagnosis.type) {
2164
- case "DATA_MISSING": return [`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`, "次のステップ: データを seed する(または test-spec.md の prerequisites を更新)してから ccqa trace + generate をやり直してください。"];
2722
+ case "DATA_MISSING": return [`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`, "次のステップ: データを seed する(または spec.yaml の prerequisites を更新)してから ccqa trace + generate をやり直してください。"];
2165
2723
  case "UNKNOWN": return [`失敗を分類できませんでした。${diagnosis.reason}`, "次のステップ: 上の失敗ログを確認し、テストとアプリのどちらが原因か判断して手動で修正してください。記録した手順がアプリの現状と合わない場合は ccqa trace の再実行を検討してください。"];
2166
2724
  case "SELECTOR_DRIFT": return [
2167
2725
  "selector が変わった可能性が高いですが、自動適用は安全でないと判断しました。",
2168
2726
  `提案: 行 ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason})`,
2169
2727
  "次のステップ: アプリで新 selector を確認し、手動で適用するか ccqa trace をやり直して新しい selector を取り直してください。"
2170
2728
  ];
2171
- case "OVER_ASSERTION": return [`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`, "次のステップ: test-spec.md と照合して、テスト側の assertion を削るか、spec 側を更新してください。"];
2729
+ case "OVER_ASSERTION": return [`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`, "次のステップ: spec.yaml と照合して、テスト側の assertion を削るか、spec 側を更新してください。"];
2172
2730
  case "TIMING_ISSUE": return ["timing 関連の修正案は出ましたが、自動適用できませんでした。", "次のステップ: 失敗行の前に手動で sleep を入れるか、より信頼度の高い trace を取り直してください。"];
2173
2731
  }
2174
2732
  }
@@ -2204,18 +2762,24 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
2204
2762
  meta("actions", actions.length);
2205
2763
  const specContent = await readSpecFile(featureName, specName);
2206
2764
  const spec = parseTestSpec(specContent);
2207
- const setupScripts = await loadSetupScripts(spec.setups);
2208
- if (setupScripts.length > 0) meta("setups", setupScripts.map((s) => s.name).join(", "));
2765
+ const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
2766
+ await warnStaleBlockArtifacts();
2767
+ meta("steps", expanded.length);
2209
2768
  meta("fix-mode", mode);
2210
2769
  meta("language", outputLanguage);
2211
2770
  blank();
2212
- const cleanedActions = await cleanupActions$1(actions, model);
2771
+ const cleanedActions = await cleanupActions(actions, model);
2213
2772
  if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
2214
- const scriptPath = await saveTestScript(featureName, specName, actionsToScript(cleanedActions, spec.title, setupScripts.length > 0 ? setupScripts : void 0));
2773
+ const markers = buildStepMarkers(expanded, cleanedActions);
2774
+ const scriptPath = await saveTestScript(featureName, specName, actionsToScript({
2775
+ actions: cleanedActions,
2776
+ testName: spec.title,
2777
+ stepMarkers: markers
2778
+ }));
2215
2779
  meta("saved", scriptPath);
2216
2780
  blank();
2217
2781
  const agentBrowserSession = useSnapshot ? `ccqa-generate-${Date.now()}` : void 0;
2218
- const runVitestForSession = (path) => runVitest$1(path, agentBrowserSession);
2782
+ const runVitestForSession = (path) => runVitest(path, agentBrowserSession);
2219
2783
  let signalHandler = null;
2220
2784
  if (agentBrowserSession) {
2221
2785
  await closeSession(agentBrowserSession);
@@ -2234,7 +2798,7 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
2234
2798
  if (await runAutoFixLoop({
2235
2799
  scriptPath,
2236
2800
  initialRun,
2237
- specMarkdown: specContent,
2801
+ specYaml: specContent,
2238
2802
  actions: cleanedActions,
2239
2803
  maxRetries,
2240
2804
  mode,
@@ -2256,6 +2820,30 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
2256
2820
  if (agentBrowserSession) await closeSession(agentBrowserSession);
2257
2821
  }
2258
2822
  }
2823
+ /**
2824
+ * Build the per-step markers consumed by `actionsToScript`. Each action's
2825
+ * `stepId` (assigned at trace time from the last `STEP_START|...` line)
2826
+ * groups contiguous actions; we emit one marker at the first action of
2827
+ * each contiguous run. Unknown step ids are skipped rather than mis-labelled.
2828
+ */
2829
+ function buildStepMarkers(steps, actions) {
2830
+ const stepById = new Map(steps.map((s) => [s.id, s]));
2831
+ const markers = [];
2832
+ let lastEmittedStepId = null;
2833
+ for (let i = 0; i < actions.length; i++) {
2834
+ const id = actions[i].stepId;
2835
+ if (!id || id === lastEmittedStepId) continue;
2836
+ const step = stepById.get(id);
2837
+ if (!step) continue;
2838
+ markers.push({
2839
+ actionIndex: i,
2840
+ stepId: step.id,
2841
+ source: step.source
2842
+ });
2843
+ lastEmittedStepId = id;
2844
+ }
2845
+ return markers;
2846
+ }
2259
2847
  async function confirmOverwrite(path) {
2260
2848
  if (!process.stdin.isTTY) {
2261
2849
  warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
@@ -2275,67 +2863,7 @@ async function confirmOverwrite(path) {
2275
2863
  rl.close();
2276
2864
  }
2277
2865
  }
2278
- async function loadSetupScripts(setups) {
2279
- if (!setups?.length) return [];
2280
- const result = [];
2281
- for (const ref of setups) {
2282
- const scriptPath = join(getSetupDir(ref.name), "test.spec.ts");
2283
- const resolved = replacePlaceholders(extractTestBody(await readFile(scriptPath, "utf-8").catch(() => {
2284
- throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
2285
- })), ref.params ?? {});
2286
- result.push({
2287
- name: ref.name,
2288
- body: resolved
2289
- });
2290
- }
2291
- return result;
2292
- }
2293
- /**
2294
- * Extract the test body (statements inside the test callback) from a setup
2295
- * test script.
2296
- *
2297
- * Locates the first arrow callback (`=> {`) after a top-level `test(` call
2298
- * and returns the text between the matching `{` and `}`. Handles both
2299
- * single-line and multi-line `test(...)` formatting (the latter is what
2300
- * prettier produces).
2301
- *
2302
- * Brace tracking is naive (string/regex/comment literals are not parsed
2303
- * specially), but setup test scripts are themselves generated by ccqa and
2304
- * follow a fixed shape, so this is sufficient in practice.
2305
- */
2306
- function extractTestBody(script) {
2307
- const testCallMatch = /\btest\s*\(/.exec(script);
2308
- if (!testCallMatch) return "";
2309
- const arrowIdx = script.indexOf("=> {", testCallMatch.index);
2310
- if (arrowIdx === -1) return "";
2311
- const bodyStart = arrowIdx + 4;
2312
- let depth = 1;
2313
- let i = bodyStart;
2314
- for (; i < script.length; i++) {
2315
- const ch = script[i];
2316
- if (ch === "{") depth++;
2317
- else if (ch === "}") {
2318
- depth--;
2319
- if (depth === 0) break;
2320
- }
2321
- }
2322
- if (depth !== 0) return "";
2323
- return script.slice(bodyStart, i).replace(/^\n/, "").replace(/\n\s*$/, "");
2324
- }
2325
- function replacePlaceholders(body, params) {
2326
- let result = body;
2327
- for (const [key, value] of Object.entries(params)) if (hasEnvRef(value)) {
2328
- const expr = envRefsToJsExpression(value);
2329
- const re = new RegExp(`(["'])\\{\\{${escapeRegExp(key)}\\}\\}\\1`, "g");
2330
- result = result.replace(re, expr);
2331
- result = result.replaceAll(`{{${key}}}`, value);
2332
- } else result = result.replaceAll(`{{${key}}}`, value);
2333
- return result;
2334
- }
2335
- function escapeRegExp(s) {
2336
- return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
2337
- }
2338
- async function runVitest$1(scriptPath, agentBrowserSession) {
2866
+ async function runVitest(scriptPath, agentBrowserSession) {
2339
2867
  const { exitCode, stdout, stderr } = await spawnVitestTeed([
2340
2868
  "run",
2341
2869
  "--config",
@@ -2352,557 +2880,159 @@ async function runVitest$1(scriptPath, agentBrowserSession) {
2352
2880
  currentScript
2353
2881
  };
2354
2882
  }
2355
- async function cleanupActions$1(actions, model) {
2356
- try {
2357
- const { result, isError } = await invokeClaudeStreaming({
2358
- prompt: buildCleanupPrompt(actions),
2359
- disableBuiltinTools: true,
2360
- maxTurns: 1,
2361
- model
2362
- }, () => {});
2363
- if (isError || !result) return actions;
2364
- const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
2365
- const parsed = JSON.parse(json);
2366
- if (Array.isArray(parsed) && parsed.length > 0) return parsed;
2367
- } catch {}
2368
- return actions;
2369
- }
2370
- //#endregion
2371
- //#region src/cli/run.ts
2372
- const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
2373
- async function resolveVitestConfig() {
2374
- try {
2375
- await access(USER_VITEST_CONFIG);
2376
- return USER_VITEST_CONFIG;
2377
- } catch {
2378
- return bundledVitestConfigPath();
2379
- }
2883
+ async function cleanupActions(actions, model) {
2884
+ const cleaned = await cleanupActions$1(actions, model);
2885
+ return cleaned === actions ? actions : reattachStepIds(cleaned, actions);
2380
2886
  }
2381
- const runCommand = new Command("run").argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all").description("Run generated agent-browser test scripts").action(async (target) => {
2382
- await runTests(target);
2383
- });
2384
- async function runTests(target) {
2385
- header("run", target);
2386
- const specs = await resolveSpecs(target);
2387
- if (specs.length === 0) {
2388
- error("no test scripts found");
2389
- hint("run 'ccqa generate <feature>/<spec>' first to generate tests");
2390
- process.exit(1);
2391
- }
2392
- const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
2393
- const summaries = [];
2394
- let overallExitCode = 0;
2395
- const vitestConfig = await resolveVitestConfig();
2396
- try {
2397
- for (let i = 0; i < specs.length; i++) {
2398
- const { featureName, specName } = specs[i];
2399
- const scriptFile = await getTestScript(featureName, specName);
2400
- if (!scriptFile) {
2401
- warn(`${featureName}/${specName}: no test.spec.ts found`);
2402
- continue;
2403
- }
2404
- run(`${featureName}/${specName}`);
2405
- meta("test", scriptFile);
2406
- blank();
2407
- const reportFile = join(tmpDir, `report-${i}.json`);
2408
- const proc = spawnVitestStreaming([
2409
- "run",
2410
- "--config",
2411
- vitestConfig,
2412
- scriptFile,
2413
- "--reporter=json",
2414
- `--outputFile.json=${reportFile}`
2415
- ]);
2416
- await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]);
2417
- const exitCode = await proc.exited;
2418
- if (exitCode !== 0) overallExitCode = exitCode;
2419
- const report = await readReport(reportFile);
2420
- summaries.push({
2421
- featureName,
2422
- specName,
2423
- scriptFile,
2424
- report,
2425
- exitCode
2426
- });
2427
- blank();
2887
+ /**
2888
+ * The Claude cleanup pass returns a pruned array without the `stepId` field
2889
+ * (the prompt deliberately doesn't expose it — that would make the prompt
2890
+ * easier to misformat). Re-attach stepIds here by replaying the cleaned
2891
+ * stream against the original and matching the next compatible action.
2892
+ *
2893
+ * Algorithm: walk both arrays in lockstep. For each cleaned action, scan
2894
+ * forward in `original` (from the last-matched cursor) for the next entry
2895
+ * with the same `command` + `selector` + `value` + `assertType` shape, and
2896
+ * borrow its `stepId`. Cleaned actions Claude invented from thin air (rare,
2897
+ * and explicitly forbidden by the prompt) end up with no stepId — codegen
2898
+ * just won't emit a step marker for that index, which is the same outcome
2899
+ * as a wholly stepId-less actions.json.
2900
+ *
2901
+ * The matching is forward-only so that if cleanup keeps two identical fills
2902
+ * (e.g. typing the same value twice intentionally), they're paired to the
2903
+ * first and second occurrence in the original — not both to the first.
2904
+ */
2905
+ function reattachStepIds(cleaned, original) {
2906
+ let cursor = 0;
2907
+ const out = [];
2908
+ for (const c of cleaned) {
2909
+ let matched = null;
2910
+ for (let i = cursor; i < original.length; i++) if (sameShape(c, original[i])) {
2911
+ matched = original[i];
2912
+ cursor = i + 1;
2913
+ break;
2428
2914
  }
2429
- printSummary(summaries);
2430
- } finally {
2431
- await rm(tmpDir, {
2432
- recursive: true,
2433
- force: true
2915
+ if (matched?.stepId) out.push({
2916
+ ...c,
2917
+ stepId: matched.stepId
2434
2918
  });
2919
+ else out.push(c);
2435
2920
  }
2436
- process.exit(overallExitCode);
2437
- }
2438
- async function readReport(path) {
2439
- try {
2440
- const raw = await readFile(path, "utf8");
2441
- return JSON.parse(raw);
2442
- } catch {
2443
- return null;
2444
- }
2921
+ return out;
2445
2922
  }
2446
- const useColor = process.stdout.isTTY && process.env.NO_COLOR == null;
2447
- const C = {
2448
- reset: useColor ? "\x1B[0m" : "",
2449
- bold: useColor ? "\x1B[1m" : "",
2450
- dim: useColor ? "\x1B[2m" : "",
2451
- green: useColor ? "\x1B[32m" : "",
2452
- red: useColor ? "\x1B[31m" : "",
2453
- yellow: useColor ? "\x1B[33m" : "",
2454
- cyan: useColor ? "\x1B[36m" : "",
2455
- gray: useColor ? "\x1B[90m" : ""
2456
- };
2457
- function printSummary(summaries) {
2458
- process.stdout.write(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
2459
- let totalTests = 0;
2460
- let totalPassed = 0;
2461
- let totalFailed = 0;
2462
- let totalSkipped = 0;
2463
- for (const s of summaries) {
2464
- const header = `${C.bold}${s.featureName}/${s.specName}${C.reset}`;
2465
- if (!s.report) {
2466
- const icon = s.exitCode === 0 ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
2467
- process.stdout.write(`${icon} ${header} ${C.dim}(no report)${C.reset}\n`);
2468
- continue;
2469
- }
2470
- totalTests += s.report.numTotalTests;
2471
- totalPassed += s.report.numPassedTests;
2472
- totalFailed += s.report.numFailedTests;
2473
- totalSkipped += s.report.numPendingTests;
2474
- const ok = s.report.success;
2475
- const icon = ok ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
2476
- const countColor = ok ? C.green : C.red;
2477
- process.stdout.write(`${icon} ${header} ${countColor}${s.report.numPassedTests}/${s.report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
2478
- for (const file of s.report.testResults) for (const a of file.assertionResults) {
2479
- const aIcon = assertionIcon(a.status);
2480
- const dur = a.duration != null ? ` ${C.gray}${formatDuration(a.duration)}${C.reset}` : "";
2481
- process.stdout.write(` ${aIcon} ${a.fullName}${dur}\n`);
2482
- if (a.status === "failed" && a.failureMessages?.length) for (const msg of a.failureMessages) {
2483
- const firstLine = msg.split("\n")[0] ?? msg;
2484
- process.stdout.write(` ${C.red}${firstLine}${C.reset}\n`);
2485
- }
2486
- }
2487
- }
2488
- const specsPassed = summaries.filter((s) => s.exitCode === 0).length;
2489
- const specsFailed = summaries.filter((s) => s.exitCode !== 0).length;
2490
- process.stdout.write("\n");
2491
- process.stdout.write(` ${C.bold}Specs${C.reset} ${summaries.length} (${C.green}${specsPassed} passed${C.reset}, ${specsFailed > 0 ? C.red : C.dim}${specsFailed} failed${C.reset})\n`);
2492
- process.stdout.write(` ${C.bold}Tests${C.reset} ${totalTests} (${C.green}${totalPassed} passed${C.reset}, ${totalFailed > 0 ? C.red : C.dim}${totalFailed} failed${C.reset}, ${C.yellow}${totalSkipped} skipped${C.reset})\n`);
2493
- process.stdout.write("\n");
2923
+ function sameShape(a, b) {
2924
+ return a.command === b.command && (a.selector ?? "") === (b.selector ?? "") && (a.value ?? "") === (b.value ?? "") && (a.assertType ?? "") === (b.assertType ?? "");
2494
2925
  }
2495
- function assertionIcon(status) {
2496
- switch (status) {
2497
- case "passed": return `${C.green}✔${C.reset}`;
2498
- case "failed": return `${C.red}✖${C.reset}`;
2499
- case "skipped":
2500
- case "pending":
2501
- case "todo": return `${C.yellow}◌${C.reset}`;
2502
- }
2926
+ //#endregion
2927
+ //#region src/claude/extract-json.ts
2928
+ /**
2929
+ * Pulls a JSON object out of a Claude completion. Accepts either a fenced
2930
+ * ```json block or a bare `{...}` payload that constitutes the whole reply.
2931
+ * Returns null when neither shape is present.
2932
+ */
2933
+ function extractJsonBlock(text) {
2934
+ const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
2935
+ if (fenced && fenced[1]) return fenced[1].trim();
2936
+ const trimmed = text.trim();
2937
+ if (trimmed.startsWith("{") && trimmed.endsWith("}")) return trimmed;
2938
+ return null;
2503
2939
  }
2504
- function formatDuration(ms) {
2505
- if (ms < 1e3) return `${Math.round(ms)}ms`;
2506
- return `${(ms / 1e3).toFixed(2)}s`;
2940
+ //#endregion
2941
+ //#region src/prompts/draft.ts
2942
+ function buildNamingSystemPrompt() {
2943
+ return `You name a new ccqa test case based on the user's intent and the existing feature tree.
2944
+
2945
+ ccqa test cases live under \`.ccqa/features/<featureName>/test-cases/<specName>/spec.yaml\`.
2946
+
2947
+ ## Naming rules
2948
+
2949
+ - featureName and specName are kebab-case ASCII (lowercase, words separated by '-').
2950
+ - featureName: a broad area (e.g. "tasks", "auth", "billing", "search").
2951
+ - specName: a short scenario name (e.g. "create-and-complete", "login-with-email", "search-by-tag").
2952
+ - Reuse existing featureName when the user's intent fits an existing area. Only invent a new featureName when the existing tree clearly does not cover the area.
2953
+ - specName must NOT collide with an existing spec under the chosen feature. If the natural name collides, pick a different one that distinguishes the new scenario from the existing ones.
2954
+ - Use the codebase (Read/Grep/Glob) sparingly to confirm domain vocabulary if helpful. Do not over-explore.
2955
+
2956
+ ## Output (STRICT)
2957
+
2958
+ Output ONE fenced \`\`\`json block, nothing else outside it:
2959
+
2960
+ {
2961
+ "featureName": "<kebab-case>",
2962
+ "specName": "<kebab-case>",
2963
+ "reason": "<one short sentence: why this name and how it relates to existing specs>"
2507
2964
  }
2508
- const NOISE_LINE_PATTERNS = [/^JSON report written to /];
2509
- async function streamFiltered(source, sink) {
2510
- source.setEncoding("utf8");
2511
- let buffer = "";
2512
- for await (const chunk of source) {
2513
- buffer += chunk;
2514
- let nl = buffer.indexOf("\n");
2515
- while (nl !== -1) {
2516
- const line = buffer.slice(0, nl);
2517
- buffer = buffer.slice(nl + 1);
2518
- if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) sink.write(line + "\n");
2519
- nl = buffer.indexOf("\n");
2520
- }
2521
- }
2522
- if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) sink.write(buffer);
2965
+ `;
2523
2966
  }
2524
- async function resolveSpecs(target) {
2525
- if (!target) return listAllSpecs();
2526
- if (target.includes("/")) {
2527
- const { featureName, specName } = parseSpecPath(target);
2528
- return [{
2529
- featureName,
2530
- specName
2531
- }];
2532
- }
2533
- return (await listSpecsForFeature(target)).map((specName) => ({
2534
- featureName: target,
2535
- specName
2536
- }));
2967
+ function buildNamingPrompt(intent, tree) {
2968
+ return `## User intent
2969
+
2970
+ ${intent}
2971
+
2972
+ ## Existing feature tree
2973
+
2974
+ ${tree.length === 0 ? "(no existing features yet)" : tree.map((f) => {
2975
+ const specLines = f.specs.length === 0 ? " (no specs yet)" : f.specs.map((s) => ` - ${s.specName}`).join("\n");
2976
+ return `- ${f.featureName}/\n${specLines}`;
2977
+ }).join("\n")}
2978
+
2979
+ ## Task
2980
+
2981
+ Pick featureName and specName for the new test case. Follow the naming rules. Avoid colliding with any existing specName under the chosen feature.
2982
+ `;
2537
2983
  }
2538
- //#endregion
2539
- //#region src/cli/trace-setup.ts
2540
- const traceSetupCommand = new Command("trace-setup").argument("<name>", "Setup name to trace (e.g. login)").description("Trace a setup procedure using dummy placeholder values").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (name, opts) => {
2541
- await runTraceSetup(name, opts.model);
2542
- });
2543
- async function runTraceSetup(name, model) {
2544
- header("trace-setup", name);
2545
- try {
2546
- meta("agent-browser", assertAgentBrowserAvailable());
2547
- } catch (e) {
2548
- if (e instanceof AgentBrowserUnavailableError) {
2549
- error(formatAgentBrowserUnavailableMessage());
2550
- process.exit(1);
2551
- }
2552
- throw e;
2553
- }
2554
- await ensureCcqaDir();
2555
- const spec = parseSetupSpec(await readSetupSpecFile(name));
2556
- const resolvedSpec = replacePlaceholdersWithDummies(spec);
2557
- const secretsToScrub = buildSecretsToScrub(spec);
2558
- meta("setup", spec.title);
2559
- meta("steps", spec.steps.length);
2560
- if (spec.placeholders) meta("placeholders", Object.keys(spec.placeholders).join(", "));
2561
- blank();
2562
- const systemPrompt = buildSetupTraceSystemPrompt(resolvedSpec);
2563
- const prompt = buildSetupTracePrompt(resolvedSpec);
2564
- info("Running agent-browser session...");
2565
- blank();
2566
- const routeSteps = [];
2567
- let overallStatus = "passed";
2568
- const traceActions = [];
2569
- const { isError } = await invokeClaudeStreaming({
2570
- prompt,
2571
- systemPrompt,
2572
- allowedTools: [
2573
- "Bash(*)",
2574
- "Read",
2575
- "Grep",
2576
- "Glob"
2577
- ],
2578
- env: {
2579
- PATH: pathWithAgentBrowserShim(process.env["PATH"]),
2580
- ANTHROPIC_API_KEY: ""
2581
- },
2582
- model,
2583
- onAbAction: (abAction) => {
2584
- const action = parseAbAction(scrubSecrets(abAction, secretsToScrub));
2585
- if (action) traceActions.push(action);
2586
- },
2587
- onAbActionFailed: () => {
2588
- traceActions.pop();
2589
- }
2590
- }, (msg) => {
2591
- if (msg.type !== "assistant") return;
2592
- for (const block of msg.message.content ?? []) {
2593
- if (block.type !== "text" || !block.text) continue;
2594
- const text = block.text;
2595
- const statusLine = parseStatusLine(text);
2596
- if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
2597
- for (const line of text.split("\n")) {
2598
- const trimmed = line.trim();
2599
- if (trimmed.startsWith("ROUTE_STEP|")) {
2600
- const routeStep = parseRouteStep(trimmed);
2601
- if (routeStep) {
2602
- routeSteps.push(routeStep);
2603
- if (routeStep.status === "FAILED") overallStatus = "failed";
2604
- }
2605
- } else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
2606
- const action = parseAbAction(scrubSecrets(trimmed, secretsToScrub));
2607
- if (action) traceActions.push(action);
2608
- }
2609
- }
2610
- }
2611
- });
2612
- if (isError) overallStatus = "failed";
2613
- const route = {
2614
- specName: name,
2615
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2616
- status: overallStatus,
2617
- steps: routeSteps
2618
- };
2619
- const [routePath, actionsPath] = await Promise.all([saveSetupRoute(name, route), saveSetupActions(name, traceActions)]);
2620
- blank();
2621
- meta("route", routePath);
2622
- meta("saved", actionsPath);
2623
- meta("actions", traceActions.length);
2624
- meta("status", overallStatus.toUpperCase());
2625
- hint(`run 'ccqa generate-setup ${name}' to generate and validate the setup`);
2626
- }
2627
- function replacePlaceholdersWithDummies(spec) {
2628
- if (!spec.placeholders) return spec;
2629
- const dummies = spec.placeholders;
2630
- const resolve = (text) => {
2631
- let result = text;
2632
- for (const [key, def] of Object.entries(dummies)) result = result.replaceAll(`{{${key}}}`, resolveEnvRefs(def.dummy));
2633
- return result;
2634
- };
2635
- return {
2636
- ...spec,
2637
- steps: spec.steps.map((step) => ({
2638
- ...step,
2639
- instruction: resolve(step.instruction),
2640
- expected: resolve(step.expected)
2641
- }))
2642
- };
2643
- }
2644
- /**
2645
- * Build the substitution map used to scrub real secret values out of
2646
- * recorded actions before they are written to actions.json.
2647
- *
2648
- * For each placeholder whose dummy contains env refs, store
2649
- * <resolved-value> -> <original ${VAR} string>
2650
- * so that an `ab fill ... <secret>` line records the placeholder string
2651
- * instead of the secret. Empty resolved values are skipped — they would
2652
- * otherwise replace incidental empty strings in the recorded actions.
2653
- */
2654
- function buildSecretsToScrub(spec) {
2655
- const map = /* @__PURE__ */ new Map();
2656
- if (!spec.placeholders) return map;
2657
- const dummies = spec.placeholders;
2658
- for (const def of Object.values(dummies)) {
2659
- if (!hasEnvRef(def.dummy)) continue;
2660
- const resolved = resolveEnvRefs(def.dummy);
2661
- if (!resolved) continue;
2662
- map.set(resolved, def.dummy);
2663
- }
2664
- return map;
2665
- }
2666
- /** Replace every occurrence of a recorded secret with its `${VAR}` placeholder. */
2667
- function scrubSecrets(line, secrets) {
2668
- if (secrets.size === 0) return line;
2669
- let result = line;
2670
- for (const [secret, placeholder] of secrets) {
2671
- if (!result.includes(secret)) continue;
2672
- result = result.split(secret).join(placeholder);
2673
- }
2674
- return result;
2675
- }
2676
- //#endregion
2677
- //#region src/cli/generate-setup.ts
2678
- const generateSetupCommand = new Command("generate-setup").argument("<name>", "Setup name to generate (e.g. login)").description("Clean up, validate, and templatize setup actions").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (name, opts) => {
2679
- const mode = resolveMode(opts);
2680
- await runGenerateSetup(name, parseInt(opts.maxRetries, 10), opts.fromDummy ?? false, mode, opts.language ?? "en", opts.model);
2681
- });
2682
- async function runGenerateSetup(name, maxRetries, fromDummy, mode, outputLanguage, model) {
2683
- header("generate-setup", name);
2684
- await ensureCcqaDir();
2685
- const specContent = await readSetupSpecFile(name);
2686
- const spec = parseSetupSpec(specContent);
2687
- const dummyPath = join(getSetupDir(name), "test.dummy.spec.ts");
2688
- const finalPath = join(getSetupDir(name), "test.spec.ts");
2689
- let cleanedActions = [];
2690
- if (fromDummy) {
2691
- if (!await stat(dummyPath).then(() => true).catch(() => false)) {
2692
- warn(`test.dummy.spec.ts not found. Run without --from-dummy first.`);
2693
- process.exit(1);
2694
- }
2695
- info("Resuming from existing test.dummy.spec.ts");
2696
- } else {
2697
- const { actions } = await getSetupActions(name);
2698
- meta("setup", spec.title);
2699
- meta("actions", actions.length);
2700
- meta("fix-mode", mode);
2701
- meta("language", outputLanguage);
2702
- blank();
2703
- cleanedActions = await cleanupActions(actions, model);
2704
- if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
2705
- await writeFile(dummyPath, actionsToScript(cleanedActions, spec.title), "utf-8");
2706
- meta("saved", dummyPath);
2707
- }
2708
- blank();
2709
- const agentBrowserSession = `ccqa-generate-setup-${name}-${Date.now()}`;
2710
- const runVitestForSession = (path) => runVitestResolved(path, agentBrowserSession);
2711
- await closeSession(agentBrowserSession);
2712
- const signalHandler = () => {
2713
- closeSession(agentBrowserSession).finally(() => process.exit(130));
2714
- };
2715
- process.once("SIGINT", signalHandler);
2716
- process.once("SIGTERM", signalHandler);
2717
- try {
2718
- const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(dummyPath), "run");
2719
- let passed = initialRun.exitCode === 0;
2720
- if (!passed) passed = await runAutoFixLoop({
2721
- scriptPath: dummyPath,
2722
- initialRun,
2723
- specMarkdown: specContent,
2724
- actions: cleanedActions,
2725
- maxRetries,
2726
- mode,
2727
- runVitest: runVitestForSession,
2728
- agentBrowserSession,
2729
- outputLanguage,
2730
- model
2731
- });
2732
- if (!passed) {
2733
- warn("auto-fix exhausted; setup test still failing");
2734
- hint(`edit ${dummyPath} manually, then run: ccqa generate-setup ${name} --from-dummy`);
2735
- process.exit(1);
2736
- }
2737
- await writeFile(finalPath, reversePlaceholdersInScript(await readFile(dummyPath, "utf8"), spec.placeholders), "utf-8");
2738
- await unlink(dummyPath).catch(() => {});
2739
- blank();
2740
- meta("saved", finalPath);
2741
- hint(`setup '${name}' is ready; reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
2742
- } finally {
2743
- process.off("SIGINT", signalHandler);
2744
- process.off("SIGTERM", signalHandler);
2745
- await closeSession(agentBrowserSession);
2746
- }
2747
- }
2748
- /**
2749
- * Replace dummy values with {{placeholder}} directly in the test script text.
2750
- * Longer dummy values are replaced first to avoid partial matches.
2751
- */
2752
- function reversePlaceholdersInScript(script, placeholders) {
2753
- if (!placeholders) return script;
2754
- const entries = Object.entries(placeholders).sort((a, b) => b[1].dummy.length - a[1].dummy.length);
2755
- let result = script;
2756
- for (const [key, def] of entries) result = result.replaceAll(def.dummy, `{{${key}}}`);
2757
- return result;
2758
- }
2759
- async function runVitest(scriptPath, agentBrowserSession) {
2760
- const { exitCode, stdout, stderr } = await spawnVitestTeed([
2761
- "run",
2762
- "--config",
2763
- bundledVitestConfigPath(),
2764
- scriptPath
2765
- ], agentBrowserSession ? { env: {
2766
- ...process.env,
2767
- AGENT_BROWSER_SESSION: agentBrowserSession
2768
- } } : {});
2769
- const currentScript = await readFile(scriptPath, "utf8");
2770
- return {
2771
- exitCode,
2772
- output: stdout + stderr,
2773
- currentScript
2774
- };
2775
- }
2776
- /**
2777
- * Run vitest on `test.dummy.spec.ts`, but transparently expand any `${VAR}`
2778
- * env refs to real values for the duration of the run. The original file is
2779
- * preserved unchanged so subsequent reverse-replace still sees the env-ref
2780
- * literals. Auto-fix edits the original file (via writeFile in callers), so
2781
- * we always re-read it before each invocation.
2782
- */
2783
- async function runVitestResolved(scriptPath, agentBrowserSession) {
2784
- const original = await readFile(scriptPath, "utf8");
2785
- if (!hasEnvRef(original)) return runVitest(scriptPath, agentBrowserSession);
2786
- const tmpPath = scriptPath.replace(/\.ts$/, ".__resolved.spec.ts");
2787
- await writeFile(tmpPath, resolveEnvRefs(original), "utf-8");
2788
- try {
2789
- const { exitCode, stdout, stderr } = await spawnVitestTeed([
2790
- "run",
2791
- "--config",
2792
- bundledVitestConfigPath(),
2793
- tmpPath
2794
- ], agentBrowserSession ? { env: {
2795
- ...process.env,
2796
- AGENT_BROWSER_SESSION: agentBrowserSession
2797
- } } : {});
2798
- return {
2799
- exitCode,
2800
- output: stdout + stderr,
2801
- currentScript: original
2802
- };
2803
- } finally {
2804
- await unlink(tmpPath).catch(() => {});
2805
- }
2806
- }
2807
- async function cleanupActions(actions, model) {
2808
- try {
2809
- const { result, isError } = await invokeClaudeStreaming({
2810
- prompt: buildCleanupPrompt(actions),
2811
- disableBuiltinTools: true,
2812
- maxTurns: 1,
2813
- model
2814
- }, () => {});
2815
- if (isError || !result) return actions;
2816
- const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
2817
- const parsed = JSON.parse(json);
2818
- if (Array.isArray(parsed) && parsed.length > 0) return parsed;
2819
- } catch {}
2820
- return actions;
2821
- }
2822
- //#endregion
2823
- //#region src/prompts/draft.ts
2824
- function buildNamingSystemPrompt() {
2825
- return `You name a new ccqa test case based on the user's intent and the existing feature tree.
2826
-
2827
- ccqa test cases live under \`.ccqa/features/<featureName>/test-cases/<specName>/test-spec.md\`.
2828
-
2829
- ## Naming rules
2984
+ function buildDraftSystemPrompt(blocks) {
2985
+ return `You are a QA engineer drafting and refining a ccqa spec.yaml.
2830
2986
 
2831
- - featureName and specName are kebab-case ASCII (lowercase, words separated by '-').
2832
- - featureName: a broad area (e.g. "tasks", "auth", "billing", "search").
2833
- - specName: a short scenario name (e.g. "create-and-complete", "login-with-email", "search-by-tag").
2834
- - Reuse existing featureName when the user's intent fits an existing area. Only invent a new featureName when the existing tree clearly does not cover the area.
2835
- - specName must NOT collide with an existing spec under the chosen feature. If the natural name collides, pick a different one that distinguishes the new scenario from the existing ones.
2836
- - Use the codebase (Read/Grep/Glob) sparingly to confirm domain vocabulary if helpful. Do not over-explore.
2837
-
2838
- ## Output (STRICT)
2839
-
2840
- Output ONE fenced \`\`\`json block, nothing else outside it:
2841
-
2842
- {
2843
- "featureName": "<kebab-case>",
2844
- "specName": "<kebab-case>",
2845
- "reason": "<one short sentence: why this name and how it relates to existing specs>"
2846
- }
2847
- `;
2848
- }
2849
- function buildNamingPrompt(intent, tree) {
2850
- return `## User intent
2987
+ The CLI runs you in a loop: each turn the user gives an intent (first run) or a refinement instruction (later runs). You read the codebase, validate the spec, and return a single JSON report. The CLI displays a diff and asks the user whether to apply.
2851
2988
 
2852
- ${intent}
2989
+ ## spec.yaml format (STRICT)
2853
2990
 
2854
- ## Existing feature tree
2991
+ Pure YAML — no markdown body, no frontmatter dashes.
2855
2992
 
2856
- ${tree.length === 0 ? "(no existing features yet)" : tree.map((f) => {
2857
- const specLines = f.specs.length === 0 ? " (no specs yet)" : f.specs.map((s) => ` - ${s.specName}${s.title ? ` — ${s.title}` : ""}`).join("\n");
2858
- return `- ${f.featureName}/\n${specLines}`;
2859
- }).join("\n")}
2993
+ Top-level fields:
2994
+ - \`title\`: string (required) — short human-readable name for the test
2995
+ - \`relatedPaths\`: array of glob string (optional) — source files this spec depends on, used by \`ccqa drift --changed\`
2996
+ - \`steps\`: array (required, at least one)
2860
2997
 
2861
- ## Task
2998
+ A step is one of two shapes:
2862
2999
 
2863
- Pick featureName and specName for the new test case. Follow the naming rules. Avoid colliding with any existing specName under the chosen feature.
2864
- `;
2865
- }
2866
- function buildDraftSystemPrompt() {
2867
- return `You are a QA engineer drafting and refining a ccqa test-spec.md.
2868
-
2869
- The CLI runs you in a loop: each turn the user gives an intent (first run) or a refinement instruction (later runs). You read the codebase, validate the spec, and return a single JSON report. The CLI displays a diff and asks the user whether to apply.
2870
-
2871
- ## test-spec.md format (STRICT)
3000
+ **Action step** — a user-facing browser interaction:
3001
+ \`\`\`yaml
3002
+ - instruction: <imperative; include the URL directly or via \${ENV_VAR}>
3003
+ expected: <observable outcome — visible text, URL pattern, element state>
3004
+ \`\`\`
2872
3005
 
2873
- YAML frontmatter + Markdown body.
3006
+ **Include step** — invoke a reusable block from \`.ccqa/blocks/<name>/spec.yaml\`:
3007
+ \`\`\`yaml
3008
+ - include: <block-name>
3009
+ params:
3010
+ <param-name>: <string value, can use \${ENV_VAR}>
3011
+ \`\`\`
2874
3012
 
2875
- Frontmatter fields:
2876
- - title: string (required)
2877
- - baseUrl: string (required, e.g. http://localhost:3000)
2878
- - prerequisites: string (optional, free text)
2879
- - setups: array of { name: string, params?: Record<string,string> } (optional)
3013
+ ## URLs
2880
3014
 
2881
- Body must contain a \`## Steps\` section followed by step blocks:
3015
+ Each step writes the URL it opens directly inside \`instruction\` (e.g. \`"\${APP_URL}/articles を開く"\`). Use \`\${ENV_VAR}\` references for environment-specific values.
2882
3016
 
2883
- \`\`\`
2884
- ### Step 1: <short title>
2885
- - **Instruction**: <imperative, one sentence>
2886
- - **Expected**: <observable outcome>
3017
+ ## Available blocks
2887
3018
 
2888
- ### Step 2: <short title>
2889
- ...
2890
- \`\`\`
3019
+ ${formatBlockList(blocks)}
2891
3020
 
2892
3021
  ## Quality rules
2893
3022
 
2894
3023
  - One user-facing action per step (login, click, fill, navigate, ...).
2895
- - **Expected** must be assertion-friendly: visible text, URL pattern, element state.
2896
- - Forbidden in **Expected**: timestamps, exact counts, session IDs, internal state.
3024
+ - \`expected\` must be assertion-friendly: visible text, URL pattern, element state.
3025
+ - Forbidden in \`expected\`: timestamps, exact counts, session IDs, internal state.
2897
3026
  - 3–8 steps is typical. Fewer means too coarse; more means too fine.
2898
3027
 
2899
3028
  ## Workflow (use Read / Grep / Glob extensively)
2900
3029
 
2901
- 1. Read the codebase under cwd to find concrete strings: routes, button labels, aria-labels, page titles, placeholders. Use those exact strings in **Expected**.
2902
- 2. If the spec references setups, Read \`.ccqa/setups/<name>/setup-spec.md\` and verify each \`params\` key matches the setup's \`placeholders\`.
2903
- 3. Validate the (current or proposed) spec on four axes — emit one issue per finding:
2904
- - **assertable**: each Expected can be verified against a string/URL/state that exists in code.
2905
- - **setups**: referenced setup exists; params keys match placeholders.
3030
+ 1. Read the codebase under cwd to find concrete strings: routes, button labels, aria-labels, page titles, placeholders. Use those exact strings in \`expected\`.
3031
+ 2. If you use \`include:\` steps, verify each \`params\` key matches a declared param of the block (see the Available blocks list above).
3032
+ 3. Populate \`relatedPaths\` with **provisional** glob patterns pointing at the source files this spec touches: the route/page file for each URL the spec visits, plus the component files (or their parent feature directory) that render the aria-labels, placeholders, or visible texts the spec asserts on. Prefer directory globs (e.g. \`src/features/tasks/**\`) when several files in one area are involved. Be conservative — include a path if you're unsure rather than omit it. \`ccqa trace\` will refine this list later from real browser observations.
3033
+ 4. Validate the (current or proposed) spec on four axes — emit one issue per finding:
3034
+ - **assertable**: each \`expected\` can be verified against a string/URL/state that exists in code.
3035
+ - **blocks**: every \`include\` resolves to a real block; every \`params\` key is declared on that block; every required param is provided.
2906
3036
  - **granularity**: not too coarse (multiple actions per step) nor too fine (snapshot-only steps); order is logical.
2907
3037
  - **unimplemented**: any feature mentioned in the spec that you cannot find in code.
2908
3038
 
@@ -2917,13 +3047,13 @@ Schema:
2917
3047
  "issues": [
2918
3048
  {
2919
3049
  "severity": "OK" | "WARN" | "ERROR",
2920
- "category": "assertable" | "setups" | "granularity" | "unimplemented",
3050
+ "category": "assertable" | "blocks" | "granularity" | "unimplemented",
2921
3051
  "stepId": "step-01" | null,
2922
3052
  "message": "<one-line summary>",
2923
3053
  "detail": "<optional, multiline explanation>"
2924
3054
  }
2925
3055
  ],
2926
- "patch": "<COMPLETE rewritten test-spec.md, or empty string if no changes>"
3056
+ "patch": "<COMPLETE rewritten spec.yaml, or empty string if no changes>"
2927
3057
  }
2928
3058
  \`\`\`
2929
3059
 
@@ -2931,123 +3061,640 @@ Schema:
2931
3061
 
2932
3062
  - \`patch\` must be the COMPLETE file content if non-empty (never a diff fragment).
2933
3063
  - The CLI replaces the file atomically with \`patch\`.
3064
+ - The patch must be valid YAML matching the schema above. The CLI re-parses it before applying; if it fails validation, the patch is rejected.
2934
3065
  - For **create** mode: produce a fresh spec from the user intent.
2935
3066
  - For **refine** mode with a non-empty user instruction: apply the user's request, plus fix any issues it introduces. Preserve the user's wording elsewhere.
2936
3067
  - For **refine** mode with an empty user instruction: only fix issues you find against the current spec; if everything is fine, return \`patch: ""\`.
2937
3068
  - If \`patch\` is the same as the current spec, return \`patch: ""\` instead.
2938
3069
  `;
2939
3070
  }
2940
- function buildDraftPrompt(input) {
2941
- const { mode, existing, userInput } = input;
2942
- if (mode === "create") return `## Mode
2943
-
2944
- create — no spec exists yet at the target path. Produce a fresh test-spec.md.
2945
-
2946
- ## User intent
2947
-
2948
- ${userInput}
2949
-
2950
- ## Task
2951
-
2952
- Read the codebase under cwd. Discover concrete strings (routes, labels, titles). Produce a complete test-spec.md as the \`patch\` field, plus any issues you'd flag about your own draft.
2953
- `;
2954
- return `## Mode
2955
-
2956
- refine — a spec already exists. Apply the user's instruction (if any) and validate against the codebase.
2957
-
2958
- ## Current spec
2959
-
2960
- \`\`\`markdown
2961
- ${existing}\`\`\`
2962
-
2963
- ${userInput ? `## User refinement instruction\n\n${userInput}\n` : `## User refinement instruction\n\n(empty — re-validate the current spec against the codebase; only emit a non-empty patch if something is actually wrong)\n`}
2964
- ## Task
2965
-
2966
- 1. Read the codebase under cwd and any referenced setups.
2967
- 2. If the user's instruction is non-empty, apply it to the spec.
2968
- 3. Validate the resulting spec on the four axes. Emit issues.
2969
- 4. Return the complete updated spec as \`patch\`. If no changes are needed, return \`patch: ""\`.
2970
- `;
3071
+ function formatBlockList(blocks) {
3072
+ if (blocks.length === 0) return "(no blocks defined yet — only action steps are available.)";
3073
+ return blocks.map((b) => {
3074
+ const paramLines = b.params.length === 0 ? " params: (none)" : b.params.map((p) => ` - ${p.name}${p.required ? "" : " (optional)"}${p.secret ? " [secret]" : ""}`).join("\n");
3075
+ return `- \`${b.name}\` ${b.title}\n${paramLines}`;
3076
+ }).join("\n");
3077
+ }
3078
+ function buildDraftPrompt(input) {
3079
+ const { mode, existing, userInput } = input;
3080
+ if (mode === "create") return `## Mode
3081
+
3082
+ create — no spec exists yet at the target path. Produce a fresh spec.yaml.
3083
+
3084
+ ## User intent
3085
+
3086
+ ${userInput}
3087
+
3088
+ ## Task
3089
+
3090
+ Read the codebase under cwd. Discover concrete strings (routes, labels, titles). Produce a complete spec.yaml as the \`patch\` field, plus any issues you'd flag about your own draft.
3091
+ `;
3092
+ return `## Mode
3093
+
3094
+ refine — a spec already exists. Apply the user's instruction (if any) and validate against the codebase.
3095
+
3096
+ ## Current spec
3097
+
3098
+ \`\`\`yaml
3099
+ ${existing}\`\`\`
3100
+
3101
+ ${userInput ? `## User refinement instruction\n\n${userInput}\n` : `## User refinement instruction\n\n(empty — re-validate the current spec against the codebase; only emit a non-empty patch if something is actually wrong)\n`}## Task
3102
+
3103
+ 1. Read the codebase under cwd and any referenced blocks (\`.ccqa/blocks/<name>/spec.yaml\`).
3104
+ 2. If the user's instruction is non-empty, apply it to the spec.
3105
+ 3. Validate the resulting spec on the four axes. Emit issues.
3106
+ 4. Return the complete updated spec as \`patch\`. If no changes are needed, return \`patch: ""\`.
3107
+ `;
3108
+ }
3109
+ //#endregion
3110
+ //#region src/prompts/drift.ts
3111
+ function buildDriftSystemPrompt(blocks) {
3112
+ return `${buildDraftSystemPrompt(blocks)}
3113
+
3114
+ ## Drift mode
3115
+
3116
+ You are running non-interactively in CI. The user will not see or apply the patch — only the \`issues\` array.
3117
+
3118
+ - Always set \`patch\` to "" in your response.
3119
+ - Focus issue messages on what is **out of sync** between the spec and the current codebase: missing aria-labels, renamed routes, removed buttons, placeholders that no longer exist, include references that point to non-existent blocks.
3120
+ - Do NOT raise issues about stylistic preferences in the spec wording.
3121
+ - Treat \`category: unimplemented\` as the primary signal for drift: anything the spec asserts that you cannot find in code is a drift finding.
3122
+
3123
+ ## Drift severity policy (STRICT)
3124
+
3125
+ The CLI exits non-zero when any issue has \`severity: "ERROR"\` (default) or — with \`--severity warn\` — when any \`WARN\` is present. Pick severity by **whether a deterministic replay of this spec would fail today**, not by how confident you are in your own analysis.
3126
+
3127
+ ### CRITICAL: spec ↔ source mismatch is ERROR, not "vague phrasing" WARN
3128
+
3129
+ The most common false negative is treating a concrete spec/source mismatch as a WARN about "expected phrasing." It is not. Apply this decision rule **before** picking severity:
3130
+
3131
+ 1. **Pick the concrete strings the spec asserts** in each step's \`expected\` (visible text, aria-labels, button labels, route paths). For \`expected\` like "the Dashboard page is visible", the spec is asserting that the literal string "Dashboard" — or the page conceptually identified by that label — is rendered.
3132
+ 2. **Search the source** for those exact strings (\`Grep\` / \`Read\`) at the location the step references (the relevant page/component/route).
3133
+ 3. Classify:
3134
+ - **ERROR** — the source instead renders a *different* string in that location (e.g. spec says "Dashboard", the breadcrumb in \`DashboardPage.tsx\` now renders "Overview"). A replay against the current source would fail; a replay against a stale staging environment would pass and *hide* the drift — exactly the case drift CI exists to catch. Cite both sides in \`detail\`: the spec line and the file:line of the source mismatch.
3135
+ - **WARN (vague phrasing)** — the source's actual string IS present somewhere relevant; the \`expected\` just paraphrases it more loosely (e.g. spec says "the Save button is visible" and the source has both visible "Save" text and \`aria-label="Save"\`). Replay still passes; the spec could just be tightened.
3136
+ - **OK** — the spec's exact string appears in source at the relevant location.
3137
+
3138
+ Use **ERROR** when the spec would break on replay:
3139
+ - A selector the spec relies on (\`aria-label\`, \`placeholder\`, \`data-testid\`, button text) **does not exist anywhere in the source**.
3140
+ - A URL / route the spec navigates to is no longer defined.
3141
+ - An \`expected\` asserts a string or visible text that is no longer rendered by the relevant component.
3142
+ - The source renders a *different* string in the place the spec describes (per the decision rule above).
3143
+ - An \`include\` step references a block that does not exist under \`.ccqa/blocks/<name>/spec.yaml\`, or a \`params\` key is not declared on that block.
3144
+ - The spec references a feature/page that has been removed from the codebase.
3145
+
3146
+ Use **WARN** when the spec is still likely to work, but quality could improve:
3147
+ - The \`expected\` paraphrases a string that **still exists** in source (the literal target is findable, just imprecisely worded).
3148
+ - A step bundles multiple actions, or a needed intermediate verification step is missing.
3149
+ - Stable signals exist that the spec could leverage but currently doesn't.
3150
+ - You are unsure whether a referenced string exists (give the user the benefit of the doubt; do not hard-fail CI on uncertainty).
3151
+
3152
+ Use **OK** for axes you actively verified and found no issue.
3153
+
3154
+ If you cannot decide between ERROR and WARN, choose WARN. Reserve ERROR for findings you can back up with a specific file path or grep result that proves the drift.
3155
+
3156
+ Conversely: when you DO have a citation showing a concrete spec/source mismatch (per the decision rule above), you MUST use ERROR — "vague phrasing" WARN is not a safe fallback for an actual drift.
3157
+ `;
3158
+ }
3159
+ function buildDriftUserPrompt(existing) {
3160
+ return buildDraftPrompt({
3161
+ mode: "refine",
3162
+ existing,
3163
+ userInput: ""
3164
+ });
3165
+ }
3166
+ //#endregion
3167
+ //#region src/types.ts
3168
+ const RouteStepSchema = z.object({
3169
+ title: z.string(),
3170
+ action: z.string(),
3171
+ observation: z.string(),
3172
+ status: z.enum([
3173
+ "PASSED",
3174
+ "FAILED",
3175
+ "SKIPPED"
3176
+ ]),
3177
+ reason: z.string().optional()
3178
+ });
3179
+ z.object({
3180
+ specName: z.string(),
3181
+ timestamp: z.string(),
3182
+ status: z.enum(["passed", "failed"]),
3183
+ steps: z.array(RouteStepSchema)
3184
+ });
3185
+ const DraftIssueSchema = z.object({
3186
+ severity: z.enum([
3187
+ "OK",
3188
+ "WARN",
3189
+ "ERROR"
3190
+ ]),
3191
+ category: z.enum([
3192
+ "assertable",
3193
+ "blocks",
3194
+ "granularity",
3195
+ "unimplemented"
3196
+ ]),
3197
+ stepId: z.string().nullable(),
3198
+ message: z.string(),
3199
+ detail: z.string().optional()
3200
+ });
3201
+ const DraftReportSchema = z.object({
3202
+ issues: z.array(DraftIssueSchema),
3203
+ patch: z.string()
3204
+ });
3205
+ const DRAFT_CATEGORY_LABEL = {
3206
+ assertable: "Assertability",
3207
+ blocks: "Block references",
3208
+ granularity: "Step granularity",
3209
+ unimplemented: "Unimplemented checks"
3210
+ };
3211
+ const DraftNamingSchema = z.object({
3212
+ featureName: z.string().min(1),
3213
+ specName: z.string().min(1),
3214
+ reason: z.string().optional()
3215
+ });
3216
+ //#endregion
3217
+ //#region src/drift/analyze.ts
3218
+ const DEFAULT_CONCURRENCY$1 = 3;
3219
+ /**
3220
+ * Run drift checks against a list of pre-collected targets. Pure library
3221
+ * function: no commander, no process.exit, no stdout writes. Callers handle
3222
+ * presentation. `cli/drift` does the full sweep with `--changed` scoping;
3223
+ * `cli/run` calls this with just the failing specs after vitest.
3224
+ */
3225
+ async function analyzeDrift(input) {
3226
+ const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, onSpecStart } = input;
3227
+ const results = new Array(targets.length);
3228
+ let cursor = 0;
3229
+ const worker = async () => {
3230
+ while (true) {
3231
+ const idx = cursor++;
3232
+ if (idx >= targets.length) return;
3233
+ const target = targets[idx];
3234
+ onSpecStart?.(target);
3235
+ results[idx] = await checkSpec(target, {
3236
+ cwd,
3237
+ blocks,
3238
+ model
3239
+ });
3240
+ }
3241
+ };
3242
+ const pool = Array.from({ length: Math.min(concurrency, targets.length) }, () => worker());
3243
+ await Promise.all(pool);
3244
+ return results;
3245
+ }
3246
+ async function checkSpec(target, opts) {
3247
+ const { featureName, specName } = target;
3248
+ const existing = await tryReadSpecFile(featureName, specName, opts.cwd);
3249
+ if (existing === null) return {
3250
+ target,
3251
+ ok: false,
3252
+ issues: [],
3253
+ error: `spec file disappeared after enumeration: ${featureName}/${specName}`
3254
+ };
3255
+ const { result, isError } = await invokeClaudeStreaming({
3256
+ prompt: buildDriftUserPrompt(existing),
3257
+ systemPrompt: buildDriftSystemPrompt(opts.blocks),
3258
+ allowedTools: [
3259
+ "Read",
3260
+ "Grep",
3261
+ "Glob"
3262
+ ],
3263
+ silenceBashLog: true,
3264
+ cwd: opts.cwd,
3265
+ ...opts.model ? { model: opts.model } : {}
3266
+ }, (_msg) => {});
3267
+ if (isError) return {
3268
+ target,
3269
+ ok: false,
3270
+ issues: [],
3271
+ error: "Claude returned an error result"
3272
+ };
3273
+ const json = extractJsonBlock(result);
3274
+ if (!json) return {
3275
+ target,
3276
+ ok: false,
3277
+ issues: [],
3278
+ error: "Claude did not return a json block"
3279
+ };
3280
+ let report;
3281
+ try {
3282
+ report = DraftReportSchema.parse(JSON.parse(json));
3283
+ } catch (e) {
3284
+ return {
3285
+ target,
3286
+ ok: false,
3287
+ issues: [],
3288
+ error: `failed to parse drift report: ${e.message}`
3289
+ };
3290
+ }
3291
+ return {
3292
+ target,
3293
+ ok: true,
3294
+ issues: report.issues
3295
+ };
3296
+ }
3297
+ //#endregion
3298
+ //#region src/drift/format.ts
3299
+ /**
3300
+ * Render drift results as a string. The CLI commands and the `run` failure
3301
+ * hook are the only callers; both want the formatted output returned so
3302
+ * they can prefix / interleave / pipe it as needed.
3303
+ */
3304
+ function renderDrift(results, format, cwd) {
3305
+ if (format === "json") return renderJson(results);
3306
+ if (format === "github") return renderGithub(results, cwd);
3307
+ return renderText(results);
3308
+ }
3309
+ const HEAVY_RULE = "═".repeat(72);
3310
+ function renderText(results) {
3311
+ const out = [];
3312
+ for (const r of results) {
3313
+ out.push("");
3314
+ const heading = `══ ${r.target.featureName}/${r.target.specName} `;
3315
+ const tail = "═".repeat(Math.max(3, 72 - heading.length));
3316
+ out.push(`${heading}${tail}`);
3317
+ if (r.error) {
3318
+ out.push(` ERROR ${r.error}`);
3319
+ continue;
3320
+ }
3321
+ const errors = r.issues.filter((i) => i.severity === "ERROR");
3322
+ const warnings = r.issues.filter((i) => i.severity === "WARN");
3323
+ const passed = r.issues.filter((i) => i.severity === "OK");
3324
+ if (errors.length === 0 && warnings.length === 0) {
3325
+ const label = passed.length === 1 ? "check" : "checks";
3326
+ const detail = passed.length > 0 ? `all ${passed.length} ${label} passed` : "no issues";
3327
+ out.push(` ✓ ${detail}`);
3328
+ continue;
3329
+ }
3330
+ for (const issue of errors) appendFinding(out, "ERROR", issue);
3331
+ for (const issue of warnings) appendFinding(out, "WARN", issue);
3332
+ if (passed.length > 0) {
3333
+ const names = passed.map((i) => DRAFT_CATEGORY_LABEL[i.category]).join(", ");
3334
+ out.push("");
3335
+ out.push(` ✓ passed (${passed.length}): ${names}`);
3336
+ }
3337
+ }
3338
+ out.push("");
3339
+ out.push(HEAVY_RULE);
3340
+ const totals = summarize(results);
3341
+ out.push(` specs ${results.length} (${totals.errored} errored)`);
3342
+ out.push(` findings ${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`);
3343
+ out.push("");
3344
+ return out.join("\n");
3345
+ }
3346
+ function appendFinding(out, level, issue) {
3347
+ const stepPart = issue.stepId ? ` ${issue.stepId}` : "";
3348
+ out.push("");
3349
+ out.push(` ${level} ${DRAFT_CATEGORY_LABEL[issue.category]}${stepPart}`);
3350
+ out.push(` ${issue.message}`);
3351
+ if (issue.detail) out.push(` └ ${issue.detail.replace(/\n/g, "\n ")}`);
3352
+ }
3353
+ function renderJson(results) {
3354
+ const payload = { specs: results.map((r) => ({
3355
+ feature: r.target.featureName,
3356
+ spec: r.target.specName,
3357
+ ok: r.ok,
3358
+ ...r.error ? { error: r.error } : {},
3359
+ issues: r.issues.map((i) => ({
3360
+ severity: i.severity,
3361
+ category: i.category,
3362
+ stepId: i.stepId,
3363
+ message: i.message,
3364
+ ...i.detail ? { detail: i.detail } : {}
3365
+ }))
3366
+ })) };
3367
+ return `${JSON.stringify(payload, null, 2)}\n`;
3368
+ }
3369
+ function renderGithub(results, cwd) {
3370
+ const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
3371
+ const lines = [];
3372
+ for (const r of results) {
3373
+ const file = githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName);
3374
+ if (r.error) {
3375
+ lines.push(`::error file=${file}::${escapeGhMessage(r.error)}`);
3376
+ continue;
3377
+ }
3378
+ for (const issue of r.issues) {
3379
+ if (issue.severity === "OK") continue;
3380
+ const level = issue.severity === "ERROR" ? "error" : "warning";
3381
+ const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
3382
+ const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
3383
+ lines.push(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}`);
3384
+ }
3385
+ }
3386
+ return lines.length === 0 ? "" : `${lines.join("\n")}\n`;
3387
+ }
3388
+ function githubRelPath(cwd, repoRoot, featureName, specName) {
3389
+ const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "spec.yaml");
3390
+ const rel = relative(repoRoot, abs);
3391
+ return rel.startsWith("..") ? abs : rel;
3392
+ }
3393
+ function escapeGhMessage(s) {
3394
+ return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A");
3395
+ }
3396
+ function escapeGhProp(s) {
3397
+ return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A").replace(/,/g, "%2C").replace(/:/g, "%3A");
3398
+ }
3399
+ function summarize(results) {
3400
+ let error = 0;
3401
+ let warn = 0;
3402
+ let ok = 0;
3403
+ let errored = 0;
3404
+ for (const r of results) {
3405
+ if (r.error) errored++;
3406
+ for (const issue of r.issues) if (issue.severity === "ERROR") error++;
3407
+ else if (issue.severity === "WARN") warn++;
3408
+ else ok++;
3409
+ }
3410
+ return {
3411
+ error,
3412
+ warn,
3413
+ ok,
3414
+ errored
3415
+ };
3416
+ }
3417
+ //#endregion
3418
+ //#region src/drift/exit-code.ts
3419
+ /**
3420
+ * Map drift results to an exit code. Spec-level errors (Claude call failed)
3421
+ * always fail; otherwise ERROR severity always fails, WARN fails only when
3422
+ * the threshold is `warn`.
3423
+ */
3424
+ function determineExitCode(results, threshold) {
3425
+ for (const r of results) {
3426
+ if (r.error) return 1;
3427
+ for (const issue of r.issues) {
3428
+ if (issue.severity === "ERROR") return 1;
3429
+ if (threshold === "warn" && issue.severity === "WARN") return 1;
3430
+ }
3431
+ }
3432
+ return 0;
3433
+ }
3434
+ //#endregion
3435
+ //#region src/drift/auth.ts
3436
+ /**
3437
+ * Probe whether the host has any credential the Anthropic SDK can pick up:
3438
+ * 1. ANTHROPIC_API_KEY env var (CI / scripted use)
3439
+ * 2. ~/.claude/.credentials.json (local Claude Code login)
3440
+ *
3441
+ * `run --drift` is opt-in, so the caller will only consult this after the
3442
+ * user has asked for drift. We never throw — auth absence is a normal flow
3443
+ * that surfaces as "drift analysis skipped".
3444
+ */
3445
+ function driftAuthAvailable() {
3446
+ const key = process.env["ANTHROPIC_API_KEY"];
3447
+ if (typeof key === "string" && key.length > 0) return { ok: true };
3448
+ if (existsSync(join(homedir(), ".claude", ".credentials.json"))) return { ok: true };
3449
+ return {
3450
+ ok: false,
3451
+ reason: "no ANTHROPIC_API_KEY / claude login"
3452
+ };
3453
+ }
3454
+ //#endregion
3455
+ //#region src/cli/run.ts
3456
+ const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
3457
+ async function resolveVitestConfig() {
3458
+ try {
3459
+ await access(USER_VITEST_CONFIG);
3460
+ return USER_VITEST_CONFIG;
3461
+ } catch {
3462
+ return bundledVitestConfigPath();
3463
+ }
3464
+ }
3465
+ const runCommand = new Command("run").argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all").description("Run generated agent-browser test scripts. Pass --drift to invoke a Claude-driven drift analysis on each failing spec (skipped silently when no test fails). Requires ANTHROPIC_API_KEY or a local Claude login.").option("--drift", "On vitest failure, run drift analysis on the failing specs").option("--drift-strict", "Treat drift ERROR findings as a run failure (exit 1 even if vitest passed). Implies --drift.").option("--format <fmt>", "Output format for the drift block: text | json | github", "text").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Used by --drift only. Overrides CCQA_MODEL.").action(async (target, opts) => {
3466
+ await runTests(target, opts);
3467
+ });
3468
+ async function runTests(target, opts) {
3469
+ header("run", target);
3470
+ const specs = await resolveSpecs(target);
3471
+ if (specs.length === 0) {
3472
+ error("no test scripts found");
3473
+ hint("run 'ccqa generate <feature>/<spec>' first to generate tests");
3474
+ process.exit(1);
3475
+ }
3476
+ const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
3477
+ const summaries = [];
3478
+ let overallExitCode = 0;
3479
+ const vitestConfig = await resolveVitestConfig();
3480
+ try {
3481
+ for (let i = 0; i < specs.length; i++) {
3482
+ const { featureName, specName } = specs[i];
3483
+ const scriptFile = await getTestScript(featureName, specName);
3484
+ if (!scriptFile) {
3485
+ warn(`${featureName}/${specName}: no test.spec.ts found`);
3486
+ continue;
3487
+ }
3488
+ run(`${featureName}/${specName}`);
3489
+ meta("test", scriptFile);
3490
+ blank();
3491
+ const reportFile = join(tmpDir, `report-${i}.json`);
3492
+ const proc = spawnVitestStreaming([
3493
+ "run",
3494
+ "--config",
3495
+ vitestConfig,
3496
+ scriptFile,
3497
+ "--reporter=json",
3498
+ `--outputFile.json=${reportFile}`
3499
+ ]);
3500
+ await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]);
3501
+ const exitCode = await proc.exited;
3502
+ if (exitCode !== 0) overallExitCode = exitCode;
3503
+ const report = await readReport(reportFile);
3504
+ summaries.push({
3505
+ featureName,
3506
+ specName,
3507
+ scriptFile,
3508
+ report,
3509
+ exitCode
3510
+ });
3511
+ blank();
3512
+ }
3513
+ printSummary(summaries);
3514
+ overallExitCode = await maybeRunDrift(summaries, opts, overallExitCode);
3515
+ } finally {
3516
+ await rm(tmpDir, {
3517
+ recursive: true,
3518
+ force: true
3519
+ });
3520
+ }
3521
+ process.exit(overallExitCode);
3522
+ }
3523
+ function failedSpec(s) {
3524
+ if (s.exitCode !== 0) return true;
3525
+ return (s.report?.numFailedTests ?? 0) > 0;
3526
+ }
3527
+ function parseDriftFormat(raw) {
3528
+ const v = raw ?? "text";
3529
+ if (v === "text" || v === "json" || v === "github") return v;
3530
+ error(`invalid --format: ${v} (expected text|json|github)`);
3531
+ process.exit(2);
3532
+ }
3533
+ /**
3534
+ * Choose which specs to drift-check. `--drift` is a fail-supplement: only the
3535
+ * specs that failed get a drift analysis (the goal is to *explain* a vitest
3536
+ * failure). `--drift-strict` is an audit: even passing specs are checked,
3537
+ * because the CI need is "fail loud if the spec lags behind the source",
3538
+ * which can absolutely happen while vitest is still green against a stale
3539
+ * staging environment.
3540
+ */
3541
+ function selectDriftTargets(summaries, opts) {
3542
+ if (opts.driftStrict) return summaries;
3543
+ if (opts.drift) return summaries.filter(failedSpec);
3544
+ return [];
3545
+ }
3546
+ /**
3547
+ * Opt-in post-vitest drift hook. With `--drift`, fires only when at least
3548
+ * one spec failed (supplemental signal). With `--drift-strict`, fires
3549
+ * unconditionally so a spec/source divergence is caught even when vitest
3550
+ * passed. Skips silently when auth is unavailable so the run's exit code
3551
+ * is determined by vitest alone.
3552
+ */
3553
+ async function maybeRunDrift(summaries, opts, currentExitCode) {
3554
+ const candidates = selectDriftTargets(summaries, opts);
3555
+ if (candidates.length === 0) return currentExitCode;
3556
+ const auth = driftAuthAvailable();
3557
+ if (!auth.ok) {
3558
+ info(`drift analysis skipped (${auth.reason})`);
3559
+ return currentExitCode;
3560
+ }
3561
+ const format = parseDriftFormat(opts.format);
3562
+ const cwd = process.cwd();
3563
+ const tree = await listFeatureTree(cwd);
3564
+ const targets = candidates.map((s) => {
3565
+ const spec = tree.find((f) => f.featureName === s.featureName)?.specs.find((sp) => sp.specName === s.specName);
3566
+ if (!spec) return null;
3567
+ const t = {
3568
+ featureName: s.featureName,
3569
+ specName: s.specName
3570
+ };
3571
+ if (spec.relatedPaths) t.relatedPaths = spec.relatedPaths;
3572
+ if (spec.includedBlocks) t.includedBlocks = spec.includedBlocks;
3573
+ return t;
3574
+ }).filter((t) => t !== null);
3575
+ if (targets.length === 0) {
3576
+ info("drift analysis skipped (no spec.yaml found for failing specs)");
3577
+ return currentExitCode;
3578
+ }
3579
+ const results = await analyzeDrift({
3580
+ targets,
3581
+ cwd,
3582
+ blocks: await loadAvailableBlocks(cwd),
3583
+ concurrency: Math.min(3, targets.length),
3584
+ ...opts.model ? { model: opts.model } : {},
3585
+ onSpecStart: (t) => {
3586
+ if (format === "text") info(`drift: checking ${t.featureName}/${t.specName}`);
3587
+ }
3588
+ });
3589
+ if (format === "text") process.stdout.write(`\n${C.cyan}${C.bold}──────── drift analysis ────────${C.reset}\n`);
3590
+ process.stdout.write(renderDrift(results, format, cwd));
3591
+ if (opts.driftStrict && determineExitCode(results, "error") !== 0) return currentExitCode || 1;
3592
+ return currentExitCode;
3593
+ }
3594
+ async function readReport(path) {
3595
+ try {
3596
+ const raw = await readFile(path, "utf8");
3597
+ return JSON.parse(raw);
3598
+ } catch {
3599
+ return null;
3600
+ }
3601
+ }
3602
+ const useColor = process.stdout.isTTY && process.env.NO_COLOR == null;
3603
+ const C = {
3604
+ reset: useColor ? "\x1B[0m" : "",
3605
+ bold: useColor ? "\x1B[1m" : "",
3606
+ dim: useColor ? "\x1B[2m" : "",
3607
+ green: useColor ? "\x1B[32m" : "",
3608
+ red: useColor ? "\x1B[31m" : "",
3609
+ yellow: useColor ? "\x1B[33m" : "",
3610
+ cyan: useColor ? "\x1B[36m" : "",
3611
+ gray: useColor ? "\x1B[90m" : ""
3612
+ };
3613
+ function printSummary(summaries) {
3614
+ process.stdout.write(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
3615
+ let totalTests = 0;
3616
+ let totalPassed = 0;
3617
+ let totalFailed = 0;
3618
+ let totalSkipped = 0;
3619
+ for (const s of summaries) {
3620
+ const header = `${C.bold}${s.featureName}/${s.specName}${C.reset}`;
3621
+ if (!s.report) {
3622
+ const icon = s.exitCode === 0 ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
3623
+ process.stdout.write(`${icon} ${header} ${C.dim}(no report)${C.reset}\n`);
3624
+ continue;
3625
+ }
3626
+ totalTests += s.report.numTotalTests;
3627
+ totalPassed += s.report.numPassedTests;
3628
+ totalFailed += s.report.numFailedTests;
3629
+ totalSkipped += s.report.numPendingTests;
3630
+ const ok = s.report.success;
3631
+ const icon = ok ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
3632
+ const countColor = ok ? C.green : C.red;
3633
+ process.stdout.write(`${icon} ${header} ${countColor}${s.report.numPassedTests}/${s.report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
3634
+ for (const file of s.report.testResults) for (const a of file.assertionResults) {
3635
+ const aIcon = assertionIcon(a.status);
3636
+ const dur = a.duration != null ? ` ${C.gray}${formatDuration(a.duration)}${C.reset}` : "";
3637
+ process.stdout.write(` ${aIcon} ${a.fullName}${dur}\n`);
3638
+ if (a.status === "failed" && a.failureMessages?.length) for (const msg of a.failureMessages) {
3639
+ const firstLine = msg.split("\n")[0] ?? msg;
3640
+ process.stdout.write(` ${C.red}${firstLine}${C.reset}\n`);
3641
+ }
3642
+ }
3643
+ }
3644
+ const specsPassed = summaries.filter((s) => s.exitCode === 0).length;
3645
+ const specsFailed = summaries.filter((s) => s.exitCode !== 0).length;
3646
+ process.stdout.write("\n");
3647
+ process.stdout.write(` ${C.bold}Specs${C.reset} ${summaries.length} (${C.green}${specsPassed} passed${C.reset}, ${specsFailed > 0 ? C.red : C.dim}${specsFailed} failed${C.reset})\n`);
3648
+ process.stdout.write(` ${C.bold}Tests${C.reset} ${totalTests} (${C.green}${totalPassed} passed${C.reset}, ${totalFailed > 0 ? C.red : C.dim}${totalFailed} failed${C.reset}, ${C.yellow}${totalSkipped} skipped${C.reset})\n`);
3649
+ process.stdout.write("\n");
3650
+ }
3651
+ function assertionIcon(status) {
3652
+ switch (status) {
3653
+ case "passed": return `${C.green}✔${C.reset}`;
3654
+ case "failed": return `${C.red}✖${C.reset}`;
3655
+ case "skipped":
3656
+ case "pending":
3657
+ case "todo": return `${C.yellow}◌${C.reset}`;
3658
+ }
3659
+ }
3660
+ function formatDuration(ms) {
3661
+ if (ms < 1e3) return `${Math.round(ms)}ms`;
3662
+ return `${(ms / 1e3).toFixed(2)}s`;
3663
+ }
3664
+ const NOISE_LINE_PATTERNS = [/^JSON report written to /];
3665
+ async function streamFiltered(source, sink) {
3666
+ source.setEncoding("utf8");
3667
+ let buffer = "";
3668
+ for await (const chunk of source) {
3669
+ buffer += chunk;
3670
+ let nl = buffer.indexOf("\n");
3671
+ while (nl !== -1) {
3672
+ const line = buffer.slice(0, nl);
3673
+ buffer = buffer.slice(nl + 1);
3674
+ if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) sink.write(line + "\n");
3675
+ nl = buffer.indexOf("\n");
3676
+ }
3677
+ }
3678
+ if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) sink.write(buffer);
3679
+ }
3680
+ async function resolveSpecs(target) {
3681
+ if (!target) return listAllSpecs();
3682
+ if (target.includes("/")) {
3683
+ const { featureName, specName } = parseSpecPath(target);
3684
+ return [{
3685
+ featureName,
3686
+ specName
3687
+ }];
3688
+ }
3689
+ return (await listSpecsForFeature(target)).map((specName) => ({
3690
+ featureName: target,
3691
+ specName
3692
+ }));
2971
3693
  }
2972
- //#endregion
2973
- //#region src/types.ts
2974
- const TestStepSchema = z.object({
2975
- id: z.string(),
2976
- title: z.string(),
2977
- instruction: z.string(),
2978
- expected: z.string()
2979
- });
2980
- const SetupRefSchema = z.object({
2981
- name: z.string(),
2982
- params: z.record(z.string(), z.string()).optional()
2983
- });
2984
- z.object({
2985
- title: z.string(),
2986
- baseUrl: z.string(),
2987
- prerequisites: z.string().optional(),
2988
- setups: z.array(SetupRefSchema).optional(),
2989
- steps: z.array(TestStepSchema)
2990
- });
2991
- const PlaceholderDefSchema = z.object({
2992
- dummy: z.string(),
2993
- description: z.string().optional()
2994
- });
2995
- z.object({
2996
- title: z.string(),
2997
- placeholders: z.record(z.string(), PlaceholderDefSchema).optional(),
2998
- steps: z.array(TestStepSchema)
2999
- });
3000
- const RouteStepSchema = z.object({
3001
- title: z.string(),
3002
- action: z.string(),
3003
- observation: z.string(),
3004
- status: z.enum([
3005
- "PASSED",
3006
- "FAILED",
3007
- "SKIPPED"
3008
- ]),
3009
- reason: z.string().optional()
3010
- });
3011
- z.object({
3012
- specName: z.string(),
3013
- timestamp: z.string(),
3014
- status: z.enum(["passed", "failed"]),
3015
- steps: z.array(RouteStepSchema)
3016
- });
3017
- const DraftIssueSchema = z.object({
3018
- severity: z.enum([
3019
- "OK",
3020
- "WARN",
3021
- "ERROR"
3022
- ]),
3023
- category: z.enum([
3024
- "assertable",
3025
- "setups",
3026
- "granularity",
3027
- "unimplemented"
3028
- ]),
3029
- stepId: z.string().nullable(),
3030
- message: z.string(),
3031
- detail: z.string().optional()
3032
- });
3033
- const DraftReportSchema = z.object({
3034
- issues: z.array(DraftIssueSchema),
3035
- patch: z.string()
3036
- });
3037
- const DraftNamingSchema = z.object({
3038
- featureName: z.string().min(1),
3039
- specName: z.string().min(1),
3040
- reason: z.string().optional()
3041
- });
3042
3694
  //#endregion
3043
3695
  //#region src/cli/draft.ts
3044
- const CATEGORY_LABEL = {
3045
- assertable: "Assertability",
3046
- setups: "Setup references",
3047
- granularity: "Step granularity",
3048
- unimplemented: "Unimplemented checks"
3049
- };
3050
- const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a test-spec.md with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
3696
+ const CATEGORY_LABEL = DRAFT_CATEGORY_LABEL;
3697
+ const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a spec.yaml with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
3051
3698
  await ensureCcqaDir();
3052
3699
  let featureName;
3053
3700
  let specName;
@@ -3097,7 +3744,7 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
3097
3744
  async function runOneTurn(input) {
3098
3745
  const { featureName, specName, existing, userInput, autoApply } = input;
3099
3746
  const isFirstRun = existing === null;
3100
- const systemPrompt = buildDraftSystemPrompt();
3747
+ const systemPrompt = buildDraftSystemPrompt(await loadAvailableBlocks());
3101
3748
  const userPrompt = buildDraftPrompt({
3102
3749
  mode: isFirstRun ? "create" : "refine",
3103
3750
  existing: existing ?? "",
@@ -3261,10 +3908,7 @@ async function proposeNaming(opts) {
3261
3908
  const tree = await listFeatureTree();
3262
3909
  const treeForPrompt = tree.map((f) => ({
3263
3910
  featureName: f.featureName,
3264
- specs: f.specs.map((s) => ({
3265
- specName: s.specName,
3266
- ...s.title ? { title: s.title } : {}
3267
- }))
3911
+ specs: f.specs.map((s) => ({ specName: s.specName }))
3268
3912
  }));
3269
3913
  info("Proposing a feature/spec name based on your intent...");
3270
3914
  const { result, isError } = await invokeClaudeStreaming({
@@ -3367,13 +4011,6 @@ function ensureUnique(tree, featureName, specName) {
3367
4011
  specName: `${specName}-${Date.now()}`
3368
4012
  };
3369
4013
  }
3370
- function extractJsonBlock(text) {
3371
- const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
3372
- if (fenced && fenced[1]) return fenced[1].trim();
3373
- const trimmed = text.trim();
3374
- if (trimmed.startsWith("{") && trimmed.endsWith("}")) return trimmed;
3375
- return null;
3376
- }
3377
4014
  function printUnifiedDiff(before, after) {
3378
4015
  const lines = computeLineDiff(before.split("\n"), after.split("\n"));
3379
4016
  for (const line of lines) process.stdout.write(line + "\n");
@@ -3421,6 +4058,426 @@ function truncate(s, n) {
3421
4058
  return s.slice(s.length - n);
3422
4059
  }
3423
4060
  //#endregion
4061
+ //#region src/drift/affected.ts
4062
/** Promise-returning form of `child_process.execFile`, created with `util.promisify`. */
+ const execFileP = promisify(execFile);
4063
/**
 * Pick the git ref that `ccqa drift --changed` diffs against.
 *
 * Order of preference:
 *   1. `explicit`, when it is a non-empty string;
 *   2. the `GITHUB_BASE_REF` environment variable, prefixed with `origin/`
 *      unless it already carries that prefix;
 *   3. the literal fallback `origin/main`.
 *
 * @param {string | undefined} explicit - Caller-supplied override.
 * @returns {string} The ref to diff against.
 */
function resolveBaseRef(explicit) {
  if (explicit?.length > 0) {
    return explicit;
  }
  const fromEnv = process.env["GITHUB_BASE_REF"];
  if (!(fromEnv?.length > 0)) {
    return "origin/main";
  }
  if (fromEnv.startsWith("origin/")) {
    return fromEnv;
  }
  return `origin/${fromEnv}`;
}
4073
/**
 * Run `git diff --name-status -M base...HEAD` from `cwd` and return one entry
 * per changed file, as produced by `parseGitDiffOutput` and then re-rooted by
 * `rerootChangedFiles`.
 *
 * Renames are reported under their NEW path with status "renamed"; the old
 * path is dropped because the spec mapping is against the post-rename layout.
 *
 * Paths are re-rooted to be relative to `cwd`, not the git repo root. In a
 * monorepo where `cwd` is a sub-package (e.g. `apps/foo`), git emits paths
 * relative to the repo root, but specs declare relatedPaths relative to their
 * own package. Changes outside `cwd` are dropped so an unrelated PR can never
 * accidentally scope a sub-package's specs in.
 *
 * `core.quotePath` is switched off for this invocation: with git's default,
 * paths containing non-ASCII bytes are printed quoted and octal-escaped, and
 * such a string can never match a relatedPaths glob.
 *
 * @param {string} base - Ref to diff against (left side of `base...HEAD`).
 * @param {string} cwd - Directory git runs in and paths are re-rooted to.
 * @returns {Promise<Array<{ path: string, status: string }>>}
 * @throws Rejects when either git invocation fails.
 */
async function getChangedFiles(base, cwd) {
  const rootCall = execFileP("git", ["rev-parse", "--show-toplevel"], { cwd });
  const diffCall = execFileP(
    "git",
    ["-c", "core.quotePath=false", "diff", "--name-status", "-M", `${base}...HEAD`],
    {
      cwd,
      // Large PRs can list many files; raise the output cap to 32 MiB.
      maxBuffer: 32 * 1024 * 1024,
    },
  );
  const [{ stdout: rootOut }, { stdout: diffOut }] = await Promise.all([rootCall, diffCall]);
  return rerootChangedFiles(parseGitDiffOutput(diffOut), rootOut.trim(), cwd);
}
4097
/**
 * Convert paths in `entries` from git-repo-root relative to `cwd` relative,
 * dropping anything outside `cwd`. Exported for unit tests.
 *
 * When `cwd` is the repo root itself, `entries` is returned unchanged (same
 * array instance).
 *
 * @param {Array<{ path: string, status: string }>} entries - Repo-root-relative entries.
 * @param {string} repoRoot - Absolute path of the git top-level directory.
 * @param {string} cwd - Absolute path of the directory to re-root against.
 * @returns {Array<{ path: string, status: string }>}
 */
function rerootChangedFiles(entries, repoRoot, cwd) {
  const prefix = relative(repoRoot, cwd);
  if (!prefix) return entries;
  const out = [];
  for (const entry of entries) {
    const rel = relative(prefix, entry.path);
    // Only a leading ".." path *segment* means "outside cwd". A plain
    // startsWith("..") would also discard files inside cwd whose name merely
    // begins with two dots (e.g. "..eslintrc.js").
    const escapesCwd =
      rel === ".." ||
      rel.startsWith("../") ||
      (process.platform === "win32" && rel.startsWith("..\\"));
    if (rel === "" || escapesCwd) continue;
    out.push({
      ...entry,
      path: rel,
    });
  }
  return out;
}
4115
/**
 * Parse the output of `git diff --name-status` into `{ path, status }` entries.
 *
 * Each non-blank line is `<code>TAB<path>` or, for renames and copies,
 * `<code>TAB<old path>TAB<new path>`. Fields are split on TAB (not on spaces)
 * so paths containing spaces stay intact.
 *
 * Status mapping:
 *   - `R*` -> "renamed", reported under the new path;
 *   - `C*` -> "added", reported under the new path;
 *   - `A`  -> "added";
 *   - `D`  -> "deleted";
 *   - anything else (including `M` and `T`) -> "modified".
 *
 * Lines without the expected path field are skipped.
 *
 * @param {string} stdout - Raw `git diff --name-status` output.
 * @returns {Array<{ path: string, status: string }>}
 */
function parseGitDiffOutput(stdout) {
  const out = [];
  for (const line of stdout.split("\n")) {
    if (!line.trim()) continue;
    const [code, firstPath, secondPath] = line.split("\t");
    if (!code) continue;
    const kind = code[0];
    if (kind === "R" || kind === "C") {
      // Two-path form: only the destination matters for spec mapping.
      if (secondPath) {
        out.push({
          path: secondPath,
          status: kind === "R" ? "renamed" : "added",
        });
      }
      continue;
    }
    if (!firstPath) continue;
    let status = "modified";
    if (kind === "A") status = "added";
    else if (kind === "D") status = "deleted";
    out.push({ path: firstPath, status });
  }
  return out;
}
4168
/**
 * Remove one leading `./` from `s`, if present.
 *
 * @param {string} s - A path or glob.
 * @returns {string} `s` without its first two characters when they are `./`, otherwise `s` unchanged.
 */
function stripLeadingDotSlash(s) {
  if (!s.startsWith("./")) {
    return s;
  }
  return s.slice(2);
}
4171
/** Compiled globs, keyed by the pattern string exactly as passed to `compileGlob`. */
const REGEX_CACHE = /* @__PURE__ */ new Map();
/**
 * Return the RegExp for `pattern`, compiling it on first use and serving the
 * cached instance afterwards so repeated `--changed` matches don't re-build.
 * A leading `./` is stripped from the pattern before compilation.
 *
 * @param {string} pattern - Glob pattern.
 * @returns {RegExp}
 */
function compileGlob(pattern) {
  let regex = REGEX_CACHE.get(pattern);
  if (!regex) {
    regex = globToRegExp(stripLeadingDotSlash(pattern));
    REGEX_CACHE.set(pattern, regex);
  }
  return regex;
}
4180
/**
 * Translate a path glob into an anchored RegExp.
 *
 * Supported syntax:
 *   - `?`   one character other than `/`;
 *   - `*`   any run of characters other than `/`;
 *   - `**`  any run of characters, `/` included. Two positions get
 *           segment-aware handling:
 *             - `**` followed by `/` at the start of a segment (`**` + `/x`,
 *               `a/` + `**` + `/x`) matches zero or more whole directories;
 *             - a trailing `/` + `**` matches the directory itself and
 *               everything beneath it.
 *   - every other character matches itself (regex metacharacters are escaped).
 *
 * `**` never bridges a partial segment name: `src/` + `**` does not match
 * `srcfoo/a.js`, and `**` + `/foo.js` does not match `barfoo.js`.
 *
 * @param {string} pattern - Glob, already stripped of any leading `./`.
 * @returns {RegExp} Regex anchored with `^` and `$`.
 */
function globToRegExp(pattern) {
  let re = "^";
  let i = 0;
  while (i < pattern.length) {
    const ch = pattern[i];
    if (ch === "?") {
      re += "[^/]";
      i += 1;
      continue;
    }
    if (ch !== "*") {
      re += /[.+^${}()|[\]\\]/.test(ch) ? "\\" + ch : ch;
      i += 1;
      continue;
    }
    if (pattern[i + 1] !== "*") {
      re += "[^/]*";
      i += 1;
      continue;
    }
    // From here on we are looking at "**".
    const afterStars = pattern[i + 2];
    const atSegmentStart = re === "^" || re.endsWith("/");
    if (afterStars === "/" && atSegmentStart) {
      // Zero or more complete directories, each ending in "/".
      re += "(?:.*/)?";
      i += 3;
    } else if (afterStars === undefined && re.endsWith("/")) {
      // Trailing "/**": the directory itself, or anything under it.
      re = re.slice(0, -1) + "(?:/.*)?";
      i += 2;
    } else {
      re += ".*";
      i += 2;
    }
  }
  return new RegExp(re + "$");
}
4209
/**
 * Tell whether `changedPath` matches at least one glob in `relatedPaths`.
 *
 * A leading `./` on `changedPath` is ignored. An empty `relatedPaths` yields
 * false; callers handle the "unscoped spec" case separately (treat the spec
 * as always-affected) before calling this.
 *
 * @param {string} changedPath - Path of a changed file.
 * @param {string[]} relatedPaths - Glob patterns declared by a spec.
 * @returns {boolean}
 */
function isPathAffectedBy(changedPath, relatedPaths) {
  const candidate = stripLeadingDotSlash(changedPath);
  return relatedPaths.some((glob) => compileGlob(glob).test(candidate));
}
4219
+ //#endregion
4220
+ //#region src/drift/route-new-files.ts
4221
/**
 * Lightweight Claude call: given a list of new files in the PR and the existing
 * specs (with their relatedPaths globs as a hint), return the spec keys (in
 * "<feature>/<spec>" form) that the new files plausibly affect.
 *
 * Conservative by design: false positives are safer than false negatives,
 * because a missed spec turns into undetected drift in CI. The router never
 * fails the command. Every failure mode (the call rejecting, Claude reporting
 * an error, a missing or malformed JSON block) logs a warning and returns an
 * empty set, because the surrounding glob match is the primary signal and the
 * router only adds coverage for new paths no glob captures.
 *
 * @param {object} input
 * @param {string[]} input.newFiles - Paths of added files, relative to `cwd`.
 * @param {Array<{ featureName: string, specName: string, relatedPaths: string[] }>} input.specs
 * @param {string} input.cwd - Directory the files are read from and Claude runs in.
 * @param {string} [input.model] - Optional model override, forwarded when truthy.
 * @returns {Promise<Set<string>>} Keys from `specs` that the router selected.
 */
async function routeNewFilesToSpecs(input) {
  const { newFiles, specs, cwd, model } = input;
  const empty = /* @__PURE__ */ new Set();
  if (newFiles.length === 0 || specs.length === 0) return empty;

  const previews = await Promise.all(
    newFiles.map(async (path) => ({
      path,
      head: await readHead(join(cwd, path)),
    })),
  );

  let response;
  try {
    response = await invokeClaudeStreaming(
      {
        prompt: buildRouterPrompt(previews, specs),
        systemPrompt: buildRouterSystemPrompt(),
        allowedTools: ["Read", "Grep", "Glob"],
        silenceBashLog: true,
        cwd,
        ...(model ? { model } : {}),
      },
      (_msg) => {},
    );
  } catch (e) {
    // Honour the fail-open contract when the call itself rejects.
    const reason = e instanceof Error ? e.message : String(e);
    warn(`new-file router: Claude call failed (${reason}); skipping router signal`);
    return empty;
  }

  const { result, isError } = response;
  if (isError) {
    warn("new-file router: Claude returned an error; skipping router signal");
    return empty;
  }
  const json = extractJsonBlock(result);
  if (!json) {
    warn("new-file router: no JSON block in response; skipping router signal");
    return empty;
  }
  let parsed;
  try {
    parsed = JSON.parse(json);
  } catch (e) {
    warn(`new-file router: failed to parse JSON (${e.message}); skipping router signal`);
    return empty;
  }

  // Accept only keys that were actually offered, so an invented spec name
  // can't leak into the result.
  const validKeys = new Set(specs.map((s) => `${s.featureName}/${s.specName}`));
  const out = /* @__PURE__ */ new Set();
  if (typeof parsed === "object" && parsed !== null && "affectedSpecs" in parsed) {
    const arr = parsed.affectedSpecs;
    if (Array.isArray(arr)) {
      for (const item of arr) {
        if (typeof item === "string" && validKeys.has(item)) out.add(item);
      }
    }
  }
  return out;
}
4277
/**
 * Read the first 40 lines of a UTF-8 text file.
 *
 * Best effort: a file that cannot be read, or that is empty, yields "".
 *
 * @param {string} absPath - Path of the file to preview.
 * @returns {Promise<string>} Up to 40 lines joined with "\n".
 */
async function readHead(absPath) {
  let text;
  try {
    text = await readFile(absPath, "utf-8");
  } catch {
    text = "";
  }
  if (!text) {
    return "";
  }
  const firstLines = text.split("\n").slice(0, 40);
  return firstLines.join("\n");
}
4282
/**
 * Return the fixed system prompt for the new-file router.
 *
 * The prompt takes no inputs. It tells the model to triage which specs may be
 * affected by newly added files, to err on the side of inclusion, and to
 * answer with exactly one fenced json block of the shape
 * `{ "affectedSpecs": ["<feature>/<spec>", ...] }`.
 *
 * @returns {string} The system prompt text.
 */
+ function buildRouterSystemPrompt() {
4283
+ return `You triage which ccqa test specs are potentially affected by NEW source files added in a pull request.
4284
+
4285
+ You will receive:
4286
+ - A list of new files (path + first ~40 lines of each)
4287
+ - A list of existing specs with their declared relatedPaths globs
4288
+
4289
+ Your job: return the spec keys (in "<feature>/<spec>" form) whose behaviour might depend on any of the new files.
4290
+
4291
+ ## Rules
4292
+
4293
+ - Be **conservative**: when in doubt, include the spec. A spurious inclusion costs one extra drift check; a missed spec lets real drift slip through CI.
4294
+ - Use \`Read\`, \`Grep\`, \`Glob\` if you need to inspect the spec body or related code, but stay focused — this is a triage step, not a full review.
4295
+ - Ignore specs whose relatedPaths clearly point to a different area than every new file (e.g. \`src/auth/**\` specs vs new files only under \`src/billing/**\`).
4296
+ - Files like tests, generated code, build artifacts, vendor dirs typically do not affect any spec. Skip them.
4297
+
4298
+ ## Output (STRICT)
4299
+
4300
+ Output ONE fenced \`\`\`json block, nothing else:
4301
+
4302
+ \`\`\`json
4303
+ {
4304
+ "affectedSpecs": ["feature/spec", "feature/spec"]
4305
+ }
4306
+ \`\`\`
4307
+
4308
+ Use exactly the keys you saw in the input ("<feature>/<spec>"). Return an empty array if no spec is affected.
4309
+ `;
4310
+ }
4311
/**
 * Build the user prompt for the new-file router.
 *
 * The prompt has three markdown sections:
 *   - "New files": one `###` heading per preview, followed by its `head` text
 *     in a fenced block, or "(empty or unreadable)" when `head` is falsy;
 *   - "Existing specs": one bullet per spec as `<featureName>/<specName>`,
 *     followed by its relatedPaths (or a note that none are declared);
 *   - "Task": the instruction to return the affected spec keys.
 *
 * @param {Array<{ path: string, head: string }>} previews - New files with their leading lines.
 * @param {Array<{ featureName: string, specName: string, relatedPaths: string[] }>} specs - Candidate specs.
 * @returns {string} The prompt text.
 */
+ function buildRouterPrompt(previews, specs) {
4312
+ return `## New files
4313
+
4314
+ ${previews.map((p) => {
4315
+ const headBlock = p.head ? `\n\`\`\`\n${p.head}\n\`\`\`` : "\n(empty or unreadable)";
4316
+ return `### ${p.path}${headBlock}`;
4317
+ }).join("\n\n")}
4318
+
4319
+ ## Existing specs
4320
+
4321
+ ${specs.map((s) => {
4322
+ const paths = s.relatedPaths.length === 0 ? " (no relatedPaths declared)" : s.relatedPaths.map((p) => ` - ${p}`).join("\n");
4323
+ return `- ${s.featureName}/${s.specName}\n${paths}`;
4324
+ }).join("\n")}
4325
+
4326
+ ## Task
4327
+
4328
+ Return the spec keys that might be affected by any of the new files. Conservative inclusion is preferred over missing real drift.
4329
+ `;
4330
+ }
4331
+ //#endregion
4332
+ //#region src/cli/drift.ts
4333
/** Number of specs checked in parallel when `--concurrency` is not given. */
const DEFAULT_CONCURRENCY = 3;
/**
 * `ccqa drift [feature/spec]`: report whether each spec is still in sync with
 * the codebase.
 *
 * Flow of the action handler:
 *   1. validate `--format`, `--severity`, `--concurrency` (each exits with
 *      code 2 on an invalid value) and resolve the working directory;
 *   2. collect the target specs (one explicit spec, or every spec with a spec file);
 *   3. with `--changed`, narrow the targets to those touched by the git diff;
 *   4. run the drift analysis, print the rendered report, and exit with the
 *      code derived from the results and the `--severity` threshold.
 *
 * The final exit happens inside the stdout write callback. Calling
 * `process.exit()` straight after `write()` can drop output that has not been
 * flushed yet where stdout writes are asynchronous.
 */
const driftCommand = new Command("drift")
  .argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.")
  .description("Check whether each spec.yaml is still in sync with the current codebase (CI-friendly, no patches applied).")
  .option("--format <fmt>", "Output format: text | json | github", "text")
  .option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error")
  .option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`)
  .option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.")
  .option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().")
  .option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.")
  .option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.")
  .action(async (specPath, opts) => {
    const format = parseFormat(opts.format);
    const threshold = parseSeverity(opts.severity);
    const concurrency = parseConcurrency(opts.concurrency);
    const cwd = opts.cwd ? resolve(opts.cwd) : process.cwd();
    await ensureCcqaDir(cwd);
    if (opts.changed && specPath) {
      error("--changed and an explicit spec id cannot be combined; --changed only applies to a full sweep");
      process.exit(2);
    }
    let targets = await collectTargets(specPath, cwd);
    if (targets.length === 0) exitWithNoSpecs(format, "no test specs found under .ccqa/features/");
    if (format === "text") {
      header("drift", specPath ?? `${targets.length} spec${targets.length > 1 ? "s" : ""}`);
      if (opts.cwd) meta("cwd", cwd);
    }
    if (opts.changed) {
      const total = targets.length;
      targets = await filterByChanged({
        targets,
        cwd,
        baseOverride: opts.base,
        format,
        model: opts.model,
      });
      if (format === "text") meta("scoped", `${targets.length} of ${total} spec${total > 1 ? "s" : ""}`);
      if (targets.length === 0) exitWithNoSpecs(format, "no specs intersect the changed file set; nothing to check");
    }
    const blocks = await loadAvailableBlocks(cwd);
    const results = await analyzeDrift({
      targets,
      cwd,
      blocks,
      concurrency,
      ...(opts.model ? { model: opts.model } : {}),
      onSpecStart: (t) => {
        if (format === "text") info(`checking ${t.featureName}/${t.specName}`);
      },
    });
    const exitCode = determineExitCode(results, threshold);
    // Exit only once the report has been handed to the OS, so a large report
    // piped to another process is not cut short.
    process.stdout.write(renderDrift(results, format, cwd), () => {
      process.exit(exitCode);
    });
  });
4376
/**
 * Terminate the process with exit code 0 because there is nothing to check.
 *
 * Output depends on `format`:
 *   - "json": an empty report, `{ "specs": [] }`, pretty-printed on stdout;
 *   - "text": `message` through `info`;
 *   - anything else: no output.
 *
 * @param {string} format - Output format selected by the user.
 * @param {string} message - Human-readable reason, used only for "text".
 */
function exitWithNoSpecs(format, message) {
  switch (format) {
    case "json": {
      const emptyReport = JSON.stringify({ specs: [] }, null, 2);
      process.stdout.write(`${emptyReport}\n`);
      break;
    }
    case "text":
      info(message);
      break;
    default:
      break;
  }
  process.exit(0);
}
4381
/**
 * Narrow `targets` to the specs touched by the git diff against the base ref.
 *
 * A spec is kept when any of these holds:
 *   - it is unscoped, meaning `relatedPaths` is missing OR empty. An empty
 *     list gives the globs nothing to match, so such a spec is treated as
 *     always affected instead of being silently dropped;
 *   - a changed file matches one of its relatedPaths globs;
 *   - one of its `includedBlocks` names a block file that changed;
 *   - the new-file router selects it.
 *
 * The router is consulted only when files were added AND some spec is still
 * undecided, and it is shown only the undecided specs. It can only add specs,
 * so asking it about ones already selected would not change the result.
 *
 * Exits the process with code 2 when the git diff cannot be computed.
 *
 * @param {object} input
 * @param {Array<object>} input.targets - Candidate specs from `collectTargets`.
 * @param {string} input.cwd - Working directory for git and the router.
 * @param {string} [input.baseOverride] - Explicit `--base` value, if any.
 * @param {string} input.format - Output format; progress is printed only for "text".
 * @param {string} [input.model] - Optional model override for the router.
 * @returns {Promise<Array<object>>} The subset of `targets` to check, in original order.
 */
async function filterByChanged(input) {
  const { targets, cwd, baseOverride, format, model } = input;
  const base = resolveBaseRef(baseOverride);
  let changed;
  try {
    changed = await getChangedFiles(base, cwd);
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    error(`failed to run 'git diff' against ${base}: ${reason}`);
    process.exit(2);
  }
  if (format === "text") {
    meta("changed-base", base);
    meta("changed-files", changed.length);
  }
  if (changed.length === 0) return [];

  const touchedBlockNames = /* @__PURE__ */ new Set();
  for (const f of changed) {
    const blockName = parseBlockPath(f.path);
    if (blockName) touchedBlockNames.add(blockName);
  }

  const affected = /* @__PURE__ */ new Set();
  for (const t of targets) {
    const unscoped = !t.relatedPaths || t.relatedPaths.length === 0;
    const hit =
      unscoped ||
      changed.some((f) => isPathAffectedBy(f.path, t.relatedPaths)) ||
      Boolean(t.includedBlocks?.some((name) => touchedBlockNames.has(name)));
    if (hit) affected.add(specKey(t));
  }

  const newFiles = changed.filter((f) => f.status === "added");
  // Every undecided spec has a non-empty relatedPaths (unscoped ones are
  // already in `affected`).
  const undecided = targets.filter((t) => !affected.has(specKey(t)));
  if (newFiles.length > 0 && undecided.length > 0) {
    if (format === "text") info(`routing ${newFiles.length} new file(s) to specs via Claude...`);
    const routed = await routeNewFilesToSpecs({
      newFiles: newFiles.map((f) => f.path),
      specs: undecided.map((t) => ({
        featureName: t.featureName,
        specName: t.specName,
        relatedPaths: t.relatedPaths,
      })),
      cwd,
      model,
    });
    for (const key of routed) affected.add(key);
  }
  return targets.filter((t) => affected.has(specKey(t)));
}
4431
/**
 * Build the list of specs the drift check should look at.
 *
 * With `specPath`, the result is a single target for that spec (carrying its
 * `includedBlocks`, defaulting to an empty list); the process exits with
 * code 1 when the spec is unknown or has no spec file.
 *
 * Without `specPath`, every spec in the feature tree that has a spec file
 * becomes a target, carrying `relatedPaths` and `includedBlocks` only when
 * the tree provides them.
 *
 * @param {string | undefined} specPath - Optional "<feature>/<spec>" id.
 * @param {string} cwd - Root directory the feature tree is listed from.
 * @returns {Promise<Array<object>>}
 */
async function collectTargets(specPath, cwd) {
  const tree = await listFeatureTree(cwd);

  if (!specPath) {
    return tree.flatMap((feature) =>
      feature.specs
        .filter((spec) => spec.hasSpecFile)
        .map((spec) => ({
          featureName: feature.featureName,
          specName: spec.specName,
          ...(spec.relatedPaths ? { relatedPaths: spec.relatedPaths } : {}),
          ...(spec.includedBlocks ? { includedBlocks: spec.includedBlocks } : {}),
        })),
    );
  }

  const { featureName, specName } = parseSpecPath(specPath);
  const owningFeature = tree.find((f) => f.featureName === featureName);
  const spec = owningFeature?.specs.find((s) => s.specName === specName);
  if (!spec?.hasSpecFile) {
    error(`spec not found: ${featureName}/${specName} (under ${cwd})`);
    process.exit(1);
  }
  return [
    {
      featureName,
      specName,
      includedBlocks: spec.includedBlocks ?? [],
    },
  ];
}
4459
/**
 * Validate the `--format` option.
 *
 * @param {string | undefined} raw - Value from the command line; "text" when absent.
 * @returns {string} One of "text", "json" or "github". Any other value is
 *   reported through `error` and the process exits with code 2.
 */
function parseFormat(raw) {
  const chosen = raw ?? "text";
  switch (chosen) {
    case "text":
    case "json":
    case "github":
      return chosen;
    default:
      error(`invalid --format: ${chosen} (expected text|json|github)`);
      process.exit(2);
  }
}
4465
/**
 * Validate the `--severity` option.
 *
 * @param {string | undefined} raw - Value from the command line; "error" when absent.
 * @returns {string} Either "warn" or "error". Any other value is reported
 *   through `error` and the process exits with code 2.
 */
function parseSeverity(raw) {
  const level = raw ?? "error";
  const isKnown = level === "warn" || level === "error";
  if (isKnown) {
    return level;
  }
  error(`invalid --severity: ${level} (expected warn|error)`);
  process.exit(2);
}
4471
/**
 * Validate the `--concurrency` option.
 *
 * The whole value must be a positive decimal integer; surrounding whitespace
 * and a leading `+` are tolerated. Unlike a bare `Number.parseInt`, values
 * with trailing garbage ("3abc"), fractions ("2.5") or exponents ("1e3") are
 * rejected rather than silently truncated.
 *
 * @param {string | undefined} raw - Value from the command line.
 * @returns {number} The parsed count, or `DEFAULT_CONCURRENCY` when `raw` is
 *   undefined. An invalid value is reported through `error` and the process
 *   exits with code 2.
 */
function parseConcurrency(raw) {
  if (raw === void 0) return DEFAULT_CONCURRENCY;
  const text = String(raw).trim();
  const n = /^\+?\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isSafeInteger(n) || n < 1) {
    error(`invalid --concurrency: ${raw} (expected positive integer)`);
    process.exit(2);
  }
  return n;
}
4480
+ //#endregion
3424
4481
  //#region src/cli/index.ts
3425
4482
  const packageJsonPath = resolvePackageJson();
3426
4483
  const { version } = JSON.parse(readFileSync(packageJsonPath, "utf8"));
@@ -3437,11 +4494,10 @@ function resolvePackageJson() {
3437
4494
  const program = new Command();
3438
4495
  program.name("ccqa").description("E2E test CLI using Claude Code + agent-browser").version(version);
3439
4496
  program.addCommand(draftCommand);
4497
+ program.addCommand(driftCommand);
3440
4498
  program.addCommand(traceCommand);
3441
4499
  program.addCommand(generateCommand);
3442
4500
  program.addCommand(runCommand);
3443
- program.addCommand(traceSetupCommand);
3444
- program.addCommand(generateSetupCommand);
3445
4501
  program.parse();
3446
4502
  //#endregion
3447
4503
  export {};