ccqa 0.3.10 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/ccqa.mjs CHANGED
@@ -1,33 +1,41 @@
1
1
  #!/usr/bin/env node
2
+ import { n as spawnAB } from "../spawn-ab-BxjEhA5e.mjs";
2
3
  import { createRequire } from "node:module";
3
4
  import { Command } from "commander";
4
- import { accessSync, readFileSync, statSync } from "node:fs";
5
+ import { accessSync, existsSync, readFileSync, statSync } from "node:fs";
5
6
  import { fileURLToPath } from "node:url";
6
- import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, unlink, writeFile } from "node:fs/promises";
7
- import { delimiter, dirname, join, relative, resolve } from "node:path";
7
+ import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
8
8
  import { query } from "@anthropic-ai/claude-agent-sdk";
9
- import matter from "gray-matter";
9
+ import { delimiter, dirname, join, relative, resolve } from "node:path";
10
+ import { parse, stringify } from "yaml";
11
+ import { ZodError, z } from "zod";
10
12
  import { execFile, spawn } from "node:child_process";
11
13
  import { createInterface } from "node:readline";
12
- import { tmpdir } from "node:os";
14
+ import { homedir, tmpdir } from "node:os";
13
15
  import { createInterface as createInterface$1 } from "node:readline/promises";
14
- import { z } from "zod";
15
16
  import { promisify } from "node:util";
16
17
  //#region src/prompts/trace.ts
/**
 * Build a trace session name of the form `ccqa-trace-<timestamp>`.
 *
 * The timestamp is the current time in ISO-8601 form with every ":" and "."
 * replaced by "-".
 *
 * @returns {string} The generated session name.
 */
function generateSessionName() {
  const isoNow = new Date().toISOString();
  const stamp = isoNow.replace(/[:.]/g, "-");
  return `ccqa-trace-${stamp}`;
}
20
- function buildTraceSystemPrompt(spec, options) {
21
- return buildTraceSystemPromptInner(spec, options, true);
22
- }
23
- function buildTraceSystemPromptInner(spec, options, emitRelatedPaths) {
24
- const sessionName = options?.sessionName ?? generateSessionName();
25
- const skipCookiesClear = options?.skipCookiesClear ?? false;
26
- const stepsText = spec.steps.map((step) => `### ${step.id}: ${step.title}
21
+ /**
22
+ * Build the trace system prompt. `input.steps` is a flat list with includes
23
+ * already expanded (each step carries id / source / instruction / expected).
24
+ * The spec opens URLs via explicit step instructions (e.g.
25
+ * `instruction: "${APP_URL}/articles を開く"`).
26
+ *
27
+ * In v0.4 every spec is traced from scratch — block contents are inlined
28
+ * into the spec's own step list at expand time, so the prompt has no
29
+ * special "this is a block" mode. The `source` tag on each step still
30
+ * distinguishes spec-native steps from inlined block steps for the
31
+ * `// step:` comments in the eventual codegen output.
32
+ */
33
+ function buildTraceSystemPrompt(input) {
34
+ const sessionName = input.sessionName ?? generateSessionName();
35
+ const stepsText = input.steps.map((step) => `### ${step.id} [${step.source}]
27
36
  - **Instruction**: ${step.instruction}
28
37
  - **Expected**: ${step.expected}`).join("\n\n");
29
- const prereqText = spec.prerequisites ? `## Prerequisites\n${spec.prerequisites}\n\n` : "";
30
- const relatedPathsBlock = emitRelatedPaths ? buildRelatedPathsInstruction() : "";
38
+ const relatedPathsBlock = buildRelatedPathsInstruction();
31
39
  return `You are an expert QA engineer executing a browser E2E test. Execute each step precisely and record every browser action as a structured log line.
32
40
 
33
41
  ## Session
@@ -48,7 +56,8 @@ agent-browser --session SESSION uncheck "<selector>"
48
56
  agent-browser --session SESSION press <Key>
49
57
  agent-browser --session SESSION select "<selector>" "<value>"
50
58
  agent-browser --session SESSION hover "<selector>"
51
- agent-browser --session SESSION wait --text "<text>"
59
+ agent-browser --session SESSION wait --text "<text>" [--timeout <ms>]
60
+ agent-browser --session SESSION wait "<selector>" [--timeout <ms>] [--state visible|hidden]
52
61
  agent-browser --session SESSION cookies clear
53
62
  \`\`\`
54
63
 
@@ -83,17 +92,18 @@ agent-browser --session SESSION cookies clear
83
92
 
84
93
  ## Test Specification
85
94
 
86
- Title: ${spec.title}
87
- Base URL: ${spec.baseUrl}
95
+ Title: ${input.title}
88
96
 
89
- ${prereqText}## Steps
97
+ Each step's instruction names the URL to open directly (or via \`\${ENV_VAR}\`). Open exactly the URL the step says to open.
98
+
99
+ ## Steps
90
100
 
91
101
  ${stepsText}
92
102
 
93
103
  ## Execution Workflow
94
104
 
95
105
  For each step:
96
- 1. Emit \`STEP_START|<step-id>|<step-title>\`
106
+ 1. Emit \`STEP_START|<step-id>|<short description of what this step does>\`
97
107
  2. Run \`snapshot\` and identify selectors from the ARIA tree
98
108
  3. Execute the action using an ALLOWED selector
99
109
  4. Emit \`AB_ACTION|...\` for every browser action (see below)
@@ -180,6 +190,15 @@ AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
180
190
 
181
191
  The selector in AB_ACTION must be one of the ALLOWED formats above.
182
192
 
193
+ **CRITICAL — record only successful actions.** The AB_ACTION stream is the
194
+ canonical replay sequence: every line in it must be reproducible on a fresh
195
+ browser session. Therefore:
196
+
197
+ - If you tried a selector and \`agent-browser\` returned a non-zero exit (selector not found, element not interactable, timeout): **do NOT emit \`AB_ACTION|...\`** for that attempt. Take a fresh snapshot, switch selector, and only emit the AB_ACTION for the call that finally succeeded.
198
+ - If you explored multiple selectors for the same logical action (e.g. tried \`[aria-label='Email']\`, it failed, then \`[placeholder='Email']\` worked): emit AB_ACTION for the **working selector only**. The failed attempt must not appear in the trace.
199
+ - The same rule applies to \`AB_ACTION|assert|...\` lines: only emit them for assertions you actually verified on the current page in the current snapshot. Never declare an assertion against a selector you have not just confirmed visible — even if you intended to use it earlier.
200
+ - If a step ultimately fails after retries: emit \`ASSERTION_FAILED\` and STOP. Do NOT leave half-recorded actions for the failed step in the AB_ACTION stream.
201
+
183
202
  ## Assertion Protocol
184
203
 
185
204
  After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you confirmed.
@@ -211,9 +230,36 @@ After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you c
211
230
  **Selector rules for assert actions — CRITICAL:**
212
231
  - Use the **same ALLOWED formats** as browser actions — never invent aria-label values
213
232
  - Only use \`[aria-label='...']\` if that **exact** aria-label string appears in the current ARIA snapshot output
214
- - When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector
233
+ - When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector — but still pre-verify with \`wait --text\` per the MUST-VERIFY rule below; \`alt\`-attribute "text" will not match.
215
234
  - For \`element_disabled\`/\`element_enabled\`: use a CSS class selector if no aria-label is confirmed in the snapshot
216
235
 
236
+ **MUST-VERIFY rule — STRICT (applies to every assert except \`url_contains\`):**
237
+
238
+ The \`snapshot\` output is the **accessibility tree**: a semantic view. \`agent-browser\` queries the **real DOM**. They DO NOT always match. Two known traps:
239
+
240
+ 1. *Selector trap*: a snapshot row like \`textbox "Email address"\` is reachable via \`[placeholder='...']\` but **NOT** via \`[aria-label='...']\` if no \`aria-label\` attribute is actually set — the browser inferred the label from \`<label for=>\` / surrounding text / \`placeholder\`.
241
+ 2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes via \`wait --text\`) will NOT find it.
242
+
243
+ Before emitting an \`AB_ACTION|assert|...\` line, **verify the assertion form actually resolves on the live page**:
244
+
245
+ \`\`\`bash
246
+ # element_visible / element_enabled / element_disabled / element_checked / element_unchecked
247
+ agent-browser --session SESSION wait "<selector>" --timeout 3000
248
+
249
+ # element_not_visible
250
+ agent-browser --session SESSION wait "<selector>" --state hidden --timeout 3000
251
+
252
+ # text_visible
253
+ agent-browser --session SESSION wait --text "<text>" --timeout 3000
254
+
255
+ # text_not_visible
256
+ agent-browser --session SESSION wait --text "<text>" --state hidden --timeout 3000
257
+ \`\`\`
258
+
259
+ Apply the "record only successful actions" rule from the AB_ACTION section above. **Additionally**, when *no* form verifies — e.g. you tried \`[aria-label='X']\`, \`[placeholder='X']\`, and \`text=X\` and they all timed out, or the "text" turned out to be an \`alt\` / aria-label — **DROP the assertion entirely**. Fewer, real assertions beat invented ones that fail at replay. Prefer swapping a failed \`text_visible\` for an \`element_visible\` against the link/button selector when the visible label came from \`alt\` / aria-label.
260
+
261
+ \`url_contains\` is exempt — it checks the current URL string, not the DOM/accessibility tree.
262
+
217
263
  **Examples:**
218
264
  \`\`\`
219
265
  AB_ACTION|assert|url_contains|||/dashboard|Navigated to dashboard
@@ -229,7 +275,7 @@ AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
229
275
  Emit exactly one status line per step (outside any code block):
230
276
 
231
277
  \`\`\`
232
- STEP_START|<step-id>|<step-title>
278
+ STEP_START|<step-id>|<short description of what this step does>
233
279
  STEP_DONE|<step-id>|<what was verified>
234
280
  ASSERTION_FAILED|<step-id>|<category: app-bug|env-issue|auth-blocked|missing-test-data|selector-drift|agent-misread>: <reason>
235
281
  STEP_SKIPPED|<step-id>|<reason>
@@ -242,37 +288,29 @@ RUN_COMPLETED|failed|<summary>
242
288
  After each step (outside any code block):
243
289
 
244
290
  \`\`\`
245
- ROUTE_STEP|<step-id>|<step-title>|ACTION:<what you did>|OBSERVATION:<what you verified>|STATUS:<PASSED|FAILED|SKIPPED>
291
+ ROUTE_STEP|<step-id>|<short description>|ACTION:<what you did>|OBSERVATION:<what you verified>|STATUS:<PASSED|FAILED|SKIPPED>
246
292
  \`\`\`
247
293
 
248
294
  ${relatedPathsBlock}## Start
249
295
 
250
- ${skipCookiesClear ? `A setup procedure has already been executed in this session. Do NOT clear cookies — keep the existing session state.
296
+ Begin by clearing cookies, then proceed straight to the first step's instruction.
251
297
 
252
298
  \`\`\`bash
253
- agent-browser --session ${sessionName} open ${spec.baseUrl}
254
- \`\`\`
255
-
256
- Emit:
257
- \`\`\`
258
- AB_ACTION|open|${spec.baseUrl}
259
- \`\`\`` : `\`\`\`bash
260
299
  agent-browser --session ${sessionName} cookies clear
261
- agent-browser --session ${sessionName} open ${spec.baseUrl}
262
300
  \`\`\`
263
301
 
264
302
  Emit:
265
303
  \`\`\`
266
304
  AB_ACTION|cookies_clear
267
- AB_ACTION|open|${spec.baseUrl}
268
- \`\`\``}
305
+ \`\`\`
269
306
 
270
- Then emit \`STEP_START|step-01|...\` and begin.`;
307
+ Then emit \`STEP_START|step-01|...\` and execute the first step. The first step is responsible for opening the initial URL.
308
+ `;
271
309
  }
272
310
  function buildRelatedPathsInstruction() {
273
311
  return `## Post-run: emit \`relatedPaths\` block
274
312
 
275
- After all steps are complete (regardless of pass/fail) and **before** \`RUN_COMPLETED\`, you MUST emit a single \`RELATED_PATHS\` block. The host (not you) writes these paths into the spec's frontmatter — your only job is to emit the block.
313
+ After all steps are complete (regardless of pass/fail) and **before** \`RUN_COMPLETED\`, you MUST emit a single \`RELATED_PATHS\` block. The host (not you) writes these paths into the spec — your only job is to emit the block.
276
314
 
277
315
  \`relatedPaths\` is a list of glob patterns identifying the source files this spec depends on. CI uses them to decide whether a code change should trigger a drift check for this spec.
278
316
 
@@ -303,18 +341,8 @@ Emit the block outside any other code block, on its own lines. If the test could
303
341
 
304
342
  `;
305
343
  }
306
- function buildTracePrompt(spec) {
307
- return `Execute the test for "${spec.title}" at ${spec.baseUrl}.`;
308
- }
309
- function buildSetupTraceSystemPrompt(spec) {
310
- return buildTraceSystemPromptInner({
311
- title: spec.title,
312
- baseUrl: "about:blank",
313
- steps: spec.steps
314
- }, void 0, false);
315
- }
316
- function buildSetupTracePrompt(spec) {
317
- return `Execute the setup procedure "${spec.title}". Follow each step precisely.`;
/**
 * Build the user prompt that kicks off a trace run.
 *
 * @param {string} title - Spec title, interpolated verbatim inside double quotes.
 * @returns {string} The prompt sentence pair.
 */
function buildTracePrompt(title) {
  const opening = `Execute the test for "${title}".`;
  return `${opening} Each step's instruction includes the URL or selector context it needs.`;
}
319
347
  //#endregion
320
348
  //#region src/cli/logger.ts
@@ -390,6 +418,11 @@ async function invokeClaudeStreaming(options, onEvent) {
390
418
  const { prompt, systemPrompt, allowedTools, disableBuiltinTools = false, maxTurns, env, model, cwd, onAbAction, onAbActionFailed, silenceBashLog = false } = options;
391
419
  const resolvedModel = resolveModel(model);
392
420
  let lastAbToolUseId = null;
421
+ const claimAbToolUse = (toolUseId) => {
422
+ if (toolUseId !== lastAbToolUseId) return false;
423
+ lastAbToolUseId = null;
424
+ return true;
425
+ };
393
426
  const sdkOptions = {
394
427
  systemPrompt,
395
428
  maxTurns,
@@ -424,13 +457,17 @@ async function invokeClaudeStreaming(options, onEvent) {
424
457
  } else lastAbToolUseId = null;
425
458
  return {};
426
459
  }] }],
460
+ PostToolUse: [{ hooks: [async (input) => {
461
+ if (input.hook_event_name !== "PostToolUse") return {};
462
+ if (input.tool_name !== "Bash") return {};
463
+ if (!isBashToolResponseError(input.tool_response)) return {};
464
+ if (claimAbToolUse(input.tool_use_id) && onAbActionFailed) onAbActionFailed();
465
+ return {};
466
+ }] }],
427
467
  PostToolUseFailure: [{ hooks: [async (input) => {
428
468
  if (input.hook_event_name !== "PostToolUseFailure") return {};
429
469
  if (input.tool_name !== "Bash") return {};
430
- if (input.tool_use_id === lastAbToolUseId && onAbActionFailed) {
431
- onAbActionFailed();
432
- lastAbToolUseId = null;
433
- }
470
+ if (claimAbToolUse(input.tool_use_id) && onAbActionFailed) onAbActionFailed();
434
471
  return {};
435
472
  }] }]
436
473
  } : void 0
@@ -500,6 +537,26 @@ function isBlockedAbSubcommand(cmd) {
500
537
  const sub = extractAbSubcommand(cmd);
501
538
  return sub !== null && BLOCKED_AB_SUBCOMMANDS.has(sub);
502
539
  }
/**
 * Decide whether a PostToolUse hook's `tool_response` describes a failed
 * Bash call.
 *
 * A response counts as an error when it is a non-null object and any of the
 * following holds:
 *
 *   - `is_error` is exactly `true`
 *   - `exitCode` is a number other than 0
 *   - `killed` is exactly `true`
 *
 * Every other value (primitives, null, objects lacking those fields) is
 * reported as a success, so an unrecognised response shape never triggers a
 * rollback.
 *
 * @param {unknown} tool_response - Raw `tool_response` from the hook input.
 * @returns {boolean} True when the response signals a failure.
 */
function isBashToolResponseError(tool_response) {
  const isObject = typeof tool_response === "object" && tool_response !== null;
  if (!isObject) return false;
  const response = tool_response;
  return (
    response["is_error"] === true ||
    (typeof response["exitCode"] === "number" && response["exitCode"] !== 0) ||
    response["killed"] === true
  );
}
503
560
  /** Returns true if any argument to an agent-browser command uses a @ref selector (e.g. @e14). */
504
561
  function hasRefSelector(cmd) {
505
562
  const abIdx = cmd.indexOf("agent-browser");
@@ -561,87 +618,255 @@ async function* replayMockMessages(path) {
561
618
  }
562
619
  }
563
620
  //#endregion
//#region src/runtime/env-vars.ts
/**
 * Matches `${NAME}` or `$NAME` where NAME is an upper-case identifier
 * (`[A-Z_][A-Z0-9_]*`). Group 1 is the braced name, group 2 the bare name.
 */
const ENV_VAR_RE = /\$\{([A-Z_][A-Z0-9_]*)\}|\$([A-Z_][A-Z0-9_]*)/g;
/**
 * Same shape as `ENV_VAR_RE` but also accepts lower-case letters, so it
 * matches any identifier-like reference.
 */
const ANY_VAR_RE = /\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g;
/**
 * Replace every `$NAME` / `${NAME}` reference (any letter case) in `value`
 * using `lookup`. When `lookup` returns `undefined`, the original reference
 * text is preserved (callers that want empty-string substitution should wrap
 * with `?? ""`).
 *
 * @param {string} value - Text that may contain references.
 * @param {(name: string) => string | undefined} lookup - Resolver for a name.
 * @returns {string} `value` with every resolvable reference replaced.
 */
function substituteVars(value, lookup) {
  // `String.prototype.replace` with a global regex always starts from index 0,
  // so no manual `lastIndex` reset is needed here.
  return value.replace(ANY_VAR_RE, (match, braced, plain) => {
    const replacement = lookup(braced ?? plain ?? "");
    return replacement === void 0 ? match : replacement;
  });
}
/**
 * Resolve every upper-case `$VAR` / `${VAR}` reference against the current
 * process env. References whose name contains lower-case letters are not
 * matched by `ENV_VAR_RE` and are left untouched.
 *
 * Missing variables expand to the empty string, mirroring `sh` behaviour.
 * Throwing would force ccqa to be invoked with every var set even for
 * unused blocks, which is more user-hostile than letting the test fail
 * downstream with a clearer message ("login form rejected: empty password").
 *
 * @param {string} value - Text that may contain env references.
 * @returns {string} `value` with env references expanded.
 */
function resolveEnvRefs(value) {
  return value.replace(ENV_VAR_RE, (_, braced, plain) => {
    const name = braced ?? plain ?? "";
    return process.env[name] ?? "";
  });
}
/**
 * Embed `$VAR` / `${VAR}` as a JS template-literal expression that reads
 * `process.env.VAR ?? ""` at runtime. Used by `ccqa generate` so the test
 * script never bakes in the secret value.
 *
 * Returns a JavaScript string-literal expression (template literal when
 * references are present, plain string literal otherwise).
 *
 * Examples:
 *   "${PASSWORD}"            -> '`${process.env.PASSWORD ?? ""}`'
 *   "user-${SUFFIX}@x.com"   -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
 *   "literal value"          -> '"literal value"'
 *
 * @param {string} value - Text that may contain references.
 * @returns {string} JavaScript source for a string expression.
 */
function envRefsToJsExpression(value) {
  return refsToJsExpression(value, () => null);
}
/**
 * Generalised version of `envRefsToJsExpression`. Each `$NAME` / `${NAME}`
 * reference in `value` is passed to `nameToExpr(name)` first:
 *
 *   - If it returns a string, that string is interpolated as a JS expression
 *     (no quoting / no `?? ""` wrap — the caller decides the shape).
 *   - If it returns `null`, the reference expands to
 *     `process.env.<NAME> ?? ""`.
 *
 * Text between references is escaped so it survives inside a template
 * literal: backslashes, backticks and any `${` that is not a recognised
 * reference are backslash-escaped.
 *
 * The string is walked once with `matchAll` over a private copy of the
 * pattern, so the result does not depend on the shared regex's `lastIndex`
 * and no per-`${` rescan of the input is needed.
 *
 * @param {string} value - Text that may contain references.
 * @param {(name: string) => string | null} nameToExpr - Maps a reference
 *   name to a JS expression, or `null` for the `process.env` fallback.
 * @returns {string} JavaScript source: a JSON string literal when `value`
 *   has no references, otherwise a template literal.
 */
function refsToJsExpression(value, nameToExpr) {
  // Escape literal text for use inside a template literal. Any `${` found in
  // a literal chunk is by construction not a reference, so it is neutralised.
  const escapeLiteral = (text) => text.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, "\\${");
  const pattern = new RegExp(ANY_VAR_RE.source, "g");
  let body = "";
  let cursor = 0;
  let sawReference = false;
  for (const match of value.matchAll(pattern)) {
    sawReference = true;
    body += escapeLiteral(value.slice(cursor, match.index));
    const name = match[1] ?? match[2] ?? "";
    const expr = nameToExpr(name);
    body += expr !== null ? `\${${expr}}` : `\${process.env.${name} ?? ""}`;
    cursor = match.index + match[0].length;
  }
  // No references at all: a plain JSON string literal is the simplest form.
  if (!sawReference) return JSON.stringify(value);
  body += escapeLiteral(value.slice(cursor));
  return `\`${body}\``;
}
//#endregion
695
+ //#region src/spec/yaml-schema.ts
696
+ /**
697
+ * An action step: one user-facing browser interaction. `instruction` and
698
+ * `expected` are the natural-language description handed to Claude during
699
+ * `ccqa trace`. URLs live inside `instruction`, either verbatim or via
700
+ * `${ENV_VAR}` references (resolved at runtime).
701
+ */
702
+ const ActionStepSchema = z.object({
703
+ instruction: z.string().min(1),
704
+ expected: z.string().min(1)
705
+ }).strict();
706
+ /**
707
+ * An include step: invokes a reusable block (`.ccqa/blocks/<name>/spec.yaml`).
708
+ * `params` values are plain strings; env refs (`${VAR}`) inside them are
709
+ * resolved at expand time the same way step instructions are.
710
+ */
711
+ const IncludeStepSchema = z.object({
712
+ include: z.string().min(1),
713
+ params: z.record(z.string(), z.string()).optional()
714
+ }).strict();
715
+ /**
716
+ * A spec step is either an action step or an include step. The two are
717
+ * discriminated by the presence of the `include` key — see `isIncludeStep`.
718
+ */
719
+ const StepSchema = z.union([ActionStepSchema, IncludeStepSchema]);
720
+ /** Top-level spec schema. `.strict()` rejects any unknown key. */
721
+ const TestSpecSchema = z.object({
722
+ title: z.string().min(1),
723
+ relatedPaths: z.array(z.string().min(1)).optional(),
724
+ steps: z.array(StepSchema).min(1)
725
+ }).strict();
726
+ /**
727
+ * A block param declaration. `required` defaults to true; only explicit
728
+ * `required: false` makes it optional. `secret: true` flags the value as
729
+ * sensitive — codegen renders such values as `process.env.<NAME> ?? ""`
730
+ * template literals so the secret never ends up baked into test.spec.ts.
731
+ * `dummy` is a placeholder value surfaced by the draft / drift prompts
732
+ * (which see the block in isolation, before any include site exists);
733
+ * `description` is the param's semantic role, also consumed by those
734
+ * prompts and by spec authors browsing the block.
735
+ */
736
+ const BlockParamSchema = z.object({
737
+ name: z.string().min(1),
738
+ required: z.boolean().optional(),
739
+ secret: z.boolean().optional(),
740
+ dummy: z.string().optional(),
741
+ description: z.string().optional()
742
+ }).strict();
743
+ /**
744
+ * Block schema. Block steps are restricted to ActionStep — nested blocks are
745
+ * forbidden. Including a block from inside another block fails parsing here
746
+ * (the store layer maps the cryptic "Unrecognized key: 'include'" error into
747
+ * a targeted nested-block message).
748
+ */
749
+ const BlockSpecSchema = z.object({
750
+ title: z.string().min(1),
751
+ params: z.array(BlockParamSchema).optional(),
752
+ steps: z.array(ActionStepSchema).min(1)
753
+ }).strict();
/**
 * Runtime predicate for the step union: a step is an include step exactly
 * when it carries an `include` key.
 *
 * @param {object} step - A parsed spec step.
 * @returns {boolean} True when `step` has an `include` property.
 */
function isIncludeStep(step) {
  return Reflect.has(step, "include");
}
/**
 * Tell whether a block param is required. Only an explicit
 * `required: false` makes a param optional; an absent or any other value
 * leaves it required.
 *
 * @param {{ required?: boolean }} param - A block param declaration.
 * @returns {boolean} False only when `param.required` is exactly `false`.
 */
function isParamRequired(param) {
  const { required } = param;
  if (required === false) return false;
  return true;
}
762
+ //#endregion
564
763
  //#region src/spec/parser.ts
565
- function parseTestSpec(content) {
566
- const { data, content: body } = matter(content);
567
- const steps = parseSteps(body);
568
- const prerequisites = parsePrerequisites(body);
569
- return {
570
- title: String(data["title"] ?? "Untitled"),
571
- baseUrl: String(data["baseUrl"] ?? "http://localhost:3000"),
572
- prerequisites: prerequisites || void 0,
573
- setups: parseSetupRefs(data["setups"]),
574
- relatedPaths: parseRelatedPaths(data["relatedPaths"]),
575
- steps
576
- };
764
+ /** Parse a spec.yaml. Schema rejections are rewritten with actionable messages. */
765
+ function parseTestSpec(content, source = "spec.yaml") {
766
+ const raw = parseYamlOrThrow(content, source);
767
+ try {
768
+ return TestSpecSchema.parse(raw);
769
+ } catch (e) {
770
+ throw enrichZodError(e, source, false);
771
+ }
577
772
  }
578
- function parseRelatedPaths(raw) {
579
- if (!Array.isArray(raw)) return void 0;
580
- const paths = [];
581
- for (const item of raw) if (typeof item === "string" && item.trim().length > 0) paths.push(item.trim());
582
- return paths.length > 0 ? paths : void 0;
773
+ /**
774
+ * Parse a block's spec.yaml. Block-specific errors include the targeted
775
+ * nested-block message (the underlying zod failure on an `include` key
776
+ * inside a block step is hard to read).
777
+ */
778
+ function parseBlockSpec(content, source = "block spec.yaml") {
779
+ const raw = parseYamlOrThrow(content, source);
780
+ try {
781
+ return BlockSpecSchema.parse(raw);
782
+ } catch (e) {
783
+ throw enrichZodError(e, source, true);
784
+ }
583
785
  }
584
- function parseSetupSpec(content) {
585
- const { data, content: body } = matter(content);
586
- const steps = parseSteps(body);
587
- const placeholders = parsePlaceholders(data["placeholders"]);
588
- return {
589
- title: String(data["title"] ?? "Untitled"),
590
- placeholders: Object.keys(placeholders).length > 0 ? placeholders : void 0,
591
- steps
592
- };
/**
 * Parse YAML text, rewrapping any parser failure in an Error that names the
 * originating file.
 *
 * @param {string} content - Raw YAML text.
 * @param {string} source - Label (typically a path) used in the error message.
 * @returns {unknown} Whatever `parse` returns for `content`.
 * @throws {Error} `Failed to parse YAML (<source>): <reason>`; the original
 *   throwable is attached as `cause` so its stack is not lost.
 */
function parseYamlOrThrow(content, source) {
  try {
    return parse(content);
  } catch (e) {
    // A non-Error throwable has no `.message`; stringify it rather than
    // printing "undefined".
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error(`Failed to parse YAML (${source}): ${reason}`, { cause: e });
  }
}
594
- function parsePlaceholders(raw) {
595
- if (!raw || typeof raw !== "object") return {};
596
- const result = {};
597
- for (const [key, val] of Object.entries(raw)) if (val && typeof val === "object" && "dummy" in val) {
598
- const v = val;
599
- result[key] = {
600
- dummy: String(v["dummy"]),
601
- description: v["description"] ? String(v["description"]) : void 0
602
- };
793
+ function enrichZodError(error, source, isBlock) {
794
+ if (!(error instanceof ZodError)) return error;
795
+ const lines = [`Invalid ${source}:`];
796
+ for (const issue of error.issues) {
797
+ const path = issue.path.join(".") || "(root)";
798
+ const message = humanizeIssue(issue, isBlock);
799
+ lines.push(` - ${path}: ${message}`);
603
800
  }
604
- return result;
605
- }
606
- function parseSetupRefs(raw) {
607
- if (!Array.isArray(raw)) return void 0;
608
- const refs = [];
609
- for (const item of raw) if (typeof item === "object" && item !== null && "name" in item) {
610
- const i = item;
611
- refs.push({
612
- name: String(i["name"]),
613
- params: i["params"] && typeof i["params"] === "object" ? Object.fromEntries(Object.entries(i["params"]).map(([k, v]) => [k, String(v)])) : void 0
614
- });
801
+ return new Error(lines.join("\n"));
802
+ }
803
+ function humanizeIssue(issue, isBlock) {
804
+ if (issue.code === "unrecognized_keys") {
805
+ const keys = Array.isArray(issue.keys) ? issue.keys : [];
806
+ if (isBlock && keys.includes("include")) return `Nested blocks are not supported — flatten by inlining the included block's steps into this block.`;
807
+ return `Unknown keys: ${keys.join(", ")}`;
615
808
  }
616
- return refs.length > 0 ? refs : void 0;
617
- }
618
- function parsePrerequisites(body) {
619
- const match = body.match(/##\s+Prerequisites\s+([\s\S]*?)(?=##|$)/);
620
- if (!match || !match[1]) return null;
621
- return match[1].trim();
622
- }
623
- function parseSteps(body) {
624
- const stepBlocks = body.split(/###\s+Step\s+\d+:/);
625
- const steps = [];
626
- for (let i = 1; i < stepBlocks.length; i++) {
627
- const block = stepBlocks[i];
628
- if (!block) continue;
629
- const titleMatch = block.match(/^(.+)/);
630
- const instructionMatch = block.match(/\*\*Instruction\*\*:\s*(.+)/);
631
- const expectedMatch = block.match(/\*\*Expected\*\*:\s*(.+)/);
632
- if (!titleMatch || !instructionMatch || !expectedMatch) continue;
633
- steps.push({
634
- id: `step-${String(i).padStart(2, "0")}`,
635
- title: titleMatch[1]?.trim() ?? "",
636
- instruction: instructionMatch[1]?.trim() ?? "",
637
- expected: expectedMatch[1]?.trim() ?? ""
809
+ return issue.message;
810
+ }
811
+ //#endregion
812
+ //#region src/spec/expand.ts
813
+ /**
814
+ * Walk the spec's top-level steps, inlining any `- include: <block>` reference
815
+ * as the block's own steps in order. The result is a flat `step-NN`-numbered
816
+ * sequence — block boundaries survive only as the `source` tag, so trace and
817
+ * codegen never need a separate block code path.
818
+ */
819
+ function expandSpec(spec, options) {
820
+ const out = [];
821
+ let counter = 0;
822
+ const allocId = () => {
823
+ counter += 1;
824
+ return `step-${String(counter).padStart(2, "0")}`;
825
+ };
826
+ for (const step of spec.steps) if (isIncludeStep(step)) {
827
+ const block = resolveBlock(step.include, step.params ?? {}, options.blocks);
828
+ for (const blockStep of block.steps) out.push({
829
+ id: allocId(),
830
+ source: step.include,
831
+ instruction: substituteVars(blockStep.instruction, block.lookup),
832
+ expected: substituteVars(blockStep.expected, block.lookup)
638
833
  });
639
- }
640
- return steps;
834
+ } else out.push({
835
+ id: allocId(),
836
+ source: "spec",
837
+ instruction: step.instruction,
838
+ expected: step.expected
839
+ });
840
+ return out;
841
+ }
/**
 * Look up a block by name and validate the params passed at an include site.
 *
 * Validation, in order:
 *   1. the block must exist in `blocks`;
 *   2. every key of `rawParams` must be declared by the block;
 *   3. every required declared param must be supplied.
 *
 * "Supplied" means an own property of `rawParams`. An inherited property
 * such as `toString` or `constructor` does not count, so a required param
 * with such a name cannot be silently treated as present.
 *
 * @param {string} blockName - Name used in `include:`.
 * @param {Record<string, string>} rawParams - Params given at the include site.
 * @param {Map<string, object>} blocks - Loaded blocks keyed by name.
 * @returns {{ steps: Array, lookup: (name: string) => (string | undefined) }}
 *   The block's steps plus a resolver that returns the supplied value for a
 *   param name, or `undefined` when that name was not supplied.
 * @throws {Error} On an unknown block, an undeclared param, or a missing
 *   required param.
 */
function resolveBlock(blockName, rawParams, blocks) {
  const block = blocks.get(blockName);
  if (!block) throw new Error(`Unknown block: "${blockName}". Define it under .ccqa/blocks/${blockName}/spec.yaml.`);
  // One own-property test shared by validation and lookup keeps the two in agreement.
  const isSupplied = (name) => Object.prototype.hasOwnProperty.call(rawParams, name);
  const declaredParams = new Map((block.params ?? []).map((p) => [p.name, p]));
  for (const key of Object.keys(rawParams)) {
    if (!declaredParams.has(key)) throw new Error(`Block "${blockName}" received unknown param "${key}". Declared params: ${[...declaredParams.keys()].join(", ") || "(none)"}.`);
  }
  for (const [pname, def] of declaredParams) {
    if (isParamRequired(def) && !isSupplied(pname)) throw new Error(`Block "${blockName}" is missing required param "${pname}".`);
  }
  const lookup = (name) => {
    if (isSupplied(name)) return rawParams[name];
  };
  return {
    steps: block.steps,
    lookup
  };
}
/**
 * List the distinct block names that a spec's top-level steps include, in
 * first-appearance order. Only `spec.steps` is inspected.
 *
 * @param {{ steps: Array<object> }} spec - A parsed spec.
 * @returns {string[]} De-duplicated `include` values.
 */
function collectIncludedBlockNames(spec) {
  const includeNames = spec.steps
    .filter((step) => isIncludeStep(step))
    .map((step) => step.include);
  return Array.from(new Set(includeNames));
}
642
866
  //#endregion
643
867
  //#region src/store/index.ts
644
868
  const CCQA_DIR = ".ccqa";
869
+ const SPEC_FILE = "spec.yaml";
645
870
  function getCcqaDir(cwd = process.cwd()) {
646
871
  return join(cwd, CCQA_DIR);
647
872
  }
@@ -669,39 +894,44 @@ function getSpecDir(featureName, specName, cwd) {
669
894
  }
670
895
  async function ensureCcqaDir(cwd) {
671
896
  await mkdir(join(getCcqaDir(cwd), "features"), { recursive: true });
897
+ await mkdir(join(getCcqaDir(cwd), "blocks"), { recursive: true });
672
898
  }
673
899
  async function readSpecFile(featureName, specName, cwd) {
674
- const specPath = join(getSpecDir(featureName, specName, cwd), "test-spec.md");
900
+ const specPath = join(getSpecDir(featureName, specName, cwd), SPEC_FILE);
675
901
  return readFile(specPath, "utf-8").catch(() => {
676
902
  throw new Error(`Spec file not found: ${specPath}`);
677
903
  });
678
904
  }
679
905
  async function tryReadSpecFile(featureName, specName, cwd) {
680
- return readFile(join(getSpecDir(featureName, specName, cwd), "test-spec.md"), "utf-8").catch(() => null);
906
+ return readFile(join(getSpecDir(featureName, specName, cwd), SPEC_FILE), "utf-8").catch(() => null);
681
907
  }
682
908
  async function saveSpecFile(featureName, specName, content, cwd) {
683
909
  const specDir = getSpecDir(featureName, specName, cwd);
684
910
  await mkdir(specDir, { recursive: true });
685
- const specPath = join(specDir, "test-spec.md");
911
+ const specPath = join(specDir, SPEC_FILE);
686
912
  await writeFile(specPath, content.endsWith("\n") ? content : content + "\n", "utf-8");
687
913
  return specPath;
688
914
  }
689
915
  /**
690
- * Replace (or insert) the `relatedPaths` key in the spec's YAML frontmatter.
691
- * Preserves every other frontmatter key and the entire body. Returns the
692
- * absolute path that was written, or null if the spec file does not exist.
916
+ * Replace (or insert) the `relatedPaths` key in the spec. Preserves every
917
+ * other top-level field and the entire steps array. Returns the absolute
918
+ * path that was written, or null if the spec file does not exist.
693
919
  */
694
920
  async function updateSpecRelatedPaths(featureName, specName, relatedPaths, cwd) {
695
- const specPath = join(getSpecDir(featureName, specName, cwd), "test-spec.md");
921
+ const specPath = join(getSpecDir(featureName, specName, cwd), SPEC_FILE);
696
922
  const existing = await readFile(specPath, "utf-8").catch(() => null);
697
923
  if (existing === null) return null;
698
- const parsed = matter(existing);
699
- const data = { ...parsed.data };
700
- if (relatedPaths.length > 0) data["relatedPaths"] = relatedPaths;
701
- else delete data["relatedPaths"];
702
- await writeFile(specPath, matter.stringify(parsed.content, data), "utf-8");
924
+ await writeFile(specPath, stringify(stripUndefined({
925
+ ...parseTestSpec(existing, specPath),
926
+ relatedPaths: relatedPaths.length > 0 ? relatedPaths : void 0
927
+ }), { lineWidth: 0 }), "utf-8");
703
928
  return specPath;
704
929
  }
/**
 * Return a shallow copy of `obj` without the own enumerable properties whose
 * value is `undefined`. `null` and other falsy values are kept.
 *
 * @param {object} obj - Source object; not modified.
 * @returns {object} New object holding only the defined entries.
 */
function stripUndefined(obj) {
  const kept = {};
  Object.entries(obj).forEach(([key, value]) => {
    if (value === void 0) return;
    kept[key] = value;
  });
  return kept;
}
705
935
  async function saveRoute(featureName, specName, route, cwd) {
706
936
  const specDir = getSpecDir(featureName, specName, cwd);
707
937
  await mkdir(specDir, { recursive: true });
@@ -716,38 +946,72 @@ async function saveTraceActions(featureName, specName, actions, cwd) {
716
946
  await writeFile(actionsPath, JSON.stringify(actions, null, 2), "utf-8");
717
947
  return actionsPath;
718
948
  }
719
- function getSetupDir(name, cwd) {
720
- return join(getCcqaDir(cwd), "setups", name);
949
+ function getBlocksDir(cwd) {
950
+ return join(getCcqaDir(cwd), "blocks");
721
951
  }
722
- async function readSetupSpecFile(name, cwd) {
723
- const specPath = join(getSetupDir(name, cwd), "setup-spec.md");
724
- return readFile(specPath, "utf-8").catch(() => {
725
- throw new Error(`Setup spec not found: ${specPath}`);
726
- });
952
+ /**
953
+ * Inverse of `getBlockDir`. Given a file path that appears in a git diff,
954
+ * return the block name if the path points at the block's spec.yaml, else
955
+ * null. Used by `drift --changed` to invalidate specs whose included blocks
956
+ * were edited. (v0.4 inlines blocks into every spec's own trace, so the
957
+ * block directory holds only spec.yaml — no per-block actions.json / route
958
+ * lives here anymore.)
959
+ */
960
+ function parseBlockPath(path) {
961
+ return path.match(/(?:^|\/)\.ccqa\/blocks\/([^/]+)\/spec\.yaml$/)?.[1] ?? null;
727
962
  }
728
- async function saveSetupActions(name, actions, cwd) {
729
- const dir = getSetupDir(name, cwd);
730
- await mkdir(dir, { recursive: true });
731
- const path = join(dir, "actions.json");
732
- await writeFile(path, JSON.stringify(actions, null, 2), "utf-8");
733
- return path;
963
+ /**
964
+ * Load every block under `.ccqa/blocks/<name>/spec.yaml`. Used by the trace /
965
+ * generate / drift entry points to validate include references at parse time.
966
+ *
967
+ * A malformed block is fatal — surfaces as a thrown Error with the path that
968
+ * failed. Missing block directories (no `spec.yaml`) are silently skipped so
969
+ * stray files don't break the loader.
970
+ */
971
+ async function loadAllBlocks(cwd) {
972
+ const dir = getBlocksDir(cwd);
973
+ const names = await readdir(dir).catch(() => []);
974
+ const entries = await Promise.all(names.map(async (name) => {
975
+ const path = join(dir, name, SPEC_FILE);
976
+ const content = await readFile(path, "utf-8").catch(() => null);
977
+ return content === null ? null : [name, parseBlockSpec(content, path)];
978
+ }));
979
+ return new Map(entries.filter((e) => e !== null));
734
980
  }
735
- async function getSetupActions(name, cwd) {
736
- const path = join(getSetupDir(name, cwd), "actions.json");
737
- const content = await readFile(path, "utf-8").catch(() => {
738
- throw new Error(`No setup actions found for: ${name}. Run \`ccqa trace-setup ${name}\` first.`);
739
- });
740
- return {
741
- path,
742
- actions: JSON.parse(content)
743
- };
981
+ /**
982
+ * Project the parsed blocks into the shape the draft / drift prompts consume.
983
+ * Co-located with `loadAllBlocks` so callers don't have to remember the
984
+ * isParamRequired / secret-default mapping.
985
+ */
986
+ async function loadAvailableBlocks(cwd) {
987
+ return [...(await loadAllBlocks(cwd)).entries()].map(([name, block]) => ({
988
+ name,
989
+ title: block.title,
990
+ params: (block.params ?? []).map((p) => ({
991
+ name: p.name,
992
+ required: isParamRequired(p),
993
+ secret: p.secret === true
994
+ }))
995
+ }));
744
996
  }
745
- async function saveSetupRoute(name, route, cwd) {
746
- const dir = getSetupDir(name, cwd);
747
- await mkdir(dir, { recursive: true });
748
- const routePath = join(dir, "route.md");
749
- await writeFile(routePath, routeToMarkdown(route), "utf-8");
750
- return routePath;
997
+ /**
998
+ * Probe for orphaned files left over from earlier ccqa versions inside
999
+ * `.ccqa/blocks/<name>/`. Both pre-v0.4 `test.spec.ts` (function-export
1000
+ * blocks) and the short-lived `actions.json` / `route.md` (recorded-block
1001
+ * variant) are dead in the new "blocks are pure spec templates" model and
1002
+ * should be deleted manually. Returns the absolute paths.
1003
+ */
1004
+ async function findStaleBlockArtifacts(cwd) {
1005
+ const dir = getBlocksDir(cwd);
1006
+ const names = await readdir(dir).catch(() => []);
1007
+ return (await Promise.all(names.flatMap((name) => [
1008
+ "test.spec.ts",
1009
+ "actions.json",
1010
+ "route.md"
1011
+ ].map(async (f) => {
1012
+ const path = join(dir, name, f);
1013
+ return await stat(path).then(() => true).catch(() => false) ? path : null;
1014
+ })))).filter((p) => p !== null);
751
1015
  }
752
1016
  async function getTraceActions(featureName, specName, cwd) {
753
1017
  const path = join(getSpecDir(featureName, specName, cwd), "actions.json");
@@ -789,8 +1053,7 @@ async function listSpecsForFeature(featureName, cwd) {
789
1053
  }
790
1054
  /**
791
1055
  * Lists every feature/spec dir under .ccqa/features/, regardless of whether
792
- * the spec is fully drafted yet. Each spec file is read at most once: title
793
- * and relatedPaths are both extracted from the same parse.
1056
+ * the spec is fully drafted yet. Each spec file is read at most once.
794
1057
  */
795
1058
  async function listFeatureTree(cwd) {
796
1059
  const featuresDir = join(getCcqaDir(cwd), "features");
@@ -801,18 +1064,19 @@ async function listFeatureTree(cwd) {
801
1064
  return {
802
1065
  featureName,
803
1066
  specs: await Promise.all(specDirs.map(async (specName) => {
804
- const content = await readFile(join(testCasesDir, specName, "test-spec.md"), "utf-8").catch(() => null);
1067
+ const specFile = join(testCasesDir, specName, SPEC_FILE);
1068
+ const content = await readFile(specFile, "utf-8").catch(() => null);
805
1069
  if (content === null) return {
806
1070
  specName,
807
1071
  hasSpecFile: false
808
1072
  };
809
1073
  try {
810
- const spec = parseTestSpec(content);
1074
+ const spec = parseTestSpec(content, specFile);
811
1075
  const entry = {
812
1076
  specName,
813
- hasSpecFile: true
1077
+ hasSpecFile: true,
1078
+ includedBlocks: collectIncludedBlockNames(spec)
814
1079
  };
815
- if (spec.title && spec.title !== "Untitled") entry.title = spec.title;
816
1080
  if (spec.relatedPaths) entry.relatedPaths = spec.relatedPaths;
817
1081
  return entry;
818
1082
  } catch {
@@ -845,6 +1109,20 @@ function routeToMarkdown(route) {
845
1109
  return lines.join("\n");
846
1110
  }
847
1111
  //#endregion
1112
+ //#region src/cli/stale-blocks.ts
1113
+ /**
1114
+ * Hint when stale per-block artifacts (`test.spec.ts`, `actions.json`,
1115
+ * `route.md`) from earlier ccqa versions are still present. v0.4 treats
1116
+ * blocks as pure spec templates — they no longer have their own executable
1117
+ * or recorded artifacts, so these files are dead code and should be deleted
1118
+ * manually. Shared by `trace` and `generate`.
1119
+ */
1120
+ async function warnStaleBlockArtifacts() {
1121
+ const stale = await findStaleBlockArtifacts();
1122
+ if (stale.length === 0) return;
1123
+ for (const p of stale) hint(`stale block artifact detected: ${p} — v0.4 no longer uses these; delete it manually.`);
1124
+ }
1125
+ //#endregion
848
1126
  //#region src/drift/parse-related-paths.ts
849
1127
  /**
850
1128
  * Pull a `RELATED_PATHS_BEGIN ... RELATED_PATHS_END` block out of the trace
@@ -869,103 +1147,8 @@ function parseRelatedPathsBlock(text) {
869
1147
  return out;
870
1148
  }
871
1149
  //#endregion
872
- //#region src/runtime/bundled-config.ts
873
- const CANDIDATES = [
874
- "../runtime/vitest.config.mjs",
875
- "./vitest.config.mjs",
876
- "./vitest.config.ts"
877
- ];
878
- function bundledVitestConfigPath() {
879
- for (const rel of CANDIDATES) {
880
- const candidate = fileURLToPath(new URL(rel, import.meta.url));
881
- try {
882
- accessSync(candidate);
883
- return candidate;
884
- } catch {}
885
- }
886
- return fileURLToPath(new URL("./vitest.config.ts", import.meta.url));
887
- }
888
- //#endregion
889
- //#region src/runtime/spawn-vitest.ts
890
- const require$2 = createRequire(import.meta.url);
891
- function resolveVitestBin() {
892
- const pkgPath = require$2.resolve("vitest/package.json");
893
- const pkg = require$2(pkgPath);
894
- const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.vitest;
895
- if (!binRel) throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
896
- return resolve(dirname(pkgPath), binRel);
897
- }
898
- async function spawnVitestCaptured(args, opts = {}) {
899
- const child = spawnVitestChild(args, opts, "pipe");
900
- const [stdout, stderr, exitCode] = await Promise.all([
901
- drain(child.stdout),
902
- drain(child.stderr),
903
- waitExit(child)
904
- ]);
905
- return {
906
- exitCode,
907
- stdout,
908
- stderr
909
- };
910
- }
911
- async function spawnVitestTeed(args, opts = {}) {
912
- const child = spawnVitestChild(args, opts, "pipe");
913
- const [stdout, stderr, exitCode] = await Promise.all([
914
- teeDrain(child.stdout, process.stdout),
915
- teeDrain(child.stderr, process.stderr),
916
- waitExit(child)
917
- ]);
918
- return {
919
- exitCode,
920
- stdout,
921
- stderr
922
- };
923
- }
924
- function spawnVitestStreaming(args, opts = {}) {
925
- const child = spawnVitestChild(args, opts, "pipe");
926
- return {
927
- child,
928
- stdout: child.stdout,
929
- stderr: child.stderr,
930
- exited: waitExit(child)
931
- };
932
- }
933
- function spawnVitestChild(args, opts, stdio) {
934
- const vitestBin = resolveVitestBin();
935
- return spawn(process.execPath, [vitestBin, ...args], {
936
- cwd: opts.cwd,
937
- env: opts.env ?? process.env,
938
- stdio: [
939
- "ignore",
940
- stdio,
941
- stdio
942
- ]
943
- });
944
- }
945
- async function drain(stream) {
946
- stream.setEncoding("utf8");
947
- let buf = "";
948
- for await (const chunk of stream) buf += chunk;
949
- return buf;
950
- }
951
- async function teeDrain(stream, sink) {
952
- stream.setEncoding("utf8");
953
- let buf = "";
954
- for await (const chunk of stream) {
955
- buf += chunk;
956
- sink.write(chunk);
957
- }
958
- return buf;
959
- }
960
- function waitExit(child) {
961
- return new Promise((resolvePromise, rejectPromise) => {
962
- child.once("exit", (code) => resolvePromise(code ?? 0));
963
- child.once("error", rejectPromise);
964
- });
965
- }
966
- //#endregion
967
1150
  //#region src/runtime/agent-browser-bin.ts
968
- const require$1 = createRequire(import.meta.url);
1151
+ const require$2 = createRequire(import.meta.url);
969
1152
  function hasAgentBrowserShim(dir) {
970
1153
  try {
971
1154
  statSync(join(dir, "agent-browser"));
@@ -999,10 +1182,10 @@ function findNodeModulesBin(start) {
999
1182
  function resolveAgentBrowserBinDir() {
1000
1183
  const fromCwd = findNodeModulesBin(process.cwd());
1001
1184
  if (fromCwd) return fromCwd;
1002
- const fromSelf = findNodeModulesBin(dirname(require$1.resolve("agent-browser/package.json")));
1185
+ const fromSelf = findNodeModulesBin(dirname(require$2.resolve("agent-browser/package.json")));
1003
1186
  if (fromSelf) return fromSelf;
1004
1187
  try {
1005
- const candidate = join(dirname(require$1.resolve("agent-browser/package.json")), "node_modules", ".bin");
1188
+ const candidate = join(dirname(require$2.resolve("agent-browser/package.json")), "node_modules", ".bin");
1006
1189
  if (hasAgentBrowserShim(candidate)) return candidate;
1007
1190
  } catch {}
1008
1191
  return null;
@@ -1062,54 +1245,197 @@ function formatAgentBrowserUnavailableMessage() {
1062
1245
  ].join("\n");
1063
1246
  }
1064
1247
  //#endregion
1065
- //#region src/runtime/env-vars.ts
1066
- const ENV_VAR_RE = /\$\{([A-Z_][A-Z0-9_]*)\}|\$([A-Z_][A-Z0-9_]*)/g;
1248
+ //#region src/runtime/replay-validate.ts
1249
+ const SHORT_TIMEOUT_MS = 5e3;
1250
+ const ASSERT_TIMEOUT_MS = 1e4;
1067
1251
  /**
1068
- * Returns true if the value contains at least one `$VAR` or `${VAR}` reference.
1252
+ * Convert one recorded action into the `agent-browser` arg list that would
1253
+ * exercise it. Returns `null` for actions that should not be validated
1254
+ * (snapshot has no side effect; assert types whose codegen forms aren't
1255
+ * directly verifiable here fall through to the caller's `unverifiable`
1256
+ * fallback).
1069
1257
  */
1070
- function hasEnvRef(value) {
1071
- ENV_VAR_RE.lastIndex = 0;
1072
- return ENV_VAR_RE.test(value);
1258
+ function actionToAbArgs(action, sessionName) {
1259
+ const base = ["--session", sessionName];
1260
+ const sub = (s) => s === void 0 ? "" : resolveEnvRefs(s);
1261
+ switch (action.command) {
1262
+ case "cookies_clear": return [
1263
+ ...base,
1264
+ "cookies",
1265
+ "clear"
1266
+ ];
1267
+ case "open": return [
1268
+ ...base,
1269
+ "open",
1270
+ sub(action.value).replace(/^["']|["']$/g, "")
1271
+ ];
1272
+ case "click": return [
1273
+ ...base,
1274
+ "click",
1275
+ sub(action.selector)
1276
+ ];
1277
+ case "dblclick": return [
1278
+ ...base,
1279
+ "dblclick",
1280
+ sub(action.selector)
1281
+ ];
1282
+ case "fill":
1283
+ case "type": return [
1284
+ ...base,
1285
+ "fill",
1286
+ sub(action.selector),
1287
+ sub(action.value)
1288
+ ];
1289
+ case "check": return [
1290
+ ...base,
1291
+ "check",
1292
+ sub(action.selector)
1293
+ ];
1294
+ case "uncheck": return [
1295
+ ...base,
1296
+ "uncheck",
1297
+ sub(action.selector)
1298
+ ];
1299
+ case "press": return [
1300
+ ...base,
1301
+ "press",
1302
+ sub(action.value)
1303
+ ];
1304
+ case "select": return [
1305
+ ...base,
1306
+ "select",
1307
+ sub(action.selector),
1308
+ sub(action.value)
1309
+ ];
1310
+ case "hover": return [
1311
+ ...base,
1312
+ "hover",
1313
+ sub(action.selector)
1314
+ ];
1315
+ case "scroll": {
1316
+ const args = [action.direction ?? "down", ...action.pixels ? [action.pixels] : []];
1317
+ return [
1318
+ ...base,
1319
+ "scroll",
1320
+ ...args
1321
+ ];
1322
+ }
1323
+ case "drag": return [
1324
+ ...base,
1325
+ "drag",
1326
+ sub(action.selector),
1327
+ sub(action.target)
1328
+ ];
1329
+ case "wait": {
1330
+ const raw = sub(action.selector);
1331
+ if (!raw) return null;
1332
+ if (/^\d+$/.test(raw)) return null;
1333
+ if (raw.startsWith("text=")) return [
1334
+ ...base,
1335
+ "wait",
1336
+ "--text",
1337
+ raw.slice(5),
1338
+ "--timeout",
1339
+ String(SHORT_TIMEOUT_MS)
1340
+ ];
1341
+ return [
1342
+ ...base,
1343
+ "wait",
1344
+ raw,
1345
+ "--timeout",
1346
+ String(SHORT_TIMEOUT_MS)
1347
+ ];
1348
+ }
1349
+ case "snapshot": return null;
1350
+ case "assert": return assertToAbArgs(action, sub, sessionName);
1351
+ }
1073
1352
  }
1074
- /**
1075
- * Resolve every `$VAR` / `${VAR}` reference against the current process env.
1076
- *
1077
- * Missing variables expand to the empty string, mirroring `sh` behaviour.
1078
- * Throwing would force ccqa to be invoked with every var set even for
1079
- * unused setups, which is more user-hostile than letting the test fail
1080
- * downstream with a clearer message ("login form rejected: empty password").
1081
- */
1082
- function resolveEnvRefs(value) {
1083
- return value.replace(ENV_VAR_RE, (_, braced, plain) => {
1084
- const name = braced ?? plain ?? "";
1085
- return process.env[name] ?? "";
1086
- });
1353
+ function assertToAbArgs(action, sub, sessionName) {
1354
+ const base = ["--session", sessionName];
1355
+ const val = sub(action.value ?? action.observation);
1356
+ const sel = sub(action.selector ?? action.observation);
1357
+ switch (action.assertType) {
1358
+ case "text_visible":
1359
+ if (!val) return null;
1360
+ return [
1361
+ ...base,
1362
+ "wait",
1363
+ "--text",
1364
+ val,
1365
+ "--timeout",
1366
+ String(ASSERT_TIMEOUT_MS)
1367
+ ];
1368
+ case "text_not_visible": return null;
1369
+ case "element_visible":
1370
+ if (!sel) return null;
1371
+ return [
1372
+ ...base,
1373
+ "wait",
1374
+ sel,
1375
+ "--timeout",
1376
+ String(ASSERT_TIMEOUT_MS)
1377
+ ];
1378
+ case "element_not_visible": return null;
1379
+ case "url_contains": return null;
1380
+ case "element_enabled":
1381
+ case "element_disabled":
1382
+ case "element_checked":
1383
+ case "element_unchecked":
1384
+ if (!sel || sel.startsWith("text=") || sel.startsWith("[aria-label=")) return null;
1385
+ return [
1386
+ ...base,
1387
+ "wait",
1388
+ sel,
1389
+ "--timeout",
1390
+ String(ASSERT_TIMEOUT_MS)
1391
+ ];
1392
+ default: return null;
1393
+ }
1394
+ }
1395
+ function validateActions(actions, opts) {
1396
+ const kept = [];
1397
+ const dropped = [];
1398
+ let skipUntilSideEffect = false;
1399
+ for (let i = 0; i < actions.length; i++) {
1400
+ const action = actions[i];
1401
+ if (skipUntilSideEffect && isPassiveCommand(action.command)) {
1402
+ dropped.push({
1403
+ index: i,
1404
+ action,
1405
+ reason: "skipped after a preceding action failed"
1406
+ });
1407
+ continue;
1408
+ }
1409
+ skipUntilSideEffect = false;
1410
+ const args = actionToAbArgs(action, opts.sessionName);
1411
+ if (args === null) {
1412
+ kept.push(action);
1413
+ continue;
1414
+ }
1415
+ const result = spawnAB(args);
1416
+ if (result.status === 0) {
1417
+ kept.push(action);
1418
+ continue;
1419
+ }
1420
+ dropped.push({
1421
+ index: i,
1422
+ action,
1423
+ reason: (result.stderr.trim() || result.stdout.trim() || `agent-browser exit ${result.status ?? "?"}`).slice(0, 200)
1424
+ });
1425
+ skipUntilSideEffect = true;
1426
+ }
1427
+ return {
1428
+ kept,
1429
+ dropped
1430
+ };
1087
1431
  }
1088
1432
  /**
1089
- * Embed `$VAR` / `${VAR}` as a JS template-literal expression that reads
1090
- * `process.env.VAR ?? ""` at runtime. Used by `ccqa generate` so the test
1091
- * script never bakes in the secret value.
1092
- *
1093
- * Returns a JavaScript string-literal expression (template literal when env
1094
- * refs are present, plain string literal otherwise).
1095
- *
1096
- * Examples:
1097
- * "${PASSWORD}" -> '`${process.env.PASSWORD ?? ""}`'
1098
- * "user-${SUFFIX}@x.com" -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
1099
- * "literal value" -> '"literal value"'
1433
+ * Passive (read-only) commands whose only effect is observation. When a
1434
+ * preceding action fails, dropping these too is the right move because
1435
+ * they were trying to observe state the failed action would have set up.
1100
1436
  */
1101
- function envRefsToJsExpression(value) {
1102
- if (!hasEnvRef(value)) return JSON.stringify(value);
1103
- const escaped = value.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, (match, offset, source) => {
1104
- ENV_VAR_RE.lastIndex = 0;
1105
- let m;
1106
- while ((m = ENV_VAR_RE.exec(source)) !== null) if (m.index === offset) return "${";
1107
- return "\\${";
1108
- });
1109
- ENV_VAR_RE.lastIndex = 0;
1110
- return `\`${escaped.replace(ENV_VAR_RE, (_, braced, plain) => {
1111
- return `\${process.env.${braced ?? plain ?? ""} ?? ""}`;
1112
- })}\``;
1437
+ function isPassiveCommand(cmd) {
1438
+ return cmd === "snapshot" || cmd === "wait" || cmd === "assert";
1113
1439
  }
1114
1440
  //#endregion
1115
1441
  //#region src/cli/trace.ts
@@ -1129,30 +1455,35 @@ async function runTrace(featureName, specName, model) {
1129
1455
  throw e;
1130
1456
  }
1131
1457
  await ensureCcqaDir();
1458
+ await warnStaleBlockArtifacts();
1132
1459
  const spec = parseTestSpec(await readSpecFile(featureName, specName));
1133
- const hasSetups = (spec.setups?.length ?? 0) > 0;
1460
+ const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
1134
1461
  meta("spec", spec.title);
1135
- meta("url", spec.baseUrl);
1136
- if (hasSetups) meta("setups", spec.setups.map((s) => s.name).join(", "));
1137
- meta("steps", spec.steps.length);
1462
+ meta("steps", expanded.length);
1463
+ const includes = collectIncludedBlockNames(spec);
1464
+ if (includes.length > 0) meta("blocks", includes.join(", "));
1138
1465
  blank();
1139
1466
  const sessionName = generateSessionName();
1140
- if (hasSetups) {
1141
- info("Running setup procedures...");
1142
- await runSetups(spec.setups, sessionName);
1143
- blank();
1144
- }
1145
- const systemPrompt = buildTraceSystemPrompt(spec, {
1146
- sessionName,
1147
- skipCookiesClear: hasSetups
1467
+ const systemPrompt = buildTraceSystemPrompt({
1468
+ title: spec.title,
1469
+ steps: expanded,
1470
+ sessionName
1148
1471
  });
1149
- const prompt = buildTracePrompt(spec);
1472
+ const prompt = buildTracePrompt(spec.title);
1150
1473
  info("Running agent-browser session...");
1151
1474
  blank();
1152
1475
  const routeSteps = [];
1153
1476
  let overallStatus = "passed";
1154
1477
  const traceActions = [];
1478
+ let currentStepId;
1155
1479
  let relatedPathsBuffer = null;
1480
+ const withStepId = (action) => {
1481
+ if (!action) return null;
1482
+ return currentStepId ? {
1483
+ ...action,
1484
+ stepId: currentStepId
1485
+ } : action;
1486
+ };
1156
1487
  const { isError } = await invokeClaudeStreaming({
1157
1488
  prompt,
1158
1489
  systemPrompt,
@@ -1168,7 +1499,7 @@ async function runTrace(featureName, specName, model) {
1168
1499
  },
1169
1500
  model,
1170
1501
  onAbAction: (abAction) => {
1171
- const action = parseAbAction(abAction);
1502
+ const action = withStepId(parseAbAction(abAction));
1172
1503
  if (action) traceActions.push(action);
1173
1504
  },
1174
1505
  onAbActionFailed: () => {
@@ -1184,10 +1515,14 @@ async function runTrace(featureName, specName, model) {
1184
1515
  const idx = text.indexOf("RELATED_PATHS_BEGIN");
1185
1516
  if (idx !== -1) relatedPathsBuffer = text.slice(idx) + "\n";
1186
1517
  }
1187
- const statusLine = parseStatusLine(text);
1188
- if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
1189
1518
  for (const line of text.split("\n")) {
1190
1519
  const trimmed = line.trim();
1520
+ const status = parseStatusLine(line);
1521
+ if (status) {
1522
+ if (status.type === "STEP_START" && status.stepId) currentStepId = status.stepId;
1523
+ step(status.type, status.stepId, status.detail);
1524
+ continue;
1525
+ }
1191
1526
  if (trimmed.startsWith("ROUTE_STEP|")) {
1192
1527
  const routeStep = parseRouteStep(trimmed);
1193
1528
  if (routeStep) {
@@ -1195,24 +1530,25 @@ async function runTrace(featureName, specName, model) {
1195
1530
  if (routeStep.status === "FAILED") overallStatus = "failed";
1196
1531
  }
1197
1532
  } else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
1198
- const action = parseAbAction(trimmed);
1533
+ const action = withStepId(parseAbAction(trimmed));
1199
1534
  if (action) traceActions.push(action);
1200
1535
  }
1201
1536
  }
1202
1537
  }
1203
1538
  });
1204
1539
  if (isError) overallStatus = "failed";
1540
+ const validatedActions = validateAndReport(traceActions);
1205
1541
  const route = {
1206
1542
  specName,
1207
1543
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
1208
1544
  status: overallStatus,
1209
1545
  steps: routeSteps
1210
1546
  };
1211
- const [routePath, actionsPath] = await Promise.all([saveRoute(featureName, specName, route), saveTraceActions(featureName, specName, traceActions)]);
1547
+ const [routePath, actionsPath] = await Promise.all([saveRoute(featureName, specName, route), saveTraceActions(featureName, specName, validatedActions)]);
1212
1548
  blank();
1213
1549
  meta("route", routePath);
1214
1550
  meta("saved", actionsPath);
1215
- meta("actions", traceActions.length);
1551
+ meta("actions", validatedActions.length);
1216
1552
  meta("status", overallStatus.toUpperCase());
1217
1553
  const relatedPaths = relatedPathsBuffer !== null ? parseRelatedPathsBlock(relatedPathsBuffer) : null;
1218
1554
  if (relatedPaths !== null) {
@@ -1222,34 +1558,23 @@ async function runTrace(featureName, specName, model) {
1222
1558
  hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
1223
1559
  }
1224
1560
  /**
1225
- * Execute setup procedures by running their test.spec.ts via vitest with a fixed session name.
1226
- * Creates a temporary runner script that sets the session and imports each setup's test body.
1561
+ * Run the post-trace replay validation and emit user-visible drop reports.
1562
+ * Splitting this out keeps `runTrace` readable; the function is pure aside
1563
+ * from `log.*` and the agent-browser invocations inside `validateActions`.
1227
1564
  */
1228
- async function runSetups(setups, sessionName) {
1229
- for (const ref of setups) {
1230
- info(` setup: ${ref.name}`);
1231
- const scriptPath = join(getSetupDir(ref.name), "test.spec.ts");
1232
- let script = await readFile(scriptPath, "utf-8").catch(() => {
1233
- throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
1234
- });
1235
- for (const [key, value] of Object.entries(ref.params ?? {})) script = script.replaceAll(`{{${key}}}`, resolveEnvRefs(value));
1236
- script = script.replace(/process\.env\.AGENT_BROWSER_SESSION\s*\|?\|?=\s*`.+`;/, `process.env.AGENT_BROWSER_SESSION = ${JSON.stringify(sessionName)};`);
1237
- const tmpPath = join(getSetupDir(ref.name), `_run.spec.ts`);
1238
- await writeFile(tmpPath, script, "utf-8");
1239
- try {
1240
- const { exitCode, stdout, stderr } = await spawnVitestCaptured([
1241
- "run",
1242
- "--config",
1243
- bundledVitestConfigPath(),
1244
- tmpPath
1245
- ]);
1246
- process.stdout.write(stdout);
1247
- if (stderr) process.stderr.write(stderr);
1248
- if (exitCode !== 0) throw new Error(`Setup '${ref.name}' failed (exit ${exitCode})`);
1249
- } finally {
1250
- await unlink(tmpPath).catch(() => {});
1251
- }
1565
+ function validateAndReport(actions) {
1566
+ if (actions.length === 0) return actions;
1567
+ const sessionName = `${generateSessionName()}-validate`;
1568
+ blank();
1569
+ info("post-trace validation (replaying recorded actions)...");
1570
+ const { kept, dropped } = validateActions(actions, { sessionName });
1571
+ if (dropped.length === 0) {
1572
+ meta("validated", `${kept.length}/${actions.length} kept`);
1573
+ return kept;
1252
1574
  }
1575
+ for (const d of dropped) warn(`dropped action #${d.index + 1} (${d.action.command}${d.action.selector ? " " + d.action.selector : ""}): ${d.reason}`);
1576
+ meta("validated", `${kept.length}/${actions.length} kept (${dropped.length} dropped)`);
1577
+ return kept;
1253
1578
  }
1254
1579
  function parseStatusLine(text) {
1255
1580
  for (const line of text.split("\n")) {
@@ -1346,21 +1671,32 @@ function parseAbAction(line) {
1346
1671
  }
1347
1672
  //#endregion
1348
1673
  //#region src/codegen/actions-to-script.ts
1349
- function actionsToScript(actions, title, setupScripts) {
1674
+ function actionsToScript(input) {
1675
+ const { actions, testName, stepMarkers = [] } = input;
1350
1676
  const parts = [...[
1351
1677
  `import { test } from "vitest";`,
1352
1678
  `import { spawnSync } from "node:child_process";`,
1353
- `import { ab, abWait, abAssertTextVisible, abAssertVisible, abAssertNotVisible, abAssertUrl, abAssertEnabled, abAssertDisabled, abAssertChecked, abAssertUnchecked } from "ccqa/test-helpers";`,
1679
+ `import { ${[
1680
+ "ab",
1681
+ "abWait",
1682
+ "abAssertTextVisible",
1683
+ "abAssertVisible",
1684
+ "abAssertNotVisible",
1685
+ "abAssertUrl",
1686
+ "abAssertEnabled",
1687
+ "abAssertDisabled",
1688
+ "abAssertChecked",
1689
+ "abAssertUnchecked"
1690
+ ].join(", ")} } from "ccqa/test-helpers";`,
1354
1691
  "",
1355
- `// Single session shared across all tests — reset per run via cookies clear in first test.`,
1356
- `// Use ||= so an outer harness (e.g. ccqa generate's auto-fix loop) can pre-set the session`,
1357
- `// name and inspect the same session after the run finishes.`,
1692
+ `// Single session shared across the run. Use ||= so an outer harness`,
1693
+ `// (e.g. ccqa generate's auto-fix loop) can pre-set the session name`,
1694
+ `// and inspect the same session after the run finishes.`,
1358
1695
  `process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
1359
1696
  ""
1360
1697
  ]];
1361
- if (setupScripts?.length) for (const setup of setupScripts) parts.push(`test("setup: ${setup.name}", () => {`, setup.body, "}, 3 * 60 * 1000);", "");
1362
- const body = actionsToLines(actions).map((l) => ` ${l}`).join("\n");
1363
- parts.push(`test(${JSON.stringify(title)}, () => {`, body, "}, 5 * 60 * 1000);", "");
1698
+ const body = actionsToLines(actions, stepMarkers).map((l) => ` ${l}`).join("\n");
1699
+ parts.push(`test(${JSON.stringify(testName)}, () => {`, body, "}, 5 * 60 * 1000);", "");
1364
1700
  return parts.join("\n");
1365
1701
  }
1366
1702
  /** Commands that interact with page elements and need the page to be loaded */
@@ -1375,11 +1711,18 @@ const ELEMENT_COMMANDS = new Set([
1375
1711
  "hover",
1376
1712
  "drag"
1377
1713
  ]);
1378
- function actionsToLines(actions) {
1714
+ function actionsToLines(actions, stepMarkers) {
1379
1715
  const lines = [];
1380
1716
  let prevLine = null;
1381
1717
  let prevCommand = null;
1382
- for (const action of actions) {
1718
+ const markerByIndex = new Map(stepMarkers.map((m) => [m.actionIndex, m]));
1719
+ for (let i = 0; i < actions.length; i++) {
1720
+ const marker = markerByIndex.get(i);
1721
+ if (marker) {
1722
+ if (lines.length > 0) lines.push("");
1723
+ lines.push(`// step: ${marker.stepId} [${marker.source}]`);
1724
+ }
1725
+ const action = actions[i];
1383
1726
  const line = actionToLine(action);
1384
1727
  if (line === null) continue;
1385
1728
  if (line === prevLine) continue;
@@ -1398,16 +1741,16 @@ function actionToLine(action) {
1398
1741
  if ("selector" in action && isRefSelector(action.selector)) return null;
1399
1742
  switch (action.command) {
1400
1743
  case "cookies_clear": return `ab("cookies", "clear");`;
1401
- case "open": return `ab("open", ${j((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
1744
+ case "open": return `ab("open", ${jExpr((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
1402
1745
  case "snapshot": return action.observation ? `// ${action.observation}` : null;
1403
1746
  case "click": return `ab("click", ${j(action.selector)});`;
1404
1747
  case "dblclick": return `ab("dblclick", ${j(action.selector)});`;
1405
- case "fill": return `ab("fill", ${j(action.selector)}, ${j(action.value)});`;
1406
- case "type": return `ab("fill", ${j(action.selector)}, ${j(action.value)});`;
1748
+ case "fill": return `ab("fill", ${j(action.selector)}, ${jExpr(action.value)});`;
1749
+ case "type": return `ab("fill", ${j(action.selector)}, ${jExpr(action.value)});`;
1407
1750
  case "check": return `ab("check", ${j(action.selector)});`;
1408
1751
  case "uncheck": return `ab("uncheck", ${j(action.selector)});`;
1409
- case "press": return `ab("press", ${j(action.value)});`;
1410
- case "select": return `ab("select", ${j(action.selector)}, ${j(action.value)});`;
1752
+ case "press": return `ab("press", ${jExpr(action.value)});`;
1753
+ case "select": return `ab("select", ${j(action.selector)}, ${jExpr(action.value)});`;
1411
1754
  case "hover": return `ab("hover", ${j(action.selector)});`;
1412
1755
  case "scroll": return `ab("scroll", ${[action.direction ?? "down", ...action.pixels ? [action.pixels] : []].map(j).join(", ")});`;
1413
1756
  case "drag": return `ab("drag", ${j(action.selector)}, ${j(action.target)});`;
@@ -1423,10 +1766,10 @@ function actionToLine(action) {
1423
1766
  let assertLine = null;
1424
1767
  switch (action.assertType) {
1425
1768
  case "text_visible":
1426
- if (val) assertLine = `abAssertTextVisible(${j(val)});`;
1769
+ if (val) assertLine = `abAssertTextVisible(${jExpr(val)});`;
1427
1770
  break;
1428
1771
  case "text_not_visible":
1429
- if (val) assertLine = `abAssertNotVisible(${j("text=" + val)}, 180_000);`;
1772
+ if (val) assertLine = `abAssertNotVisible(${jExpr("text=" + val)}, 180_000);`;
1430
1773
  break;
1431
1774
  case "element_visible":
1432
1775
  if (sel) assertLine = `abAssertVisible(${j(sel)});`;
@@ -1435,7 +1778,7 @@ function actionToLine(action) {
1435
1778
  if (sel) assertLine = `abAssertNotVisible(${j(sel)});`;
1436
1779
  break;
1437
1780
  case "url_contains":
1438
- if (val) assertLine = `abAssertUrl(${j(val)});`;
1781
+ if (val) assertLine = `abAssertUrl(${jExpr(val)});`;
1439
1782
  break;
1440
1783
  case "element_enabled":
1441
1784
  if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertEnabled(${j(sel)});`;
@@ -1458,6 +1801,14 @@ function actionToLine(action) {
1458
1801
  }
1459
1802
  /** JSON.stringify — produces a quoted string literal safe for embedding in TS source. */
1460
1803
  const j = (s) => JSON.stringify(s);
1804
+ /**
1805
+ * Like `j`, but recognises `$VAR` / `${VAR}` env-ref forms in the value and
1806
+ * emits them as `${process.env.VAR ?? ""}` template-literal substitutions
1807
+ * instead of baking the literal `$VAR` string into the script. Used for
1808
+ * values that came from a spec or block param: form fills, opened URLs,
1809
+ * assertion texts/URLs.
1810
+ */
1811
+ const jExpr = (s) => envRefsToJsExpression(s);
1461
1812
  //#endregion
1462
1813
  //#region src/prompts/codegen.ts
1463
1814
  function buildCleanupPrompt(actions) {
@@ -1490,6 +1841,109 @@ ${actions.map((a, i) => {
1490
1841
  }).join("\n")}`;
1491
1842
  }
1492
1843
  //#endregion
1844
+ //#region src/codegen/cleanup.ts
1845
/**
 * Best-effort cleanup of a recorded action list.
 *
 * Sends the actions to Claude with the cleanup prompt (single turn, built-in
 * tools disabled) and parses the reply as a JSON array, tolerating an optional
 * Markdown code fence around it. The original `actions` array (same
 * reference) is returned whenever the reply cannot be trusted:
 *   - Claude reported an error or returned an empty result,
 *   - the reply is not valid JSON,
 *   - the parsed value is not a non-empty array,
 *   - any entry is not an object with a string `command` (downstream code
 *     dereferences `command` on every entry, so `null` or scalar entries
 *     would crash it).
 *
 * Note: the prompt deliberately does not surface the `stepId` field. Callers
 * that need stepIds preserved across cleanup must re-attach them afterwards.
 *
 * @param {object[]} actions Recorded actions to clean up.
 * @param {string} [model] Model name forwarded to the Claude invocation.
 * @returns {Promise<object[]>} The cleaned actions, or `actions` itself on any failure.
 */
async function cleanupActions$1(actions, model) {
  const isActionLike = (entry) => typeof entry === "object" && entry !== null && !Array.isArray(entry) && typeof entry.command === "string";
  try {
    const { result, isError } = await invokeClaudeStreaming({
      prompt: buildCleanupPrompt(actions),
      disableBuiltinTools: true,
      maxTurns: 1,
      model
    }, () => {});
    if (isError || !result) return actions;
    // Strip a surrounding ``` / ```json fence if the model added one.
    const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
    const parsed = JSON.parse(json);
    if (Array.isArray(parsed) && parsed.length > 0 && parsed.every(isActionLike)) return parsed;
  } catch {
    // Deliberately swallowed: cleanup is optional, so any failure (transport
    // error, malformed JSON) falls through to returning the input untouched.
  }
  return actions;
}
1870
+ //#endregion
1871
+ //#region src/runtime/bundled-config.ts
1872
/** Config locations probed in order, each relative to this module's URL. */
const CANDIDATES = [
  "../runtime/vitest.config.mjs",
  "./vitest.config.mjs",
  "./vitest.config.ts"
];
/**
 * Locate the vitest config shipped alongside this module.
 *
 * Returns the first candidate path that `accessSync` accepts. When none is
 * accessible, the path of `./vitest.config.ts` next to this module is returned
 * anyway, without checking that it exists.
 *
 * @returns {string} Absolute filesystem path of the chosen config file.
 */
function bundledVitestConfigPath() {
  const toPath = (rel) => fileURLToPath(new URL(rel, import.meta.url));
  const isAccessible = (path) => {
    try {
      accessSync(path);
      return true;
    } catch {
      return false;
    }
  };
  const found = CANDIDATES.map(toPath).find(isAccessible);
  return found ?? toPath("./vitest.config.ts");
}
1887
+ //#endregion
1888
+ //#region src/runtime/spawn-vitest.ts
1889
/** CommonJS-style `require`, anchored at this module, used to locate vitest. */
const require$1 = createRequire(import.meta.url);
/**
 * Resolve the absolute path of vitest's CLI entry point.
 *
 * Reads the `bin` field of the resolved `vitest/package.json`; it may be a
 * plain string or an object with a `vitest` key.
 *
 * @returns {string} Absolute path of the vitest bin script.
 * @throws {Error} If the manifest has no usable bin entry, or if
 *   `vitest/package.json` cannot be resolved at all.
 */
function resolveVitestBin() {
  const manifestPath = require$1.resolve("vitest/package.json");
  const manifest = require$1(manifestPath);
  let entry;
  if (typeof manifest.bin === "string") entry = manifest.bin;
  else entry = manifest.bin?.vitest;
  if (entry) return resolve(dirname(manifestPath), entry);
  throw new Error(`vitest package.json has no bin entry (resolved at ${manifestPath})`);
}
1897
/**
 * Run vitest with piped output, mirroring the child's stdout/stderr to this
 * process's stdout/stderr while also capturing them.
 *
 * @param {string[]} args Arguments passed to the vitest bin.
 * @param {{cwd?: string, env?: object}} [opts] Spawn options forwarded to `spawnVitestChild`.
 * @returns {Promise<{exitCode: number, stdout: string, stderr: string}>}
 *   Resolves once both streams are drained and the child has exited.
 */
async function spawnVitestTeed(args, opts = {}) {
  const proc = spawnVitestChild(args, opts, "pipe");
  const pending = [
    teeDrain(proc.stdout, process.stdout),
    teeDrain(proc.stderr, process.stderr),
    waitExit(proc)
  ];
  const settled = await Promise.all(pending);
  return {
    exitCode: settled[2],
    stdout: settled[0],
    stderr: settled[1]
  };
}
1910
/**
 * Start vitest with piped output and hand the raw streams to the caller
 * instead of draining them.
 *
 * @param {string[]} args Arguments passed to the vitest bin.
 * @param {{cwd?: string, env?: object}} [opts] Spawn options forwarded to `spawnVitestChild`.
 * @returns {{child: object, stdout: object, stderr: object, exited: Promise<number>}}
 *   The child process, its stdout/stderr streams, and a promise for its exit code.
 */
function spawnVitestStreaming(args, opts = {}) {
  const child = spawnVitestChild(args, opts, "pipe");
  const { stdout, stderr } = child;
  const exited = waitExit(child);
  return { child, stdout, stderr, exited };
}
1919
/**
 * Spawn the vitest bin under the current Node executable.
 *
 * stdin is always ignored; stdout and stderr both use the given `stdio` mode.
 *
 * @param {string[]} args Arguments appended after the vitest bin path.
 * @param {{cwd?: string, env?: object}} opts `cwd` is passed through as-is; `env`
 *   falls back to `process.env` when null or undefined.
 * @param {string} stdio stdio mode applied to both stdout and stderr (e.g. "pipe").
 * @returns {object} The spawned child process.
 */
function spawnVitestChild(args, opts, stdio) {
  const argv = [resolveVitestBin(), ...args];
  const spawnOptions = {
    cwd: opts.cwd,
    env: opts.env ?? process.env,
    stdio: ["ignore", stdio, stdio]
  };
  return spawn(process.execPath, argv, spawnOptions);
}
1931
/**
 * Read `stream` to its end as UTF-8 text, forwarding every chunk to `sink`
 * as it arrives and returning everything that was read.
 *
 * @param {object} stream Readable stream; its encoding is switched to "utf8".
 * @param {{write: Function}} sink Receives each chunk via `write(chunk)`.
 * @returns {Promise<string>} Concatenation of all chunks, in arrival order.
 */
async function teeDrain(stream, sink) {
  stream.setEncoding("utf8");
  const pieces = [];
  for await (const piece of stream) {
    pieces.push(piece);
    sink.write(piece);
  }
  return pieces.join("");
}
1940
/**
 * Wait for a child process to finish and report its exit code.
 *
 * Node emits "exit" with `(code, signal)`; when the child is terminated by a
 * signal, `code` is `null` and `signal` names the signal. That case must not
 * be reported as 0, or a killed test run would look like a passing one, so
 * signal termination resolves to 1.
 *
 * @param {object} child Child process (any emitter of "exit" / "error").
 * @returns {Promise<number>} The numeric exit code; 1 when the child was
 *   killed by a signal; 0 only when neither a code nor a signal was reported.
 *   Rejects with the emitted error if the child emits "error".
 */
function waitExit(child) {
  return new Promise((resolvePromise, rejectPromise) => {
    child.once("exit", (code, signal) => {
      resolvePromise(code ?? (signal ? 1 : 0));
    });
    child.once("error", rejectPromise);
  });
}
1946
+ //#endregion
1493
1947
  //#region src/diagnose/apply.ts
1494
1948
  function applyDiagnosis(script, diagnosis) {
1495
1949
  switch (diagnosis.type) {
@@ -1540,6 +1994,7 @@ function applyTiming(script, fixes) {
1540
1994
  summary: summary.join("; ")
1541
1995
  };
1542
1996
  }
1997
+ const REMOVABLE_ASSERT_RE = /\b(?:abAssert\w*|abWait)\b/;
1543
1998
  function applyOverAssertion(script, lineNumbers) {
1544
1999
  if (lineNumbers.length === 0) return {
1545
2000
  applied: false,
@@ -1552,13 +2007,13 @@ function applyOverAssertion(script, lineNumbers) {
1552
2007
  const idx = line - 1;
1553
2008
  if (idx < 0 || idx >= lines.length) continue;
1554
2009
  const content = lines[idx];
1555
- if (!/abAssert/.test(content)) continue;
2010
+ if (!REMOVABLE_ASSERT_RE.test(content)) continue;
1556
2011
  removed.push(`line ${line}: ${content.trim()}`);
1557
2012
  lines.splice(idx, 1);
1558
2013
  }
1559
2014
  if (removed.length === 0) return {
1560
2015
  applied: false,
1561
- reason: "no abAssert lines matched the proposed line numbers"
2016
+ reason: "no abAssert/abWait lines matched the proposed line numbers"
1562
2017
  };
1563
2018
  return {
1564
2019
  applied: true,
@@ -1604,7 +2059,7 @@ function previewDiff(before, after) {
1604
2059
  //#endregion
1605
2060
  //#region src/diagnose/prompt.ts
1606
2061
  function buildDiagnosePrompt(input) {
1607
- const { script, specMarkdown, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
2062
+ const { script, specYaml, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
1608
2063
  const numbered = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
1609
2064
  return `You are diagnosing a failing E2E test. The test was generated from a recorded trace of the original interaction. Compare the failing run against the original spec and recorded actions to determine WHY the test failed and what the right fix is.
1610
2065
 
@@ -1695,11 +2150,11 @@ Pick exactly ONE category. The output JSON must follow the shape for that catego
1695
2150
  - Your **final** assistant message must start with \`{\` and end with \`}\` — a single JSON object, nothing before or after. No prose preamble like "Confirmed: ...", no markdown fences, no commentary, no tool calls in the same turn. If you have an analysis sentence, put it in the \`reasoning\` field.
1696
2151
  - Line numbers refer to the numbered test script below (1-based).
1697
2152
  - For SELECTOR_DRIFT, \`oldSelector\` must match a substring of the script at that line; \`newSelector\` must be backed by a concrete file:line you read with Grep/Read (do not invent). Cite the evidence in \`reasoning\`.
1698
- - For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`).
1699
- - Cross-check assertions against the spec markdown. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.
2153
+ - For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`) or existence-checking waits (\`abWait\`); a recorded \`abWait("[selector]")\` is an implicit existence assertion and a valid removal candidate when the spec never required that element to be present.
2154
+ - Cross-check assertions against the spec YAML. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.
1700
2155
 
1701
- ## Test Spec (test-spec.md)
1702
- ${specMarkdown}
2156
+ ## Test Spec (spec.yaml)
2157
+ ${specYaml}
1703
2158
 
1704
2159
  ## Recorded Actions (actions.json summary)
1705
2160
  ${actions.map((a, i) => {
@@ -1910,8 +2365,7 @@ function normaliseSleepFixes(raw) {
1910
2365
  const line = typeof item["line"] === "number" ? item["line"] : null;
1911
2366
  if (line === null) continue;
1912
2367
  const reason = typeof item["reason"] === "string" ? item["reason"] : "";
1913
- const kind = item["kind"];
1914
- if (kind === "insert" || typeof item["seconds"] === "number" && item["increase_to"] === void 0) {
2368
+ if (item["kind"] === "insert") {
1915
2369
  const seconds = typeof item["seconds"] === "number" ? item["seconds"] : null;
1916
2370
  if (seconds === null) continue;
1917
2371
  out.push({
@@ -1920,9 +2374,7 @@ function normaliseSleepFixes(raw) {
1920
2374
  seconds,
1921
2375
  reason
1922
2376
  });
1923
- continue;
1924
- }
1925
- if (kind === "increase" || typeof item["increase_to"] === "number") {
2377
+ } else if (item["kind"] === "increase") {
1926
2378
  const increaseTo = typeof item["increase_to"] === "number" ? item["increase_to"] : null;
1927
2379
  if (increaseTo === null) continue;
1928
2380
  out.push({
@@ -1931,7 +2383,6 @@ function normaliseSleepFixes(raw) {
1931
2383
  increase_to: increaseTo,
1932
2384
  reason
1933
2385
  });
1934
- continue;
1935
2386
  }
1936
2387
  }
1937
2388
  return out;
@@ -2116,7 +2567,7 @@ const DEFAULT_CONFIDENCE_THRESHOLD = .8;
2116
2567
  * or the diagnose loop chose to bail out early.
2117
2568
  */
2118
2569
  async function runAutoFixLoop(input) {
2119
- const { scriptPath, initialRun, specMarkdown, actions, maxRetries, mode, runVitest, agentBrowserSession, outputLanguage, model } = input;
2570
+ const { scriptPath, initialRun, specYaml, actions, maxRetries, mode, runVitest, agentBrowserSession, outputLanguage, model } = input;
2120
2571
  let { exitCode, output, currentScript } = initialRun;
2121
2572
  if (exitCode === 0) return true;
2122
2573
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
@@ -2127,7 +2578,7 @@ async function runAutoFixLoop(input) {
2127
2578
  else fix("page snapshot unavailable; continuing without it");
2128
2579
  const fixed = await diagnoseAndFix({
2129
2580
  script: currentScript,
2130
- specMarkdown,
2581
+ specYaml,
2131
2582
  actions,
2132
2583
  failureLog: output,
2133
2584
  pageSnapshot: pageSnapshot ?? void 0,
@@ -2148,10 +2599,10 @@ async function runAutoFixLoop(input) {
2148
2599
  return false;
2149
2600
  }
2150
2601
  async function diagnoseAndFix(input) {
2151
- const { script, specMarkdown, actions, failureLog, pageSnapshot, mode, outputLanguage, model } = input;
2602
+ const { script, specYaml, actions, failureLog, pageSnapshot, mode, outputLanguage, model } = input;
2152
2603
  const outcome = await timedPhase("diagnose", () => diagnose({
2153
2604
  script,
2154
- specMarkdown,
2605
+ specYaml,
2155
2606
  actions,
2156
2607
  failureLog,
2157
2608
  pageSnapshot,
@@ -2186,7 +2637,7 @@ async function diagnoseAndFix(input) {
2186
2637
  return apply.script;
2187
2638
  }
2188
2639
  if (decision === "skip-low-confidence") {
2189
- fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (--no-interactive)`);
2640
+ fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (mode: ${mode})`);
2190
2641
  handoffToUser(result, outcome.raw, outputLanguage);
2191
2642
  return null;
2192
2643
  }
@@ -2210,10 +2661,15 @@ async function diagnoseAndFix(input) {
2210
2661
  process.exit(1);
2211
2662
  }
2212
2663
  }
2664
/**
 * Choose what to do with a diagnosis.
 *
 * A diagnosis whose confidence reaches DEFAULT_CONFIDENCE_THRESHOLD is always
 * applied automatically. Below the threshold, the unattended modes ("auto" and
 * "non-interactive") skip the fix, and every other mode falls back to asking
 * the user. `auto` used to bypass the threshold; it no longer does, because a
 * low-confidence guess can corrupt working code and CI wants obvious fixes
 * applied and everything else to fail loudly.
 *
 * @param {{confidence: number}} result Diagnosis carrying a confidence score.
 * @param {string} mode Fix mode.
 * @returns {"apply-auto"|"skip-low-confidence"|"interactive"}
 */
function decide(result, mode) {
  if (result.confidence >= DEFAULT_CONFIDENCE_THRESHOLD) return "apply-auto";
  switch (mode) {
    case "auto":
    case "non-interactive":
      return "skip-low-confidence";
    default:
      return "interactive";
  }
}
2219
2675
  function reportDiagnosis(result) {
@@ -2250,27 +2706,27 @@ function handoffMessage(diagnosis, language) {
2250
2706
  }
2251
2707
  function handoffEn(diagnosis) {
2252
2708
  switch (diagnosis.type) {
2253
- case "DATA_MISSING": return [`application-side issue: required data is missing. ${diagnosis.reason}`, "next step: seed the data (or update test-spec.md prerequisites), then re-run trace + generate."];
2709
+ case "DATA_MISSING": return [`application-side issue: required data is missing. ${diagnosis.reason}`, "next step: seed the data (or update spec.yaml prerequisites), then re-run trace + generate."];
2254
2710
  case "UNKNOWN": return [`could not classify the failure. ${diagnosis.reason}`, "next step: read the failure log above, decide whether the test or the app is wrong, and fix manually. consider re-running ccqa trace if the recorded flow no longer matches the live app."];
2255
2711
  case "SELECTOR_DRIFT": return [
2256
2712
  `selector likely drifted but auto-apply was not safe.`,
2257
2713
  `proposed: line ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason}).`,
2258
2714
  "next step: confirm in the live app and either accept the proposal manually, or re-run ccqa trace to recapture the new selector."
2259
2715
  ];
2260
- case "OVER_ASSERTION": return [`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`, "next step: cross-check test-spec.md. either delete the assertion from the test, or tighten the spec to require it."];
2716
+ case "OVER_ASSERTION": return [`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`, "next step: cross-check spec.yaml. either delete the assertion from the test, or tighten the spec to require it."];
2261
2717
  case "TIMING_ISSUE": return [`timing fix proposed but couldn't be applied automatically.`, "next step: insert a sleep manually before the failing line, or re-run with a higher confidence trace."];
2262
2718
  }
2263
2719
  }
2264
2720
  function handoffJa(diagnosis) {
2265
2721
  switch (diagnosis.type) {
2266
- case "DATA_MISSING": return [`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`, "次のステップ: データを seed する(または test-spec.md の prerequisites を更新)してから ccqa trace + generate をやり直してください。"];
2722
+ case "DATA_MISSING": return [`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`, "次のステップ: データを seed する(または spec.yaml の prerequisites を更新)してから ccqa trace + generate をやり直してください。"];
2267
2723
  case "UNKNOWN": return [`失敗を分類できませんでした。${diagnosis.reason}`, "次のステップ: 上の失敗ログを確認し、テストとアプリのどちらが原因か判断して手動で修正してください。記録した手順がアプリの現状と合わない場合は ccqa trace の再実行を検討してください。"];
2268
2724
  case "SELECTOR_DRIFT": return [
2269
2725
  "selector が変わった可能性が高いですが、自動適用は安全でないと判断しました。",
2270
2726
  `提案: 行 ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason})`,
2271
2727
  "次のステップ: アプリで新 selector を確認し、手動で適用するか ccqa trace をやり直して新しい selector を取り直してください。"
2272
2728
  ];
2273
- case "OVER_ASSERTION": return [`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`, "次のステップ: test-spec.md と照合して、テスト側の assertion を削るか、spec 側を更新してください。"];
2729
+ case "OVER_ASSERTION": return [`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`, "次のステップ: spec.yaml と照合して、テスト側の assertion を削るか、spec 側を更新してください。"];
2274
2730
  case "TIMING_ISSUE": return ["timing 関連の修正案は出ましたが、自動適用できませんでした。", "次のステップ: 失敗行の前に手動で sleep を入れるか、より信頼度の高い trace を取り直してください。"];
2275
2731
  }
2276
2732
  }
@@ -2306,18 +2762,24 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
2306
2762
  meta("actions", actions.length);
2307
2763
  const specContent = await readSpecFile(featureName, specName);
2308
2764
  const spec = parseTestSpec(specContent);
2309
- const setupScripts = await loadSetupScripts(spec.setups);
2310
- if (setupScripts.length > 0) meta("setups", setupScripts.map((s) => s.name).join(", "));
2765
+ const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
2766
+ await warnStaleBlockArtifacts();
2767
+ meta("steps", expanded.length);
2311
2768
  meta("fix-mode", mode);
2312
2769
  meta("language", outputLanguage);
2313
2770
  blank();
2314
- const cleanedActions = await cleanupActions$1(actions, model);
2771
+ const cleanedActions = await cleanupActions(actions, model);
2315
2772
  if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
2316
- const scriptPath = await saveTestScript(featureName, specName, actionsToScript(cleanedActions, spec.title, setupScripts.length > 0 ? setupScripts : void 0));
2773
+ const markers = buildStepMarkers(expanded, cleanedActions);
2774
+ const scriptPath = await saveTestScript(featureName, specName, actionsToScript({
2775
+ actions: cleanedActions,
2776
+ testName: spec.title,
2777
+ stepMarkers: markers
2778
+ }));
2317
2779
  meta("saved", scriptPath);
2318
2780
  blank();
2319
2781
  const agentBrowserSession = useSnapshot ? `ccqa-generate-${Date.now()}` : void 0;
2320
- const runVitestForSession = (path) => runVitest$1(path, agentBrowserSession);
2782
+ const runVitestForSession = (path) => runVitest(path, agentBrowserSession);
2321
2783
  let signalHandler = null;
2322
2784
  if (agentBrowserSession) {
2323
2785
  await closeSession(agentBrowserSession);
@@ -2336,7 +2798,7 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
2336
2798
  if (await runAutoFixLoop({
2337
2799
  scriptPath,
2338
2800
  initialRun,
2339
- specMarkdown: specContent,
2801
+ specYaml: specContent,
2340
2802
  actions: cleanedActions,
2341
2803
  maxRetries,
2342
2804
  mode,
@@ -2358,6 +2820,30 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
2358
2820
  if (agentBrowserSession) await closeSession(agentBrowserSession);
2359
2821
  }
2360
2822
  }
2823
/**
 * Produce the per-step markers that `actionsToScript` consumes.
 *
 * Walks the actions in order and emits a marker at each action whose `stepId`
 * differs from the most recently emitted marker's step. Actions with no
 * `stepId`, or with a `stepId` that matches no entry in `steps`, emit nothing
 * and do not reset that "most recent" state, so unknown ids are skipped
 * rather than mis-labelled.
 *
 * @param {{id: string, source: *}[]} steps Expanded spec steps.
 * @param {{stepId?: string}[]} actions Recorded (possibly cleaned) actions.
 * @returns {{actionIndex: number, stepId: string, source: *}[]} Markers in action order.
 */
function buildStepMarkers(steps, actions) {
  const knownSteps = new Map();
  for (const step of steps) knownSteps.set(step.id, step);
  const markers = [];
  let previousId = null;
  for (const [actionIndex, action] of actions.entries()) {
    const currentId = action.stepId;
    const startsNewRun = Boolean(currentId) && currentId !== previousId;
    if (!startsNewRun) continue;
    const step = knownSteps.get(currentId);
    if (step) {
      markers.push({
        actionIndex,
        stepId: step.id,
        source: step.source
      });
      previousId = currentId;
    }
  }
  return markers;
}
2361
2847
  async function confirmOverwrite(path) {
2362
2848
  if (!process.stdin.isTTY) {
2363
2849
  warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
@@ -2377,67 +2863,7 @@ async function confirmOverwrite(path) {
2377
2863
  rl.close();
2378
2864
  }
2379
2865
  }
2380
- async function loadSetupScripts(setups) {
2381
- if (!setups?.length) return [];
2382
- const result = [];
2383
- for (const ref of setups) {
2384
- const scriptPath = join(getSetupDir(ref.name), "test.spec.ts");
2385
- const resolved = replacePlaceholders(extractTestBody(await readFile(scriptPath, "utf-8").catch(() => {
2386
- throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
2387
- })), ref.params ?? {});
2388
- result.push({
2389
- name: ref.name,
2390
- body: resolved
2391
- });
2392
- }
2393
- return result;
2394
- }
2395
- /**
2396
- * Extract the test body (statements inside the test callback) from a setup
2397
- * test script.
2398
- *
2399
- * Locates the first arrow callback (`=> {`) after a top-level `test(` call
2400
- * and returns the text between the matching `{` and `}`. Handles both
2401
- * single-line and multi-line `test(...)` formatting (the latter is what
2402
- * prettier produces).
2403
- *
2404
- * Brace tracking is naive (string/regex/comment literals are not parsed
2405
- * specially), but setup test scripts are themselves generated by ccqa and
2406
- * follow a fixed shape, so this is sufficient in practice.
2407
- */
2408
- function extractTestBody(script) {
2409
- const testCallMatch = /\btest\s*\(/.exec(script);
2410
- if (!testCallMatch) return "";
2411
- const arrowIdx = script.indexOf("=> {", testCallMatch.index);
2412
- if (arrowIdx === -1) return "";
2413
- const bodyStart = arrowIdx + 4;
2414
- let depth = 1;
2415
- let i = bodyStart;
2416
- for (; i < script.length; i++) {
2417
- const ch = script[i];
2418
- if (ch === "{") depth++;
2419
- else if (ch === "}") {
2420
- depth--;
2421
- if (depth === 0) break;
2422
- }
2423
- }
2424
- if (depth !== 0) return "";
2425
- return script.slice(bodyStart, i).replace(/^\n/, "").replace(/\n\s*$/, "");
2426
- }
2427
- function replacePlaceholders(body, params) {
2428
- let result = body;
2429
- for (const [key, value] of Object.entries(params)) if (hasEnvRef(value)) {
2430
- const expr = envRefsToJsExpression(value);
2431
- const re = new RegExp(`(["'])\\{\\{${escapeRegExp(key)}\\}\\}\\1`, "g");
2432
- result = result.replace(re, expr);
2433
- result = result.replaceAll(`{{${key}}}`, value);
2434
- } else result = result.replaceAll(`{{${key}}}`, value);
2435
- return result;
2436
- }
2437
- function escapeRegExp(s) {
2438
- return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
2439
- }
2440
- async function runVitest$1(scriptPath, agentBrowserSession) {
2866
+ async function runVitest(scriptPath, agentBrowserSession) {
2441
2867
  const { exitCode, stdout, stderr } = await spawnVitestTeed([
2442
2868
  "run",
2443
2869
  "--config",
@@ -2454,513 +2880,89 @@ async function runVitest$1(scriptPath, agentBrowserSession) {
2454
2880
  currentScript
2455
2881
  };
2456
2882
  }
2457
- async function cleanupActions$1(actions, model) {
2458
- try {
2459
- const { result, isError } = await invokeClaudeStreaming({
2460
- prompt: buildCleanupPrompt(actions),
2461
- disableBuiltinTools: true,
2462
- maxTurns: 1,
2463
- model
2464
- }, () => {});
2465
- if (isError || !result) return actions;
2466
- const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
2467
- const parsed = JSON.parse(json);
2468
- if (Array.isArray(parsed) && parsed.length > 0) return parsed;
2469
- } catch {}
2470
- return actions;
2883
/**
 * Run the shared cleanup pass and restore step ids on its output.
 *
 * When the cleanup pass hands back the very same array (its fallback), that
 * array is returned untouched; otherwise the cleaned actions are passed
 * through `reattachStepIds` together with the original list.
 *
 * @param {object[]} actions Recorded actions.
 * @param {string} [model] Model name forwarded to the cleanup pass.
 * @returns {Promise<object[]>} Cleaned actions, or `actions` itself if cleanup fell back.
 */
async function cleanupActions(actions, model) {
  const pruned = await cleanupActions$1(actions, model);
  if (pruned === actions) return actions;
  return reattachStepIds(pruned, actions);
}
2472
- //#endregion
2473
- //#region src/cli/run.ts
2474
- const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
2475
- async function resolveVitestConfig() {
2476
- try {
2477
- await access(USER_VITEST_CONFIG);
2478
- return USER_VITEST_CONFIG;
2479
- } catch {
2480
- return bundledVitestConfigPath();
2887
/**
 * Restore `stepId` on actions returned by the Claude cleanup pass.
 *
 * The cleanup prompt does not expose `stepId`, so the cleaned array comes back
 * without it. Each cleaned action is paired with the next entry in `original`,
 * searching forward from just past the previous match, that `sameShape`
 * accepts; if that entry carries a `stepId`, a copy of the cleaned action with
 * that `stepId` is emitted. Cleaned actions with no match, or whose match has
 * no `stepId`, are emitted as-is.
 *
 * The search never moves backwards, so two identical cleaned actions pair
 * with the first and second identical originals rather than both with the
 * first.
 *
 * @param {object[]} cleaned Actions returned by cleanup.
 * @param {object[]} original Actions as recorded, possibly carrying `stepId`.
 * @returns {object[]} New array with one entry per cleaned action, in order.
 */
function reattachStepIds(cleaned, original) {
  const restored = [];
  let searchFrom = 0;
  for (const entry of cleaned) {
    const hit = original.findIndex((candidate, idx) => idx >= searchFrom && sameShape(entry, candidate));
    if (hit === -1) {
      restored.push(entry);
      continue;
    }
    searchFrom = hit + 1;
    const donorId = original[hit]?.stepId;
    restored.push(donorId ? { ...entry, stepId: donorId } : entry);
  }
  return restored;
}
2483
- const runCommand = new Command("run").argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all").description("Run generated agent-browser test scripts").action(async (target) => {
2484
- await runTests(target);
2485
- });
2486
- async function runTests(target) {
2487
- header("run", target);
2488
- const specs = await resolveSpecs(target);
2489
- if (specs.length === 0) {
2490
- error("no test scripts found");
2491
- hint("run 'ccqa generate <feature>/<spec>' first to generate tests");
2492
- process.exit(1);
2493
- }
2494
- const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
2495
- const summaries = [];
2496
- let overallExitCode = 0;
2497
- const vitestConfig = await resolveVitestConfig();
2498
- try {
2499
- for (let i = 0; i < specs.length; i++) {
2500
- const { featureName, specName } = specs[i];
2501
- const scriptFile = await getTestScript(featureName, specName);
2502
- if (!scriptFile) {
2503
- warn(`${featureName}/${specName}: no test.spec.ts found`);
2504
- continue;
2505
- }
2506
- run(`${featureName}/${specName}`);
2507
- meta("test", scriptFile);
2508
- blank();
2509
- const reportFile = join(tmpDir, `report-${i}.json`);
2510
- const proc = spawnVitestStreaming([
2511
- "run",
2512
- "--config",
2513
- vitestConfig,
2514
- scriptFile,
2515
- "--reporter=json",
2516
- `--outputFile.json=${reportFile}`
2517
- ]);
2518
- await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]);
2519
- const exitCode = await proc.exited;
2520
- if (exitCode !== 0) overallExitCode = exitCode;
2521
- const report = await readReport(reportFile);
2522
- summaries.push({
2523
- featureName,
2524
- specName,
2525
- scriptFile,
2526
- report,
2527
- exitCode
2528
- });
2529
- blank();
2530
- }
2531
- printSummary(summaries);
2532
- } finally {
2533
- await rm(tmpDir, {
2534
- recursive: true,
2535
- force: true
2536
- });
2537
- }
2538
- process.exit(overallExitCode);
2539
- }
2540
- async function readReport(path) {
2541
- try {
2542
- const raw = await readFile(path, "utf8");
2543
- return JSON.parse(raw);
2544
- } catch {
2545
- return null;
2546
- }
2547
- }
2548
- const useColor = process.stdout.isTTY && process.env.NO_COLOR == null;
2549
- const C = {
2550
- reset: useColor ? "\x1B[0m" : "",
2551
- bold: useColor ? "\x1B[1m" : "",
2552
- dim: useColor ? "\x1B[2m" : "",
2553
- green: useColor ? "\x1B[32m" : "",
2554
- red: useColor ? "\x1B[31m" : "",
2555
- yellow: useColor ? "\x1B[33m" : "",
2556
- cyan: useColor ? "\x1B[36m" : "",
2557
- gray: useColor ? "\x1B[90m" : ""
2558
- };
2559
- function printSummary(summaries) {
2560
- process.stdout.write(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
2561
- let totalTests = 0;
2562
- let totalPassed = 0;
2563
- let totalFailed = 0;
2564
- let totalSkipped = 0;
2565
- for (const s of summaries) {
2566
- const header = `${C.bold}${s.featureName}/${s.specName}${C.reset}`;
2567
- if (!s.report) {
2568
- const icon = s.exitCode === 0 ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
2569
- process.stdout.write(`${icon} ${header} ${C.dim}(no report)${C.reset}\n`);
2570
- continue;
2571
- }
2572
- totalTests += s.report.numTotalTests;
2573
- totalPassed += s.report.numPassedTests;
2574
- totalFailed += s.report.numFailedTests;
2575
- totalSkipped += s.report.numPendingTests;
2576
- const ok = s.report.success;
2577
- const icon = ok ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
2578
- const countColor = ok ? C.green : C.red;
2579
- process.stdout.write(`${icon} ${header} ${countColor}${s.report.numPassedTests}/${s.report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
2580
- for (const file of s.report.testResults) for (const a of file.assertionResults) {
2581
- const aIcon = assertionIcon(a.status);
2582
- const dur = a.duration != null ? ` ${C.gray}${formatDuration(a.duration)}${C.reset}` : "";
2583
- process.stdout.write(` ${aIcon} ${a.fullName}${dur}\n`);
2584
- if (a.status === "failed" && a.failureMessages?.length) for (const msg of a.failureMessages) {
2585
- const firstLine = msg.split("\n")[0] ?? msg;
2586
- process.stdout.write(` ${C.red}${firstLine}${C.reset}\n`);
2587
- }
2588
- }
2589
- }
2590
- const specsPassed = summaries.filter((s) => s.exitCode === 0).length;
2591
- const specsFailed = summaries.filter((s) => s.exitCode !== 0).length;
2592
- process.stdout.write("\n");
2593
- process.stdout.write(` ${C.bold}Specs${C.reset} ${summaries.length} (${C.green}${specsPassed} passed${C.reset}, ${specsFailed > 0 ? C.red : C.dim}${specsFailed} failed${C.reset})\n`);
2594
- process.stdout.write(` ${C.bold}Tests${C.reset} ${totalTests} (${C.green}${totalPassed} passed${C.reset}, ${totalFailed > 0 ? C.red : C.dim}${totalFailed} failed${C.reset}, ${C.yellow}${totalSkipped} skipped${C.reset})\n`);
2595
- process.stdout.write("\n");
2596
- }
2597
- function assertionIcon(status) {
2598
- switch (status) {
2599
- case "passed": return `${C.green}✔${C.reset}`;
2600
- case "failed": return `${C.red}✖${C.reset}`;
2601
- case "skipped":
2602
- case "pending":
2603
- case "todo": return `${C.yellow}◌${C.reset}`;
2604
- }
2605
- }
2606
- function formatDuration(ms) {
2607
- if (ms < 1e3) return `${Math.round(ms)}ms`;
2608
- return `${(ms / 1e3).toFixed(2)}s`;
2609
- }
2610
- const NOISE_LINE_PATTERNS = [/^JSON report written to /];
2611
- async function streamFiltered(source, sink) {
2612
- source.setEncoding("utf8");
2613
- let buffer = "";
2614
- for await (const chunk of source) {
2615
- buffer += chunk;
2616
- let nl = buffer.indexOf("\n");
2617
- while (nl !== -1) {
2618
- const line = buffer.slice(0, nl);
2619
- buffer = buffer.slice(nl + 1);
2620
- if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) sink.write(line + "\n");
2621
- nl = buffer.indexOf("\n");
2622
- }
2623
- }
2624
- if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) sink.write(buffer);
2625
- }
2626
- async function resolveSpecs(target) {
2627
- if (!target) return listAllSpecs();
2628
- if (target.includes("/")) {
2629
- const { featureName, specName } = parseSpecPath(target);
2630
- return [{
2631
- featureName,
2632
- specName
2633
- }];
2634
- }
2635
- return (await listSpecsForFeature(target)).map((specName) => ({
2636
- featureName: target,
2637
- specName
2638
- }));
2923
/**
 * Decide whether two actions describe the same operation.
 *
 * `command` must be strictly equal; `selector`, `value` and `assertType` are
 * compared after treating null/undefined as the empty string.
 *
 * @param {object} a First action.
 * @param {object} b Second action.
 * @returns {boolean} True when all four fields agree.
 */
function sameShape(a, b) {
  if (a.command !== b.command) return false;
  const optionalFields = ["selector", "value", "assertType"];
  return optionalFields.every((field) => (a[field] ?? "") === (b[field] ?? ""));
}
2640
2926
  //#endregion
2641
- //#region src/cli/trace-setup.ts
2642
- const traceSetupCommand = new Command("trace-setup").argument("<name>", "Setup name to trace (e.g. login)").description("Trace a setup procedure using dummy placeholder values").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (name, opts) => {
2643
- await runTraceSetup(name, opts.model);
2644
- });
2645
- async function runTraceSetup(name, model) {
2646
- header("trace-setup", name);
2647
- try {
2648
- meta("agent-browser", assertAgentBrowserAvailable());
2649
- } catch (e) {
2650
- if (e instanceof AgentBrowserUnavailableError) {
2651
- error(formatAgentBrowserUnavailableMessage());
2652
- process.exit(1);
2653
- }
2654
- throw e;
2655
- }
2656
- await ensureCcqaDir();
2657
- const spec = parseSetupSpec(await readSetupSpecFile(name));
2658
- const resolvedSpec = replacePlaceholdersWithDummies(spec);
2659
- const secretsToScrub = buildSecretsToScrub(spec);
2660
- meta("setup", spec.title);
2661
- meta("steps", spec.steps.length);
2662
- if (spec.placeholders) meta("placeholders", Object.keys(spec.placeholders).join(", "));
2663
- blank();
2664
- const systemPrompt = buildSetupTraceSystemPrompt(resolvedSpec);
2665
- const prompt = buildSetupTracePrompt(resolvedSpec);
2666
- info("Running agent-browser session...");
2667
- blank();
2668
- const routeSteps = [];
2669
- let overallStatus = "passed";
2670
- const traceActions = [];
2671
- const { isError } = await invokeClaudeStreaming({
2672
- prompt,
2673
- systemPrompt,
2674
- allowedTools: [
2675
- "Bash(*)",
2676
- "Read",
2677
- "Grep",
2678
- "Glob"
2679
- ],
2680
- env: {
2681
- PATH: pathWithAgentBrowserShim(process.env["PATH"]),
2682
- ANTHROPIC_API_KEY: ""
2683
- },
2684
- model,
2685
- onAbAction: (abAction) => {
2686
- const action = parseAbAction(scrubSecrets(abAction, secretsToScrub));
2687
- if (action) traceActions.push(action);
2688
- },
2689
- onAbActionFailed: () => {
2690
- traceActions.pop();
2691
- }
2692
- }, (msg) => {
2693
- if (msg.type !== "assistant") return;
2694
- for (const block of msg.message.content ?? []) {
2695
- if (block.type !== "text" || !block.text) continue;
2696
- const text = block.text;
2697
- const statusLine = parseStatusLine(text);
2698
- if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
2699
- for (const line of text.split("\n")) {
2700
- const trimmed = line.trim();
2701
- if (trimmed.startsWith("ROUTE_STEP|")) {
2702
- const routeStep = parseRouteStep(trimmed);
2703
- if (routeStep) {
2704
- routeSteps.push(routeStep);
2705
- if (routeStep.status === "FAILED") overallStatus = "failed";
2706
- }
2707
- } else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
2708
- const action = parseAbAction(scrubSecrets(trimmed, secretsToScrub));
2709
- if (action) traceActions.push(action);
2710
- }
2711
- }
2712
- }
2713
- });
2714
- if (isError) overallStatus = "failed";
2715
- const route = {
2716
- specName: name,
2717
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2718
- status: overallStatus,
2719
- steps: routeSteps
2720
- };
2721
- const [routePath, actionsPath] = await Promise.all([saveSetupRoute(name, route), saveSetupActions(name, traceActions)]);
2722
- blank();
2723
- meta("route", routePath);
2724
- meta("saved", actionsPath);
2725
- meta("actions", traceActions.length);
2726
- meta("status", overallStatus.toUpperCase());
2727
- hint(`run 'ccqa generate-setup ${name}' to generate and validate the setup`);
2728
- }
2729
/**
 * Returns a copy of `spec` in which every `{{name}}` token inside each step's
 * `instruction` and `expected` text is replaced by that placeholder's dummy
 * value (passed through `resolveEnvRefs`). A spec without placeholders is
 * returned unchanged (same object).
 */
function replacePlaceholdersWithDummies(spec) {
	const { placeholders } = spec;
	if (!placeholders) return spec;
	// Apply every placeholder substitution, in declaration order, to one text.
	const fillIn = (text) => Object.entries(placeholders).reduce(
		(filled, [key, def]) => filled.replaceAll(`{{${key}}}`, resolveEnvRefs(def.dummy)),
		text
	);
	const steps = spec.steps.map((step) => ({
		...step,
		instruction: fillIn(step.instruction),
		expected: fillIn(step.expected)
	}));
	return {
		...spec,
		steps
	};
}
2927
+ //#region src/claude/extract-json.ts
2746
2928
  /**
2747
- * Build the substitution map used to scrub real secret values out of
2748
- * recorded actions before they are written to actions.json.
2749
- *
2750
- * For each placeholder whose dummy contains env refs, store
2751
- * <resolved-value> -> <original ${VAR} string>
2752
- * so that an `ab fill ... <secret>` line records the placeholder string
2753
- * instead of the secret. Empty resolved values are skipped — they would
2754
- * otherwise replace incidental empty strings in the recorded actions.
2929
+ * Pulls a JSON object out of a Claude completion. Accepts either a fenced
2930
+ * ```json block or a bare `{...}` payload that constitutes the whole reply.
2931
+ * Returns null when neither shape is present.
2755
2932
  */
2756
function buildSecretsToScrub(spec) {
	// Maps <resolved value> -> <original dummy string containing the env ref>.
	const secrets = /* @__PURE__ */ new Map();
	for (const { dummy } of Object.values(spec.placeholders ?? {})) {
		// Only dummies that reference the environment can carry a real secret.
		if (!hasEnvRef(dummy)) continue;
		const realValue = resolveEnvRefs(dummy);
		// An empty resolved value is skipped so it never becomes a map key.
		if (realValue) secrets.set(realValue, dummy);
	}
	return secrets;
}
2768
- /** Replace every occurrence of a recorded secret with its `${VAR}` placeholder. */
2769
- function scrubSecrets(line, secrets) {
2770
- if (secrets.size === 0) return line;
2771
- let result = line;
2772
- for (const [secret, placeholder] of secrets) {
2773
- if (!result.includes(secret)) continue;
2774
- result = result.split(secret).join(placeholder);
2775
- }
2776
- return result;
2933
function extractJsonBlock(text) {
	// First choice: the body of the first ``` / ```json fenced block.
	const fenceMatch = /```(?:json)?\s*\n([\s\S]*?)\n```/.exec(text);
	const fencedBody = fenceMatch?.[1];
	if (fencedBody) return fencedBody.trim();
	// Otherwise accept the reply only when, trimmed, it is a bare {...} payload.
	const candidate = text.trim();
	const looksLikeObject = candidate.startsWith("{") && candidate.endsWith("}");
	return looksLikeObject ? candidate : null;
}
2778
2940
  //#endregion
2779
- //#region src/cli/generate-setup.ts
2780
- const generateSetupCommand = new Command("generate-setup").argument("<name>", "Setup name to generate (e.g. login)").description("Clean up, validate, and templatize setup actions").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (name, opts) => {
2781
- const mode = resolveMode(opts);
2782
- await runGenerateSetup(name, parseInt(opts.maxRetries, 10), opts.fromDummy ?? false, mode, opts.language ?? "en", opts.model);
2783
- });
2784
- async function runGenerateSetup(name, maxRetries, fromDummy, mode, outputLanguage, model) {
2785
- header("generate-setup", name);
2786
- await ensureCcqaDir();
2787
- const specContent = await readSetupSpecFile(name);
2788
- const spec = parseSetupSpec(specContent);
2789
- const dummyPath = join(getSetupDir(name), "test.dummy.spec.ts");
2790
- const finalPath = join(getSetupDir(name), "test.spec.ts");
2791
- let cleanedActions = [];
2792
- if (fromDummy) {
2793
- if (!await stat(dummyPath).then(() => true).catch(() => false)) {
2794
- warn(`test.dummy.spec.ts not found. Run without --from-dummy first.`);
2795
- process.exit(1);
2796
- }
2797
- info("Resuming from existing test.dummy.spec.ts");
2798
- } else {
2799
- const { actions } = await getSetupActions(name);
2800
- meta("setup", spec.title);
2801
- meta("actions", actions.length);
2802
- meta("fix-mode", mode);
2803
- meta("language", outputLanguage);
2804
- blank();
2805
- cleanedActions = await cleanupActions(actions, model);
2806
- if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
2807
- await writeFile(dummyPath, actionsToScript(cleanedActions, spec.title), "utf-8");
2808
- meta("saved", dummyPath);
2809
- }
2810
- blank();
2811
- const agentBrowserSession = `ccqa-generate-setup-${name}-${Date.now()}`;
2812
- const runVitestForSession = (path) => runVitestResolved(path, agentBrowserSession);
2813
- await closeSession(agentBrowserSession);
2814
- const signalHandler = () => {
2815
- closeSession(agentBrowserSession).finally(() => process.exit(130));
2816
- };
2817
- process.once("SIGINT", signalHandler);
2818
- process.once("SIGTERM", signalHandler);
2819
- try {
2820
- const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(dummyPath), "run");
2821
- let passed = initialRun.exitCode === 0;
2822
- if (!passed) passed = await runAutoFixLoop({
2823
- scriptPath: dummyPath,
2824
- initialRun,
2825
- specMarkdown: specContent,
2826
- actions: cleanedActions,
2827
- maxRetries,
2828
- mode,
2829
- runVitest: runVitestForSession,
2830
- agentBrowserSession,
2831
- outputLanguage,
2832
- model
2833
- });
2834
- if (!passed) {
2835
- warn("auto-fix exhausted; setup test still failing");
2836
- hint(`edit ${dummyPath} manually, then run: ccqa generate-setup ${name} --from-dummy`);
2837
- process.exit(1);
2838
- }
2839
- await writeFile(finalPath, reversePlaceholdersInScript(await readFile(dummyPath, "utf8"), spec.placeholders), "utf-8");
2840
- await unlink(dummyPath).catch(() => {});
2841
- blank();
2842
- meta("saved", finalPath);
2843
- hint(`setup '${name}' is ready; reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
2844
- } finally {
2845
- process.off("SIGINT", signalHandler);
2846
- process.off("SIGTERM", signalHandler);
2847
- await closeSession(agentBrowserSession);
2848
- }
2941
+ //#region src/prompts/draft.ts
2942
+ function buildNamingSystemPrompt() {
2943
+ return `You name a new ccqa test case based on the user's intent and the existing feature tree.
2944
+
2945
+ ccqa test cases live under \`.ccqa/features/<featureName>/test-cases/<specName>/spec.yaml\`.
2946
+
2947
+ ## Naming rules
2948
+
2949
+ - featureName and specName are kebab-case ASCII (lowercase, words separated by '-').
2950
+ - featureName: a broad area (e.g. "tasks", "auth", "billing", "search").
2951
+ - specName: a short scenario name (e.g. "create-and-complete", "login-with-email", "search-by-tag").
2952
+ - Reuse existing featureName when the user's intent fits an existing area. Only invent a new featureName when the existing tree clearly does not cover the area.
2953
+ - specName must NOT collide with an existing spec under the chosen feature. If the natural name collides, pick a different one that distinguishes the new scenario from the existing ones.
2954
+ - Use the codebase (Read/Grep/Glob) sparingly to confirm domain vocabulary if helpful. Do not over-explore.
2955
+
2956
+ ## Output (STRICT)
2957
+
2958
+ Output ONE fenced \`\`\`json block, nothing else outside it:
2959
+
2960
+ {
2961
+ "featureName": "<kebab-case>",
2962
+ "specName": "<kebab-case>",
2963
+ "reason": "<one short sentence: why this name and how it relates to existing specs>"
2849
2964
  }
2850
/**
 * Replace dummy values with {{placeholder}} tokens directly in the test
 * script text.
 *
 * Longer dummy values are replaced first so a short dummy that happens to be
 * a substring of a longer one cannot break the longer match.
 *
 * @param {string} script - Generated test script source.
 * @param {Record<string, { dummy: string }> | undefined} placeholders -
 *   Placeholder definitions keyed by placeholder name.
 * @returns {string} The script with every dummy occurrence templatized;
 *   `script` itself when no placeholders are given.
 */
function reversePlaceholdersInScript(script, placeholders) {
	if (!placeholders) return script;
	const entries = Object.entries(placeholders).sort((a, b) => b[1].dummy.length - a[1].dummy.length);
	let result = script;
	for (const [key, def] of entries) {
		// An empty dummy matches between every character; replacing it would
		// shred the script with `{{key}}` tokens, so it is skipped.
		if (def.dummy === "") continue;
		// split/join substitutes literally, so `$`-sequences in a key are never
		// interpreted as replacement patterns.
		result = result.split(def.dummy).join(`{{${key}}}`);
	}
	return result;
}
2861
- async function runVitest(scriptPath, agentBrowserSession) {
2862
- const { exitCode, stdout, stderr } = await spawnVitestTeed([
2863
- "run",
2864
- "--config",
2865
- bundledVitestConfigPath(),
2866
- scriptPath
2867
- ], agentBrowserSession ? { env: {
2868
- ...process.env,
2869
- AGENT_BROWSER_SESSION: agentBrowserSession
2870
- } } : {});
2871
- const currentScript = await readFile(scriptPath, "utf8");
2872
- return {
2873
- exitCode,
2874
- output: stdout + stderr,
2875
- currentScript
2876
- };
2877
- }
2878
- /**
2879
- * Run vitest on `test.dummy.spec.ts`, but transparently expand any `${VAR}`
2880
- * env refs to real values for the duration of the run. The original file is
2881
- * preserved unchanged so subsequent reverse-replace still sees the env-ref
2882
- * literals. Auto-fix edits the original file (via writeFile in callers), so
2883
- * we always re-read it before each invocation.
2884
- */
2885
- async function runVitestResolved(scriptPath, agentBrowserSession) {
2886
- const original = await readFile(scriptPath, "utf8");
2887
- if (!hasEnvRef(original)) return runVitest(scriptPath, agentBrowserSession);
2888
- const tmpPath = scriptPath.replace(/\.ts$/, ".__resolved.spec.ts");
2889
- await writeFile(tmpPath, resolveEnvRefs(original), "utf-8");
2890
- try {
2891
- const { exitCode, stdout, stderr } = await spawnVitestTeed([
2892
- "run",
2893
- "--config",
2894
- bundledVitestConfigPath(),
2895
- tmpPath
2896
- ], agentBrowserSession ? { env: {
2897
- ...process.env,
2898
- AGENT_BROWSER_SESSION: agentBrowserSession
2899
- } } : {});
2900
- return {
2901
- exitCode,
2902
- output: stdout + stderr,
2903
- currentScript: original
2904
- };
2905
- } finally {
2906
- await unlink(tmpPath).catch(() => {});
2907
- }
2908
- }
2909
- async function cleanupActions(actions, model) {
2910
- try {
2911
- const { result, isError } = await invokeClaudeStreaming({
2912
- prompt: buildCleanupPrompt(actions),
2913
- disableBuiltinTools: true,
2914
- maxTurns: 1,
2915
- model
2916
- }, () => {});
2917
- if (isError || !result) return actions;
2918
- const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
2919
- const parsed = JSON.parse(json);
2920
- if (Array.isArray(parsed) && parsed.length > 0) return parsed;
2921
- } catch {}
2922
- return actions;
2923
- }
2924
- //#endregion
2925
- //#region src/claude/extract-json.ts
2926
- /**
2927
- * Pulls a JSON object out of a Claude completion. Accepts either a fenced
2928
- * ```json block or a bare `{...}` payload that constitutes the whole reply.
2929
- * Returns null when neither shape is present.
2930
- */
2931
- function extractJsonBlock(text) {
2932
- const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
2933
- if (fenced && fenced[1]) return fenced[1].trim();
2934
- const trimmed = text.trim();
2935
- if (trimmed.startsWith("{") && trimmed.endsWith("}")) return trimmed;
2936
- return null;
2937
- }
2938
- //#endregion
2939
- //#region src/prompts/draft.ts
2940
- function buildNamingSystemPrompt() {
2941
- return `You name a new ccqa test case based on the user's intent and the existing feature tree.
2942
-
2943
- ccqa test cases live under \`.ccqa/features/<featureName>/test-cases/<specName>/test-spec.md\`.
2944
-
2945
- ## Naming rules
2946
-
2947
- - featureName and specName are kebab-case ASCII (lowercase, words separated by '-').
2948
- - featureName: a broad area (e.g. "tasks", "auth", "billing", "search").
2949
- - specName: a short scenario name (e.g. "create-and-complete", "login-with-email", "search-by-tag").
2950
- - Reuse existing featureName when the user's intent fits an existing area. Only invent a new featureName when the existing tree clearly does not cover the area.
2951
- - specName must NOT collide with an existing spec under the chosen feature. If the natural name collides, pick a different one that distinguishes the new scenario from the existing ones.
2952
- - Use the codebase (Read/Grep/Glob) sparingly to confirm domain vocabulary if helpful. Do not over-explore.
2953
-
2954
- ## Output (STRICT)
2955
-
2956
- Output ONE fenced \`\`\`json block, nothing else outside it:
2957
-
2958
- {
2959
- "featureName": "<kebab-case>",
2960
- "specName": "<kebab-case>",
2961
- "reason": "<one short sentence: why this name and how it relates to existing specs>"
2962
- }
2963
- `;
2965
+ `;
2964
2966
  }
2965
2967
  function buildNamingPrompt(intent, tree) {
2966
2968
  return `## User intent
@@ -2970,7 +2972,7 @@ ${intent}
2970
2972
  ## Existing feature tree
2971
2973
 
2972
2974
  ${tree.length === 0 ? "(no existing features yet)" : tree.map((f) => {
2973
- const specLines = f.specs.length === 0 ? " (no specs yet)" : f.specs.map((s) => ` - ${s.specName}${s.title ? ` — ${s.title}` : ""}`).join("\n");
2975
+ const specLines = f.specs.length === 0 ? " (no specs yet)" : f.specs.map((s) => ` - ${s.specName}`).join("\n");
2974
2976
  return `- ${f.featureName}/\n${specLines}`;
2975
2977
  }).join("\n")}
2976
2978
 
@@ -2979,48 +2981,58 @@ ${tree.length === 0 ? "(no existing features yet)" : tree.map((f) => {
2979
2981
  Pick featureName and specName for the new test case. Follow the naming rules. Avoid colliding with any existing specName under the chosen feature.
2980
2982
  `;
2981
2983
  }
2982
- function buildDraftSystemPrompt() {
2983
- return `You are a QA engineer drafting and refining a ccqa test-spec.md.
2984
+ function buildDraftSystemPrompt(blocks) {
2985
+ return `You are a QA engineer drafting and refining a ccqa spec.yaml.
2984
2986
 
2985
2987
  The CLI runs you in a loop: each turn the user gives an intent (first run) or a refinement instruction (later runs). You read the codebase, validate the spec, and return a single JSON report. The CLI displays a diff and asks the user whether to apply.
2986
2988
 
2987
- ## test-spec.md format (STRICT)
2989
+ ## spec.yaml format (STRICT)
2988
2990
 
2989
- YAML frontmatter + Markdown body.
2991
+ Pure YAML — no markdown body, no frontmatter dashes.
2990
2992
 
2991
- Frontmatter fields:
2992
- - title: string (required)
2993
- - baseUrl: string (required, e.g. http://localhost:3000)
2994
- - prerequisites: string (optional, free text)
2995
- - setups: array of { name: string, params?: Record<string,string> } (optional)
2996
- - relatedPaths: array of string (optional) — glob patterns identifying source files this spec depends on. Used by \`ccqa drift --changed\` in CI to skip drift checks for unrelated changes.
2993
+ Top-level fields:
2994
+ - \`title\`: string (required) — short human-readable name for the test
2995
+ - \`relatedPaths\`: array of glob string (optional) — source files this spec depends on, used by \`ccqa drift --changed\`
2996
+ - \`steps\`: array (required, at least one)
2997
2997
 
2998
- Body must contain a \`## Steps\` section followed by step blocks:
2998
+ A step is one of two shapes:
2999
2999
 
3000
+ **Action step** — a user-facing browser interaction:
3001
+ \`\`\`yaml
3002
+ - instruction: <imperative; include the URL directly or via \${ENV_VAR}>
3003
+ expected: <observable outcome — visible text, URL pattern, element state>
3000
3004
  \`\`\`
3001
- ### Step 1: <short title>
3002
- - **Instruction**: <imperative, one sentence>
3003
- - **Expected**: <observable outcome>
3004
3005
 
3005
- ### Step 2: <short title>
3006
- ...
3006
+ **Include step** — invoke a reusable block from \`.ccqa/blocks/<name>/spec.yaml\`:
3007
+ \`\`\`yaml
3008
+ - include: <block-name>
3009
+ params:
3010
+ <param-name>: <string value, can use \${ENV_VAR}>
3007
3011
  \`\`\`
3008
3012
 
3013
+ ## URLs
3014
+
3015
+ Each step writes the URL it opens directly inside \`instruction\` (e.g. \`"\${APP_URL}/articles を開く"\`). Use \`\${ENV_VAR}\` references for environment-specific values.
3016
+
3017
+ ## Available blocks
3018
+
3019
+ ${formatBlockList(blocks)}
3020
+
3009
3021
  ## Quality rules
3010
3022
 
3011
3023
  - One user-facing action per step (login, click, fill, navigate, ...).
3012
- - **Expected** must be assertion-friendly: visible text, URL pattern, element state.
3013
- - Forbidden in **Expected**: timestamps, exact counts, session IDs, internal state.
3024
+ - \`expected\` must be assertion-friendly: visible text, URL pattern, element state.
3025
+ - Forbidden in \`expected\`: timestamps, exact counts, session IDs, internal state.
3014
3026
  - 3–8 steps is typical. Fewer means too coarse; more means too fine.
3015
3027
 
3016
3028
  ## Workflow (use Read / Grep / Glob extensively)
3017
3029
 
3018
- 1. Read the codebase under cwd to find concrete strings: routes, button labels, aria-labels, page titles, placeholders. Use those exact strings in **Expected**.
3019
- 2. If the spec references setups, Read \`.ccqa/setups/<name>/setup-spec.md\` and verify each \`params\` key matches the setup's \`placeholders\`.
3020
- 3. Populate \`relatedPaths\` in the frontmatter with **provisional** glob patterns pointing at the source files this spec touches: the route/page file for each URL the spec visits, plus the component files (or their parent feature directory) that render the aria-labels, placeholders, or visible texts the spec asserts on. Prefer directory globs (e.g. \`src/features/tasks/**\`) when several files in one area are involved. Be conservative — include a path if you're unsure rather than omit it. \`ccqa trace\` will refine this list later from real browser observations.
3030
+ 1. Read the codebase under cwd to find concrete strings: routes, button labels, aria-labels, page titles, placeholders. Use those exact strings in \`expected\`.
3031
+ 2. If you use \`include:\` steps, verify each \`params\` key matches a declared param of the block (see the Available blocks list above).
3032
+ 3. Populate \`relatedPaths\` with **provisional** glob patterns pointing at the source files this spec touches: the route/page file for each URL the spec visits, plus the component files (or their parent feature directory) that render the aria-labels, placeholders, or visible texts the spec asserts on. Prefer directory globs (e.g. \`src/features/tasks/**\`) when several files in one area are involved. Be conservative — include a path if you're unsure rather than omit it. \`ccqa trace\` will refine this list later from real browser observations.
3021
3033
  4. Validate the (current or proposed) spec on four axes — emit one issue per finding:
3022
- - **assertable**: each Expected can be verified against a string/URL/state that exists in code.
3023
- - **setups**: referenced setup exists; params keys match placeholders.
3034
+ - **assertable**: each \`expected\` can be verified against a string/URL/state that exists in code.
3035
+ - **blocks**: every \`include\` resolves to a real block; every \`params\` key is declared on that block; every required param is provided.
3024
3036
  - **granularity**: not too coarse (multiple actions per step) nor too fine (snapshot-only steps); order is logical.
3025
3037
  - **unimplemented**: any feature mentioned in the spec that you cannot find in code.
3026
3038
 
@@ -3035,13 +3047,13 @@ Schema:
3035
3047
  "issues": [
3036
3048
  {
3037
3049
  "severity": "OK" | "WARN" | "ERROR",
3038
- "category": "assertable" | "setups" | "granularity" | "unimplemented",
3050
+ "category": "assertable" | "blocks" | "granularity" | "unimplemented",
3039
3051
  "stepId": "step-01" | null,
3040
3052
  "message": "<one-line summary>",
3041
3053
  "detail": "<optional, multiline explanation>"
3042
3054
  }
3043
3055
  ],
3044
- "patch": "<COMPLETE rewritten test-spec.md, or empty string if no changes>"
3056
+ "patch": "<COMPLETE rewritten spec.yaml, or empty string if no changes>"
3045
3057
  }
3046
3058
  \`\`\`
3047
3059
 
@@ -3049,17 +3061,25 @@ Schema:
3049
3061
 
3050
3062
  - \`patch\` must be the COMPLETE file content if non-empty (never a diff fragment).
3051
3063
  - The CLI replaces the file atomically with \`patch\`.
3064
+ - The patch must be valid YAML matching the schema above. The CLI re-parses it before applying; if it fails validation, the patch is rejected.
3052
3065
  - For **create** mode: produce a fresh spec from the user intent.
3053
3066
  - For **refine** mode with a non-empty user instruction: apply the user's request, plus fix any issues it introduces. Preserve the user's wording elsewhere.
3054
3067
  - For **refine** mode with an empty user instruction: only fix issues you find against the current spec; if everything is fine, return \`patch: ""\`.
3055
3068
  - If \`patch\` is the same as the current spec, return \`patch: ""\` instead.
3056
3069
  `;
3057
3070
  }
3071
/**
 * Renders the reusable blocks as a bullet list for embedding in a prompt.
 *
 * Each block contributes a "- `name` — title" line followed by one line per
 * param (non-required params are tagged " (optional)", secret ones
 * " [secret]"), or a "params: (none)" line when it has no params. An empty
 * list yields a fixed "no blocks defined yet" sentence.
 */
function formatBlockList(blocks) {
	if (blocks.length === 0) return "(no blocks defined yet — only action steps are available.)";
	const describeParam = (p) => {
		const optionalTag = p.required ? "" : " (optional)";
		const secretTag = p.secret ? " [secret]" : "";
		return ` - ${p.name}${optionalTag}${secretTag}`;
	};
	const sections = [];
	for (const b of blocks) {
		const paramLines = b.params.length === 0 ? " params: (none)" : b.params.map(describeParam).join("\n");
		sections.push(`- \`${b.name}\` — ${b.title}\n${paramLines}`);
	}
	return sections.join("\n");
}
3058
3078
  function buildDraftPrompt(input) {
3059
3079
  const { mode, existing, userInput } = input;
3060
3080
  if (mode === "create") return `## Mode
3061
3081
 
3062
- create — no spec exists yet at the target path. Produce a fresh test-spec.md.
3082
+ create — no spec exists yet at the target path. Produce a fresh spec.yaml.
3063
3083
 
3064
3084
  ## User intent
3065
3085
 
@@ -3067,7 +3087,7 @@ ${userInput}
3067
3087
 
3068
3088
  ## Task
3069
3089
 
3070
- Read the codebase under cwd. Discover concrete strings (routes, labels, titles). Produce a complete test-spec.md as the \`patch\` field, plus any issues you'd flag about your own draft.
3090
+ Read the codebase under cwd. Discover concrete strings (routes, labels, titles). Produce a complete spec.yaml as the \`patch\` field, plus any issues you'd flag about your own draft.
3071
3091
  `;
3072
3092
  return `## Mode
3073
3093
 
@@ -3075,47 +3095,76 @@ refine — a spec already exists. Apply the user's instruction (if any) and vali
3075
3095
 
3076
3096
  ## Current spec
3077
3097
 
3078
- \`\`\`markdown
3098
+ \`\`\`yaml
3079
3099
  ${existing}\`\`\`
3080
3100
 
3081
- ${userInput ? `## User refinement instruction\n\n${userInput}\n` : `## User refinement instruction\n\n(empty — re-validate the current spec against the codebase; only emit a non-empty patch if something is actually wrong)\n`}
3082
- ## Task
3101
+ ${userInput ? `## User refinement instruction\n\n${userInput}\n` : `## User refinement instruction\n\n(empty — re-validate the current spec against the codebase; only emit a non-empty patch if something is actually wrong)\n`}## Task
3083
3102
 
3084
- 1. Read the codebase under cwd and any referenced setups.
3103
+ 1. Read the codebase under cwd and any referenced blocks (\`.ccqa/blocks/<name>/spec.yaml\`).
3085
3104
  2. If the user's instruction is non-empty, apply it to the spec.
3086
3105
  3. Validate the resulting spec on the four axes. Emit issues.
3087
3106
  4. Return the complete updated spec as \`patch\`. If no changes are needed, return \`patch: ""\`.
3088
3107
  `;
3089
3108
  }
3090
3109
  //#endregion
3110
+ //#region src/prompts/drift.ts
3111
+ function buildDriftSystemPrompt(blocks) {
3112
+ return `${buildDraftSystemPrompt(blocks)}
3113
+
3114
+ ## Drift mode
3115
+
3116
+ You are running non-interactively in CI. The user will not see or apply the patch — only the \`issues\` array.
3117
+
3118
+ - Always set \`patch\` to "" in your response.
3119
+ - Focus issue messages on what is **out of sync** between the spec and the current codebase: missing aria-labels, renamed routes, removed buttons, placeholders that no longer exist, include references that point to non-existent blocks.
3120
+ - Do NOT raise issues about stylistic preferences in the spec wording.
3121
+ - Treat \`category: unimplemented\` as the primary signal for drift: anything the spec asserts that you cannot find in code is a drift finding.
3122
+
3123
+ ## Drift severity policy (STRICT)
3124
+
3125
+ The CLI exits non-zero when any issue has \`severity: "ERROR"\` (default) or — with \`--severity warn\` — when any \`WARN\` is present. Pick severity by **whether a deterministic replay of this spec would fail today**, not by how confident you are in your own analysis.
3126
+
3127
+ ### CRITICAL: spec ↔ source mismatch is ERROR, not "vague phrasing" WARN
3128
+
3129
+ The most common false negative is treating a concrete spec/source mismatch as a WARN about "expected phrasing." It is not. Apply this decision rule **before** picking severity:
3130
+
3131
+ 1. **Pick the concrete strings the spec asserts** in each step's \`expected\` (visible text, aria-labels, button labels, route paths). For \`expected\` like "the Dashboard page is visible", the spec is asserting that the literal string "Dashboard" — or the page conceptually identified by that label — is rendered.
3132
+ 2. **Search the source** for those exact strings (\`Grep\` / \`Read\`) at the location the step references (the relevant page/component/route).
3133
+ 3. Classify:
3134
+ - **ERROR** — the source instead renders a *different* string in that location (e.g. spec says "Dashboard", the breadcrumb in \`DashboardPage.tsx\` now renders "Overview"). A replay against the current source would fail; a replay against a stale staging environment would pass and *hide* the drift — exactly the case drift CI exists to catch. Cite both sides in \`detail\`: the spec line and the file:line of the source mismatch.
3135
+ - **WARN (vague phrasing)** — the source's actual string IS present somewhere relevant; the \`expected\` just paraphrases it more loosely (e.g. spec says "the Save button is visible" and the source has both visible "Save" text and \`aria-label="Save"\`). Replay still passes; the spec could just be tightened.
3136
+ - **OK** — the spec's exact string appears in source at the relevant location.
3137
+
3138
+ Use **ERROR** when the spec would break on replay:
3139
+ - A selector the spec relies on (\`aria-label\`, \`placeholder\`, \`data-testid\`, button text) **does not exist anywhere in the source**.
3140
+ - A URL / route the spec navigates to is no longer defined.
3141
+ - An \`expected\` asserts a string or visible text that is no longer rendered by the relevant component.
3142
+ - The source renders a *different* string in the place the spec describes (per the decision rule above).
3143
+ - An \`include\` step references a block that does not exist under \`.ccqa/blocks/<name>/spec.yaml\`, or a \`params\` key is not declared on that block.
3144
+ - The spec references a feature/page that has been removed from the codebase.
3145
+
3146
+ Use **WARN** when the spec is still likely to work, but quality could improve:
3147
+ - The \`expected\` paraphrases a string that **still exists** in source (the literal target is findable, just imprecisely worded).
3148
+ - A step bundles multiple actions, or a needed intermediate verification step is missing.
3149
+ - Stable signals exist that the spec could leverage but currently doesn't.
3150
+ - You are unsure whether a referenced string exists (give the user the benefit of the doubt; do not hard-fail CI on uncertainty).
3151
+
3152
+ Use **OK** for axes you actively verified and found no issue.
3153
+
3154
+ If you cannot decide between ERROR and WARN, choose WARN. Reserve ERROR for findings you can back up with a specific file path or grep result that proves the drift.
3155
+
3156
+ Conversely: when you DO have a citation showing a concrete spec/source mismatch (per the decision rule above), you MUST use ERROR — "vague phrasing" WARN is not a safe fallback for an actual drift.
3157
+ `;
3158
+ }
3159
/**
 * Builds the user prompt for a drift check: the draft prompt in "refine"
 * mode for the given spec text, with an empty user instruction.
 */
function buildDriftUserPrompt(existing) {
	const refineWithoutInstruction = {
		mode: "refine",
		existing,
		userInput: ""
	};
	return buildDraftPrompt(refineWithoutInstruction);
}
3166
+ //#endregion
3091
3167
  //#region src/types.ts
3092
- const TestStepSchema = z.object({
3093
- id: z.string(),
3094
- title: z.string(),
3095
- instruction: z.string(),
3096
- expected: z.string()
3097
- });
3098
- const SetupRefSchema = z.object({
3099
- name: z.string(),
3100
- params: z.record(z.string(), z.string()).optional()
3101
- });
3102
- z.object({
3103
- title: z.string(),
3104
- baseUrl: z.string(),
3105
- prerequisites: z.string().optional(),
3106
- setups: z.array(SetupRefSchema).optional(),
3107
- relatedPaths: z.array(z.string()).optional(),
3108
- steps: z.array(TestStepSchema)
3109
- });
3110
- const PlaceholderDefSchema = z.object({
3111
- dummy: z.string(),
3112
- description: z.string().optional()
3113
- });
3114
- z.object({
3115
- title: z.string(),
3116
- placeholders: z.record(z.string(), PlaceholderDefSchema).optional(),
3117
- steps: z.array(TestStepSchema)
3118
- });
3119
3168
  const RouteStepSchema = z.object({
3120
3169
  title: z.string(),
3121
3170
  action: z.string(),
@@ -3141,7 +3190,7 @@ const DraftIssueSchema = z.object({
3141
3190
  ]),
3142
3191
  category: z.enum([
3143
3192
  "assertable",
3144
- "setups",
3193
+ "blocks",
3145
3194
  "granularity",
3146
3195
  "unimplemented"
3147
3196
  ]),
@@ -3153,20 +3202,499 @@ const DraftReportSchema = z.object({
3153
3202
  issues: z.array(DraftIssueSchema),
3154
3203
  patch: z.string()
3155
3204
  });
3205
/**
 * Human-readable label for each draft/drift issue category.
 *
 * Keys mirror the `category` enum of the issue schema. The table is shared
 * between the drift renderer and the draft command, so it is frozen to keep
 * one consumer from accidentally mutating the labels seen by the other.
 */
const DRAFT_CATEGORY_LABEL = Object.freeze({
  assertable: "Assertability",
  blocks: "Block references",
  granularity: "Step granularity",
  unimplemented: "Unimplemented checks"
});
3156
3211
  const DraftNamingSchema = z.object({
3157
3212
  featureName: z.string().min(1),
3158
3213
  specName: z.string().min(1),
3159
3214
  reason: z.string().optional()
3160
3215
  });
3161
3216
  //#endregion
3217
+ //#region src/drift/analyze.ts
3218
const DEFAULT_CONCURRENCY$1 = 3;
/**
 * Run drift checks against a list of pre-collected targets. Pure library
 * function: no commander, no process.exit, no stdout writes. Callers handle
 * presentation.
 *
 * Work is distributed over a small pool of workers that pull the next target
 * index from a shared cursor, so results keep the order of `targets`.
 *
 * @param {object} input
 * @param {Array<object>} input.targets - Specs to check (`featureName`, `specName`, ...).
 * @param {string} input.cwd - Directory handed to each check.
 * @param {*} input.blocks - Available blocks, forwarded to each check.
 * @param {number} [input.concurrency=3] - Requested number of parallel checks.
 *   Values below 1 or non-finite values are clamped so that at least one
 *   worker always runs; otherwise the result array would be left full of holes.
 * @param {string} [input.model] - Optional model override forwarded to each check.
 * @param {(target: object) => void} [input.onSpecStart] - Called right before each check starts.
 * @returns {Promise<Array<object>>} One result per target, in target order.
 */
async function analyzeDrift(input) {
  const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, onSpecStart } = input;
  const results = new Array(targets.length);
  if (targets.length === 0) return results;
  const requested = Number.isFinite(concurrency) ? Math.floor(concurrency) : DEFAULT_CONCURRENCY$1;
  const workerCount = Math.min(Math.max(1, requested), targets.length);
  let cursor = 0;
  const worker = async () => {
    while (cursor < targets.length) {
      const idx = cursor++;
      const target = targets[idx];
      onSpecStart?.(target);
      try {
        results[idx] = await checkSpec(target, {
          cwd,
          blocks,
          model
        });
      } catch (err) {
        // One failing check must not discard the results of every other
        // spec; report it in the same shape checkSpec uses for its own errors.
        const reason = err instanceof Error ? err.message : String(err);
        results[idx] = {
          target,
          ok: false,
          issues: [],
          error: `drift check failed: ${reason}`
        };
      }
    }
  };
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
3246
/**
 * Run a single drift check for one spec.
 *
 * Reads the spec file, asks Claude (restricted to the Read/Grep/Glob tools)
 * to review it, and parses the JSON report out of the reply. Every failure
 * along the way is returned as a result object with `ok: false` and an
 * `error` string rather than thrown.
 *
 * @param {{featureName: string, specName: string}} target - Spec to check.
 * @param {{cwd: string, blocks: *, model?: string}} opts - Check context.
 * @returns {Promise<{target: object, ok: boolean, issues: Array<object>, error?: string}>}
 */
async function checkSpec(target, opts) {
  const { featureName, specName } = target;
  const failure = (error) => ({
    target,
    ok: false,
    issues: [],
    error
  });
  const existing = await tryReadSpecFile(featureName, specName, opts.cwd);
  if (existing === null) return failure(`spec file disappeared after enumeration: ${featureName}/${specName}`);
  const { result, isError } = await invokeClaudeStreaming({
    prompt: buildDriftUserPrompt(existing),
    systemPrompt: buildDriftSystemPrompt(opts.blocks),
    allowedTools: [
      "Read",
      "Grep",
      "Glob"
    ],
    silenceBashLog: true,
    cwd: opts.cwd,
    ...opts.model ? { model: opts.model } : {}
  }, (_msg) => {});
  if (isError) return failure("Claude returned an error result");
  const json = extractJsonBlock(result);
  if (!json) return failure("Claude did not return a json block");
  let report;
  try {
    report = DraftReportSchema.parse(JSON.parse(json));
  } catch (e) {
    // Anything can be thrown; only Error instances carry a `.message`.
    const reason = e instanceof Error ? e.message : String(e);
    return failure(`failed to parse drift report: ${reason}`);
  }
  return {
    target,
    ok: true,
    issues: report.issues
  };
}
3297
+ //#endregion
3298
+ //#region src/drift/format.ts
3299
/**
 * Render drift results as a string in the requested format.
 *
 * Nothing is written here; the formatted output is returned so callers can
 * prefix, interleave or pipe it as they need.
 *
 * @param {Array<object>} results - Drift results to render.
 * @param {string} format - "json" or "github"; any other value renders text.
 * @param {string} cwd - Working directory, used by the GitHub renderer.
 * @returns {string} The formatted report.
 */
function renderDrift(results, format, cwd) {
  switch (format) {
    case "json":
      return renderJson(results);
    case "github":
      return renderGithub(results, cwd);
    default:
      return renderText(results);
  }
}
3309
const HEAVY_RULE = "═".repeat(72);
/**
 * Render drift results as a plain-text report.
 *
 * Each spec gets a ruled heading followed by its spec-level error, an
 * "all passed" line, or its ERROR and WARN findings plus a list of the
 * categories that passed. A totals footer closes the report.
 *
 * @param {Array<object>} results - Drift results to render.
 * @returns {string} The report, lines joined with "\n".
 */
function renderText(results) {
  const lines = [];
  for (const result of results) {
    const heading = `══ ${result.target.featureName}/${result.target.specName} `;
    lines.push("", `${heading}${"═".repeat(Math.max(3, 72 - heading.length))}`);
    if (result.error) {
      lines.push(` ERROR ${result.error}`);
      continue;
    }
    // Bucket the issues by severity in a single pass.
    const bySeverity = { ERROR: [], WARN: [], OK: [] };
    for (const issue of result.issues) bySeverity[issue.severity]?.push(issue);
    const { ERROR: errors, WARN: warnings, OK: passed } = bySeverity;
    if (errors.length + warnings.length === 0) {
      let detail = "no issues";
      if (passed.length > 0) detail = `all ${passed.length} ${passed.length === 1 ? "check" : "checks"} passed`;
      lines.push(` ✓ ${detail}`);
      continue;
    }
    errors.forEach((issue) => appendFinding(lines, "ERROR", issue));
    warnings.forEach((issue) => appendFinding(lines, "WARN", issue));
    if (passed.length > 0) {
      const names = passed.map((issue) => DRAFT_CATEGORY_LABEL[issue.category]).join(", ");
      lines.push("", ` ✓ passed (${passed.length}): ${names}`);
    }
  }
  const totals = summarize(results);
  lines.push(
    "",
    HEAVY_RULE,
    ` specs ${results.length} (${totals.errored} errored)`,
    ` findings ${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`,
    ""
  );
  return lines.join("\n");
}
3346
/**
 * Append the text lines for one finding to `out`.
 *
 * Pushes a blank separator, a "<level> <category label> [stepId]" line, the
 * message, and, when present, the detail with its continuation lines indented.
 *
 * @param {string[]} out - Line buffer, mutated in place.
 * @param {string} level - Severity tag to print (e.g. "ERROR", "WARN").
 * @param {object} issue - Issue with `category`, `message`, optional `stepId` and `detail`.
 * @returns {void}
 */
function appendFinding(out, level, issue) {
  let titleLine = ` ${level} ${DRAFT_CATEGORY_LABEL[issue.category]}`;
  if (issue.stepId) titleLine += ` ${issue.stepId}`;
  const block = ["", titleLine, ` ${issue.message}`];
  if (issue.detail) block.push(` └ ${issue.detail.replace(/\n/g, "\n ")}`);
  for (const line of block) out.push(line);
}
3353
/**
 * Render drift results as pretty-printed JSON followed by a newline.
 *
 * The payload is `{ specs: [...] }`; `error` and `detail` are only emitted
 * when they are truthy.
 *
 * @param {Array<object>} results - Drift results to serialize.
 * @returns {string} Two-space indented JSON with a trailing "\n".
 */
function renderJson(results) {
  const toIssue = (issue) => {
    const entry = {
      severity: issue.severity,
      category: issue.category,
      stepId: issue.stepId,
      message: issue.message
    };
    if (issue.detail) entry.detail = issue.detail;
    return entry;
  };
  const toSpec = (result) => {
    const entry = {
      feature: result.target.featureName,
      spec: result.target.specName,
      ok: result.ok
    };
    if (result.error) entry.error = result.error;
    entry.issues = result.issues.map((issue) => toIssue(issue));
    return entry;
  };
  const payload = { specs: results.map((result) => toSpec(result)) };
  return `${JSON.stringify(payload, null, 2)}\n`;
}
3369
/**
 * Render drift results as GitHub Actions workflow commands
 * (`::error ...::message` / `::warning ...::message`), one per line.
 *
 * Spec-level errors become a single `::error`; OK findings are omitted.
 * Paths are made relative to `$GITHUB_WORKSPACE` (falling back to the
 * process working directory) where possible.
 *
 * @param {Array<object>} results - Drift results to render.
 * @param {string} cwd - Directory that contains the `.ccqa` folder.
 * @returns {string} The commands joined by "\n" with a trailing newline, or
 *   "" when there is nothing to report.
 */
function renderGithub(results, cwd) {
  const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
  const lines = [];
  for (const r of results) {
    // The path is a command *property*, so it needs the property escaping:
    // an unescaped "," or ":" (e.g. a Windows absolute path) would otherwise
    // be read as a property or message separator.
    const file = escapeGhProp(githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName));
    if (r.error) {
      lines.push(`::error file=${file}::${escapeGhMessage(r.error)}`);
      continue;
    }
    for (const issue of r.issues) {
      if (issue.severity === "OK") continue;
      const level = issue.severity === "ERROR" ? "error" : "warning";
      const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
      const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
      lines.push(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}`);
    }
  }
  return lines.length === 0 ? "" : `${lines.join("\n")}\n`;
}
3388
/**
 * Compute the path of a spec file for use in a GitHub annotation.
 *
 * @param {string} cwd - Directory that contains the `.ccqa` folder.
 * @param {string} repoRoot - Root the annotation path should be relative to.
 * @param {string} featureName - Feature directory name.
 * @param {string} specName - Spec directory name.
 * @returns {string} The path relative to `repoRoot`, or the absolute path
 *   when the spec lives outside `repoRoot`.
 */
function githubRelPath(cwd, repoRoot, featureName, specName) {
  const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "spec.yaml");
  const rel = relative(repoRoot, abs);
  // Only a leading ".." *segment* means the path escapes the root; a
  // directory whose name merely begins with ".." (e.g. "..pkg") is inside it.
  const escapesRoot = rel === ".." || rel.startsWith("../") || rel.startsWith("..\\");
  return escapesRoot ? abs : rel;
}
3393
/**
 * Percent-encode the characters that are special in the message part of a
 * GitHub workflow command: "%", carriage return and line feed.
 *
 * @param {string} s - Raw message text.
 * @returns {string} The escaped message.
 */
function escapeGhMessage(s) {
  const encoded = { "%": "%25", "\r": "%0D", "\n": "%0A" };
  return s.replace(/[%\r\n]/g, (ch) => encoded[ch]);
}
3396
/**
 * Percent-encode the characters that are special in a property value of a
 * GitHub workflow command: "%", carriage return, line feed, "," and ":".
 *
 * @param {string} s - Raw property value.
 * @returns {string} The escaped property value.
 */
function escapeGhProp(s) {
  const encoded = {
    "%": "%25",
    "\r": "%0D",
    "\n": "%0A",
    ",": "%2C",
    ":": "%3A"
  };
  return s.replace(/[%\r\n,:]/g, (ch) => encoded[ch]);
}
3399
/**
 * Count findings by severity across all results.
 *
 * @param {Array<object>} results - Drift results.
 * @returns {{error: number, warn: number, ok: number, errored: number}}
 *   `error` and `warn` count issues with that severity, `ok` counts every
 *   other issue, and `errored` counts results carrying a spec-level error.
 */
function summarize(results) {
  const totals = {
    error: 0,
    warn: 0,
    ok: 0,
    errored: 0
  };
  results.forEach((result) => {
    if (result.error) totals.errored += 1;
    result.issues.forEach(({ severity }) => {
      switch (severity) {
        case "ERROR":
          totals.error += 1;
          break;
        case "WARN":
          totals.warn += 1;
          break;
        default:
          totals.ok += 1;
      }
    });
  });
  return totals;
}
3417
+ //#endregion
3418
+ //#region src/drift/exit-code.ts
3419
/**
 * Map drift results to a process exit code.
 *
 * A spec-level error or any ERROR finding always fails. WARN findings fail
 * only when `threshold` is "warn".
 *
 * @param {Array<object>} results - Drift results.
 * @param {string} threshold - "warn" to fail on warnings too.
 * @returns {0|1} 1 when the run should fail, otherwise 0.
 */
function determineExitCode(results, threshold) {
  const warnFails = threshold === "warn";
  const isBlocking = ({ severity }) => severity === "ERROR" || (warnFails && severity === "WARN");
  const failed = results.some((result) => Boolean(result.error) || result.issues.some(isBlocking));
  return failed ? 1 : 0;
}
3434
+ //#endregion
3435
+ //#region src/drift/auth.ts
3436
/**
 * Probe whether a credential for drift analysis appears to be present:
 *   1. a non-empty ANTHROPIC_API_KEY environment variable, or
 *   2. a `.claude/.credentials.json` file in the user's home directory.
 *
 * Only existence is checked; the credential itself is not validated.
 *
 * @returns {{ok: true} | {ok: false, reason: string}} Availability, with a
 *   short reason when nothing was found.
 */
function driftAuthAvailable() {
  const apiKey = process.env["ANTHROPIC_API_KEY"];
  const hasApiKey = typeof apiKey === "string" && apiKey.length > 0;
  if (hasApiKey || existsSync(join(homedir(), ".claude", ".credentials.json"))) return { ok: true };
  return {
    ok: false,
    reason: "no ANTHROPIC_API_KEY / claude login"
  };
}
3454
+ //#endregion
3455
+ //#region src/cli/run.ts
3456
// Project-level vitest config; resolved against the working directory at load time.
const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
/**
 * Pick the vitest config to run with.
 *
 * @returns {Promise<string>} The user's `.ccqa/vitest.config.ts` when it is
 *   accessible, otherwise the bundled config path.
 */
async function resolveVitestConfig() {
  const userConfigAccessible = await access(USER_VITEST_CONFIG).then(
    () => true,
    () => false
  );
  return userConfigAccessible ? USER_VITEST_CONFIG : bundledVitestConfigPath();
}
3465
/**
 * `ccqa run [target]` command definition.
 *
 * Declares the optional target argument and the drift-related options, then
 * delegates to `runTests`.
 */
const runCommand = new Command("run")
  .argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all")
  .description("Run generated agent-browser test scripts. Pass --drift to invoke a Claude-driven drift analysis on each failing spec (skipped silently when no test fails). Requires ANTHROPIC_API_KEY or a local Claude login.")
  .option("--drift", "On vitest failure, run drift analysis on the failing specs")
  .option("--drift-strict", "Treat drift ERROR findings as a run failure (exit 1 even if vitest passed). Implies --drift.")
  .option("--format <fmt>", "Output format for the drift block: text | json | github", "text")
  .option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Used by --drift only. Overrides CCQA_MODEL.")
  .action(async (target, opts) => {
    await runTests(target, opts);
  });
3468
/**
 * Run the generated test scripts for `target` and exit the process with the
 * combined result.
 *
 * Each spec is run through vitest with the JSON reporter writing into a
 * temporary directory. The reports are summarized, the optional drift hook
 * is run, the temporary directory is removed, and the process exits.
 *
 * @param {string|undefined} target - '<feature>/<spec>', '<feature>', or undefined for all specs.
 * @param {object} opts - Parsed CLI options (`drift`, `driftStrict`, `format`, `model`).
 * @returns {Promise<void>} Does not return normally; always ends in `process.exit`.
 */
async function runTests(target, opts) {
  header("run", target);
  // Validate --format before any work is done. Otherwise a typo is only
  // reported after the whole vitest run, through a process.exit inside the
  // try block below, which skips the temp-directory cleanup in `finally`.
  if (opts.drift || opts.driftStrict) parseDriftFormat(opts.format);
  const specs = await resolveSpecs(target);
  if (specs.length === 0) {
    error("no test scripts found");
    hint("run 'ccqa generate <feature>/<spec>' first to generate tests");
    process.exit(1);
  }
  const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
  const summaries = [];
  let overallExitCode = 0;
  const vitestConfig = await resolveVitestConfig();
  try {
    for (let i = 0; i < specs.length; i++) {
      const { featureName, specName } = specs[i];
      const scriptFile = await getTestScript(featureName, specName);
      if (!scriptFile) {
        warn(`${featureName}/${specName}: no test.spec.ts found`);
        continue;
      }
      run(`${featureName}/${specName}`);
      meta("test", scriptFile);
      blank();
      const reportFile = join(tmpDir, `report-${i}.json`);
      const proc = spawnVitestStreaming([
        "run",
        "--config",
        vitestConfig,
        scriptFile,
        "--reporter=json",
        `--outputFile.json=${reportFile}`
      ]);
      await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]);
      const exitCode = await proc.exited;
      // A non-numeric code (e.g. null for a signal-terminated child) must
      // still count as a failure; process.exit(null) would report success.
      if (exitCode !== 0) overallExitCode = typeof exitCode === "number" ? exitCode : 1;
      const report = await readReport(reportFile);
      summaries.push({
        featureName,
        specName,
        scriptFile,
        report,
        exitCode
      });
      blank();
    }
    printSummary(summaries);
    overallExitCode = await maybeRunDrift(summaries, opts, overallExitCode);
  } finally {
    await rm(tmpDir, {
      recursive: true,
      force: true
    });
  }
  process.exit(overallExitCode);
}
3523
/**
 * Tell whether a spec run counts as failed.
 *
 * @param {{exitCode: *, report?: {numFailedTests?: number}|null}} s - Run summary.
 * @returns {boolean} True when the exit code is not 0 or the report lists
 *   at least one failed test.
 */
function failedSpec(s) {
  const failedTests = s.report?.numFailedTests ?? 0;
  return s.exitCode !== 0 || failedTests > 0;
}
3527
/**
 * Validate the `--format` value used for the drift block.
 *
 * @param {string|undefined|null} raw - Raw option value; nullish means "text".
 * @returns {"text"|"json"|"github"} The validated format. On any other
 *   value an error is reported and the process exits with code 2.
 */
function parseDriftFormat(raw) {
  const candidate = raw ?? "text";
  switch (candidate) {
    case "text":
    case "json":
    case "github":
      return candidate;
    default:
      error(`invalid --format: ${candidate} (expected text|json|github)`);
      process.exit(2);
  }
}
3533
/**
 * Choose which specs to drift-check.
 *
 * `--drift-strict` is an audit: every spec is checked, passing or not.
 * `--drift` supplements failures: only failed specs are checked, to help
 * explain the vitest failure. With neither flag nothing is checked.
 *
 * @param {Array<object>} summaries - Per-spec run summaries.
 * @param {{drift?: boolean, driftStrict?: boolean}} opts - Parsed CLI options.
 * @returns {Array<object>} The summaries to analyze.
 */
function selectDriftTargets(summaries, opts) {
  if (!opts.driftStrict && !opts.drift) return [];
  return opts.driftStrict ? summaries : summaries.filter(failedSpec);
}
3546
/**
 * Opt-in drift hook that runs after vitest.
 *
 * Returns `currentExitCode` unchanged when no spec is selected, when no
 * credential is available, or when none of the selected specs is found in
 * the feature tree. Otherwise it runs the analysis and writes the rendered
 * report to stdout. Only with `--drift-strict`, and only when the analysis
 * has a failure at the "error" threshold, can the result become non-zero.
 *
 * @param {Array<object>} summaries - Per-spec run summaries.
 * @param {object} opts - Parsed CLI options (`drift`, `driftStrict`, `format`, `model`).
 * @param {number} currentExitCode - Exit code determined by vitest so far.
 * @returns {Promise<number>} The exit code to finish the run with.
 */
async function maybeRunDrift(summaries, opts, currentExitCode) {
  const candidates = selectDriftTargets(summaries, opts);
  if (candidates.length === 0) return currentExitCode;
  const auth = driftAuthAvailable();
  if (!auth.ok) {
    info(`drift analysis skipped (${auth.reason})`);
    return currentExitCode;
  }
  const format = parseDriftFormat(opts.format);
  const cwd = process.cwd();
  const tree = await listFeatureTree(cwd);
  // Keep only candidates whose spec is present in the feature tree, and
  // carry over the optional metadata declared on it.
  const targets = candidates.flatMap((summary) => {
    const feature = tree.find((f) => f.featureName === summary.featureName);
    const spec = feature?.specs.find((sp) => sp.specName === summary.specName);
    if (!spec) return [];
    const entry = {
      featureName: summary.featureName,
      specName: summary.specName
    };
    if (spec.relatedPaths) entry.relatedPaths = spec.relatedPaths;
    if (spec.includedBlocks) entry.includedBlocks = spec.includedBlocks;
    return [entry];
  });
  if (targets.length === 0) {
    info("drift analysis skipped (no spec.yaml found for failing specs)");
    return currentExitCode;
  }
  const blocks = await loadAvailableBlocks(cwd);
  const isText = format === "text";
  const results = await analyzeDrift({
    targets,
    cwd,
    blocks,
    concurrency: Math.min(3, targets.length),
    ...opts.model ? { model: opts.model } : {},
    onSpecStart: (t) => {
      if (isText) info(`drift: checking ${t.featureName}/${t.specName}`);
    }
  });
  if (isText) process.stdout.write(`\n${C.cyan}${C.bold}──────── drift analysis ────────${C.reset}\n`);
  process.stdout.write(renderDrift(results, format, cwd));
  const strictFailure = opts.driftStrict && determineExitCode(results, "error") !== 0;
  return strictFailure ? currentExitCode || 1 : currentExitCode;
}
3594
/**
 * Read and parse a JSON report file.
 *
 * @param {string} path - Location of the report.
 * @returns {Promise<*|null>} The parsed JSON, or null when the file cannot
 *   be read or does not contain valid JSON.
 */
async function readReport(path) {
  return readFile(path, "utf8")
    .then((raw) => JSON.parse(raw))
    .catch(() => null);
}
3602
// Colors are emitted only on a TTY and only when NO_COLOR is not set.
const useColor = process.stdout.isTTY && process.env.NO_COLOR == null;
/**
 * ANSI escape sequences keyed by style name. Every entry is the empty
 * string when color output is disabled.
 */
const C = Object.fromEntries(
  [
    ["reset", 0],
    ["bold", 1],
    ["dim", 2],
    ["green", 32],
    ["red", 31],
    ["yellow", 33],
    ["cyan", 36],
    ["gray", 90]
  ].map(([name, code]) => [name, useColor ? `\x1B[${code}m` : ""])
);
3613
/**
 * Print the end-of-run summary to stdout.
 *
 * For each spec a status line is printed; when a report exists, one line
 * per assertion follows, with the first line of each failure message under
 * failed assertions. Spec and test totals close the summary. Specs are
 * counted as passed or failed by exit code; test totals come from reports.
 *
 * @param {Array<object>} summaries - Per-spec run summaries (`featureName`,
 *   `specName`, `exitCode`, `report`).
 * @returns {void}
 */
function printSummary(summaries) {
  const emit = (text) => process.stdout.write(text);
  const statusIcon = (good) => (good ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`);
  const totals = {
    tests: 0,
    passed: 0,
    failed: 0,
    skipped: 0
  };
  emit(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
  for (const summary of summaries) {
    const label = `${C.bold}${summary.featureName}/${summary.specName}${C.reset}`;
    const { report } = summary;
    if (!report) {
      // Without a report the exit code is the only signal available.
      emit(`${statusIcon(summary.exitCode === 0)} ${label} ${C.dim}(no report)${C.reset}\n`);
      continue;
    }
    totals.tests += report.numTotalTests;
    totals.passed += report.numPassedTests;
    totals.failed += report.numFailedTests;
    totals.skipped += report.numPendingTests;
    const countColor = report.success ? C.green : C.red;
    emit(`${statusIcon(report.success)} ${label} ${countColor}${report.numPassedTests}/${report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
    for (const file of report.testResults) {
      for (const assertion of file.assertionResults) {
        const icon = assertionIcon(assertion.status);
        let duration = "";
        if (assertion.duration != null) duration = ` ${C.gray}${formatDuration(assertion.duration)}${C.reset}`;
        emit(` ${icon} ${assertion.fullName}${duration}\n`);
        if (assertion.status !== "failed" || !assertion.failureMessages?.length) continue;
        for (const message of assertion.failureMessages) {
          const firstLine = message.split("\n")[0] ?? message;
          emit(` ${C.red}${firstLine}${C.reset}\n`);
        }
      }
    }
  }
  const specsPassed = summaries.filter((s) => s.exitCode === 0).length;
  const specsFailed = summaries.filter((s) => s.exitCode !== 0).length;
  const failTint = (count) => (count > 0 ? C.red : C.dim);
  emit("\n");
  emit(` ${C.bold}Specs${C.reset} ${summaries.length} (${C.green}${specsPassed} passed${C.reset}, ${failTint(specsFailed)}${specsFailed} failed${C.reset})\n`);
  emit(` ${C.bold}Tests${C.reset} ${totals.tests} (${C.green}${totals.passed} passed${C.reset}, ${failTint(totals.failed)}${totals.failed} failed${C.reset}, ${C.yellow}${totals.skipped} skipped${C.reset})\n`);
  emit("\n");
}
3651
/**
 * Map an assertion status from the JSON report to a colored icon.
 *
 * @param {string} status - Assertion status ("passed", "failed", "skipped",
 *   "pending", "todo", or anything else the reporter may emit).
 * @returns {string} A one-character icon wrapped in color codes. Unknown
 *   statuses get a dim "?" instead of `undefined`, which would otherwise be
 *   printed literally in the summary line.
 */
function assertionIcon(status) {
  switch (status) {
    case "passed": return `${C.green}✔${C.reset}`;
    case "failed": return `${C.red}✖${C.reset}`;
    case "skipped":
    case "pending":
    case "todo": return `${C.yellow}◌${C.reset}`;
    default: return `${C.dim}?${C.reset}`;
  }
}
3660
/**
 * Format a duration in milliseconds for display.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Whole milliseconds ("250ms") below one second, otherwise
 *   seconds with two decimals ("1.50s").
 */
function formatDuration(ms) {
  // Compare the rounded value so that e.g. 999.6 renders as "1.00s" rather
  // than the out-of-range "1000ms".
  const wholeMs = Math.round(ms);
  if (wholeMs < 1e3) return `${wholeMs}ms`;
  return `${(ms / 1e3).toFixed(2)}s`;
}
3664
// Lines matching any of these patterns are dropped from the forwarded output.
const NOISE_LINE_PATTERNS = [/^JSON report written to /];
/**
 * Forward a stream to a sink line by line, dropping noise lines.
 *
 * Complete lines are written with their trailing "\n". A final partial line
 * is written as-is once the source ends.
 *
 * @param {object} source - Async-iterable stream supporting `setEncoding`.
 * @param {{write: (text: string) => *}} sink - Destination for kept lines.
 * @returns {Promise<void>} Resolves when the source has been fully consumed.
 */
async function streamFiltered(source, sink) {
  const isNoise = (text) => NOISE_LINE_PATTERNS.some((pattern) => pattern.test(text));
  source.setEncoding("utf8");
  let pending = "";
  for await (const chunk of source) {
    const segments = (pending + chunk).split("\n");
    // The last segment has no newline yet; hold it back for the next chunk.
    pending = segments.pop();
    for (const line of segments) {
      if (!isNoise(line)) sink.write(line + "\n");
    }
  }
  if (pending.length > 0 && !isNoise(pending)) sink.write(pending);
}
3680
/**
 * Expand the CLI target into a list of specs to run.
 *
 * @param {string|undefined} target - Nothing (all specs), '<feature>' (every
 *   spec of that feature), or '<feature>/<spec>' (exactly one spec).
 * @returns {Promise<Array<{featureName: string, specName: string}>>}
 */
async function resolveSpecs(target) {
  if (!target) return listAllSpecs();
  if (!target.includes("/")) {
    const specNames = await listSpecsForFeature(target);
    return specNames.map((specName) => ({
      featureName: target,
      specName
    }));
  }
  const parsed = parseSpecPath(target);
  return [{
    featureName: parsed.featureName,
    specName: parsed.specName
  }];
}
3694
+ //#endregion
3162
3695
  //#region src/cli/draft.ts
3163
- const CATEGORY_LABEL$1 = {
3164
- assertable: "Assertability",
3165
- setups: "Setup references",
3166
- granularity: "Step granularity",
3167
- unimplemented: "Unimplemented checks"
3168
- };
3169
- const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a test-spec.md with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
3696
+ const CATEGORY_LABEL = DRAFT_CATEGORY_LABEL;
3697
+ const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a spec.yaml with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
3170
3698
  await ensureCcqaDir();
3171
3699
  let featureName;
3172
3700
  let specName;
@@ -3216,7 +3744,7 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
3216
3744
  async function runOneTurn(input) {
3217
3745
  const { featureName, specName, existing, userInput, autoApply } = input;
3218
3746
  const isFirstRun = existing === null;
3219
- const systemPrompt = buildDraftSystemPrompt();
3747
+ const systemPrompt = buildDraftSystemPrompt(await loadAvailableBlocks());
3220
3748
  const userPrompt = buildDraftPrompt({
3221
3749
  mode: isFirstRun ? "create" : "refine",
3222
3750
  existing: existing ?? "",
@@ -3349,24 +3877,24 @@ function printReviewBlock(issues) {
3349
3877
  }
3350
3878
  if (errors.length) {
3351
3879
  process.stdout.write(` ERRORS (${errors.length})\n`);
3352
- for (const issue of errors) writeFinding$1(issue);
3880
+ for (const issue of errors) writeFinding(issue);
3353
3881
  process.stdout.write("\n");
3354
3882
  }
3355
3883
  if (warnings.length) {
3356
3884
  process.stdout.write(` WARNINGS (${warnings.length})\n`);
3357
- for (const issue of warnings) writeFinding$1(issue);
3885
+ for (const issue of warnings) writeFinding(issue);
3358
3886
  process.stdout.write("\n");
3359
3887
  }
3360
3888
  if (passed.length) {
3361
- const names = passed.map((i) => CATEGORY_LABEL$1[i.category]).join(", ");
3889
+ const names = passed.map((i) => CATEGORY_LABEL[i.category]).join(", ");
3362
3890
  process.stdout.write(` PASSED (${passed.length})\n ${names}\n`);
3363
3891
  }
3364
3892
  process.stdout.write(`\n${RULE}\n\n`);
3365
3893
  return errors.length > 0;
3366
3894
  }
3367
- function writeFinding$1(issue) {
3895
+ function writeFinding(issue) {
3368
3896
  const stepPart = issue.stepId ? ` ${issue.stepId}` : "";
3369
- process.stdout.write(` ${CATEGORY_LABEL$1[issue.category]}${stepPart}\n`);
3897
+ process.stdout.write(` ${CATEGORY_LABEL[issue.category]}${stepPart}\n`);
3370
3898
  process.stdout.write(` ${issue.message}\n`);
3371
3899
  if (issue.detail) process.stdout.write(` └ ${issue.detail.replace(/\n/g, "\n ")}\n`);
3372
3900
  }
@@ -3380,10 +3908,7 @@ async function proposeNaming(opts) {
3380
3908
  const tree = await listFeatureTree();
3381
3909
  const treeForPrompt = tree.map((f) => ({
3382
3910
  featureName: f.featureName,
3383
- specs: f.specs.map((s) => ({
3384
- specName: s.specName,
3385
- ...s.title ? { title: s.title } : {}
3386
- }))
3911
+ specs: f.specs.map((s) => ({ specName: s.specName }))
3387
3912
  }));
3388
3913
  info("Proposing a feature/spec name based on your intent...");
3389
3914
  const { result, isError } = await invokeClaudeStreaming({
@@ -3533,49 +4058,6 @@ function truncate(s, n) {
3533
4058
  return s.slice(s.length - n);
3534
4059
  }
3535
4060
  //#endregion
3536
- //#region src/prompts/drift.ts
3537
- function buildDriftSystemPrompt() {
3538
- return `${buildDraftSystemPrompt()}
3539
-
3540
- ## Drift mode
3541
-
3542
- You are running non-interactively in CI. The user will not see or apply the patch — only the \`issues\` array.
3543
-
3544
- - Always set \`patch\` to "" in your response.
3545
- - Focus issue messages on what is **out of sync** between the spec and the current codebase: missing aria-labels, renamed routes, removed buttons, placeholders that no longer exist, setup references that point to non-existent files.
3546
- - Do NOT raise issues about stylistic preferences in the spec wording.
3547
- - Treat \`category: unimplemented\` as the primary signal for drift: anything the spec asserts that you cannot find in code is a drift finding.
3548
-
3549
- ## Drift severity policy (STRICT)
3550
-
3551
- The CLI exits non-zero when any issue has \`severity: "ERROR"\` (default) or — with \`--severity warn\` — when any \`WARN\` is present. Pick severity by **whether a deterministic replay of this spec would fail today**, not by how confident you are in your own analysis.
3552
-
3553
- Use **ERROR** when the spec would break on replay:
3554
- - A selector the spec relies on (\`aria-label\`, \`placeholder\`, \`data-testid\`, button text) **does not exist anywhere in the source**.
3555
- - A URL / route the spec navigates to is no longer defined.
3556
- - An **Expected** asserts a string or visible text that is no longer rendered by the relevant component.
3557
- - A \`setups[].name\` does not resolve to \`.ccqa/setups/<name>/setup-spec.md\`, or a \`params\` key is not declared in that setup's \`placeholders\`.
3558
- - The spec references a feature/page that has been removed from the codebase.
3559
-
3560
- Use **WARN** when the spec is still likely to work, but quality could improve:
3561
- - The Expected is vague ("a message appears") when a precise string exists in code.
3562
- - A step bundles multiple actions, or a needed intermediate verification step is missing.
3563
- - Stable signals exist that the spec could leverage but currently doesn't.
3564
- - You are unsure whether a referenced string exists (give the user the benefit of the doubt; do not hard-fail CI on uncertainty).
3565
-
3566
- Use **OK** for axes you actively verified and found no issue.
3567
-
3568
- If you cannot decide between ERROR and WARN, choose WARN. Reserve ERROR for findings you can back up with a specific file path or grep result that proves the drift.
3569
- `;
3570
- }
3571
- function buildDriftUserPrompt(existing) {
3572
- return buildDraftPrompt({
3573
- mode: "refine",
3574
- existing,
3575
- userInput: ""
3576
- });
3577
- }
3578
- //#endregion
3579
4061
  //#region src/drift/affected.ts
3580
4062
  const execFileP = promisify(execFile);
3581
4063
  /**
@@ -3595,10 +4077,10 @@ function resolveBaseRef(explicit) {
3595
4077
  * post-rename layout.
3596
4078
  *
3597
4079
  * Paths are re-rooted to be relative to `cwd`, not the git repo root. In a
3598
- * monorepo where `cwd` is a sub-package (e.g. `js/apps/knowledge-webapp`),
3599
- * git emits paths relative to the repo root, but specs declare relatedPaths
3600
- * relative to their own package. Changes outside `cwd` are dropped so an
3601
- * unrelated PR can never accidentally scope a sub-package's specs in.
4080
+ * monorepo where `cwd` is a sub-package (e.g. `apps/foo`), git emits paths
4081
+ * relative to the repo root, but specs declare relatedPaths relative to
4082
+ * their own package. Changes outside `cwd` are dropped so an unrelated PR
4083
+ * can never accidentally scope a sub-package's specs in.
3602
4084
  */
3603
4085
  async function getChangedFiles(base, cwd) {
3604
4086
  const [{ stdout: rootOut }, { stdout: diffOut }] = await Promise.all([execFileP("git", ["rev-parse", "--show-toplevel"], { cwd }), execFileP("git", [
@@ -3837,9 +4319,8 @@ ${previews.map((p) => {
3837
4319
  ## Existing specs
3838
4320
 
3839
4321
  ${specs.map((s) => {
3840
- const title = s.title ? ` — ${s.title}` : "";
3841
4322
  const paths = s.relatedPaths.length === 0 ? " (no relatedPaths declared)" : s.relatedPaths.map((p) => ` - ${p}`).join("\n");
3842
- return `- ${s.featureName}/${s.specName}${title}\n${paths}`;
4323
+ return `- ${s.featureName}/${s.specName}\n${paths}`;
3843
4324
  }).join("\n")}
3844
4325
 
3845
4326
  ## Task
@@ -3850,7 +4331,7 @@ Return the spec keys that might be affected by any of the new files. Conservativ
3850
4331
  //#endregion
3851
4332
  //#region src/cli/drift.ts
3852
4333
  const DEFAULT_CONCURRENCY = 3;
3853
- const driftCommand = new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each test-spec.md is still in sync with the current codebase (CI-friendly, no patches applied).").option("--format <fmt>", "Output format: text | json | github", "text").option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error").option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`).option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().").option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.").option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.").action(async (specPath, opts) => {
4334
+ const driftCommand = new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each spec.yaml is still in sync with the current codebase (CI-friendly, no patches applied).").option("--format <fmt>", "Output format: text | json | github", "text").option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error").option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`).option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().").option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.").option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.").action(async (specPath, opts) => {
3854
4335
  const format = parseFormat(opts.format);
3855
4336
  const threshold = parseSeverity(opts.severity);
3856
4337
  const concurrency = parseConcurrency(opts.concurrency);
@@ -3878,8 +4359,18 @@ const driftCommand = new Command("drift").argument("[feature/spec]", "Optional s
3878
4359
  if (format === "text") meta("scoped", `${targets.length} of ${total} spec${total > 1 ? "s" : ""}`);
3879
4360
  if (targets.length === 0) exitWithNoSpecs(format, "no specs intersect the changed file set; nothing to check");
3880
4361
  }
3881
- const results = await runChecks(targets, concurrency, opts.model, cwd, format);
3882
- emitReport(results, format, cwd);
4362
+ const blocks = await loadAvailableBlocks(cwd);
4363
+ const results = await analyzeDrift({
4364
+ targets,
4365
+ cwd,
4366
+ blocks,
4367
+ concurrency,
4368
+ ...opts.model ? { model: opts.model } : {},
4369
+ onSpecStart: (t) => {
4370
+ if (format === "text") info(`checking ${t.featureName}/${t.specName}`);
4371
+ }
4372
+ });
4373
+ process.stdout.write(renderDrift(results, format, cwd));
3883
4374
  process.exit(determineExitCode(results, threshold));
3884
4375
  });
3885
4376
  function exitWithNoSpecs(format, message) {
@@ -3905,12 +4396,21 @@ async function filterByChanged(input) {
3905
4396
  const newFiles = changed.filter((f) => f.status === "added");
3906
4397
  const existingChanges = changed.filter((f) => f.status !== "added");
3907
4398
  const affected = /* @__PURE__ */ new Set();
4399
+ const touchedBlockNames = /* @__PURE__ */ new Set();
4400
+ for (const f of changed) {
4401
+ const blockName = parseBlockPath(f.path);
4402
+ if (blockName) touchedBlockNames.add(blockName);
4403
+ }
3908
4404
  for (const t of targets) {
3909
4405
  if (!t.relatedPaths) {
3910
4406
  affected.add(specKey(t));
3911
4407
  continue;
3912
4408
  }
3913
- if (existingChanges.some((f) => isPathAffectedBy(f.path, t.relatedPaths)) || newFiles.some((f) => isPathAffectedBy(f.path, t.relatedPaths))) affected.add(specKey(t));
4409
+ if (existingChanges.some((f) => isPathAffectedBy(f.path, t.relatedPaths)) || newFiles.some((f) => isPathAffectedBy(f.path, t.relatedPaths))) {
4410
+ affected.add(specKey(t));
4411
+ continue;
4412
+ }
4413
+ if (t.includedBlocks?.some((name) => touchedBlockNames.has(name))) affected.add(specKey(t));
3914
4414
  }
3915
4415
  if (newFiles.length > 0) {
3916
4416
  if (format === "text") info(`routing ${newFiles.length} new file(s) to specs via Claude...`);
@@ -3919,7 +4419,6 @@ async function filterByChanged(input) {
3919
4419
  specs: targets.filter((t) => t.relatedPaths).map((t) => ({
3920
4420
  featureName: t.featureName,
3921
4421
  specName: t.specName,
3922
- title: t.title,
3923
4422
  relatedPaths: t.relatedPaths
3924
4423
  })),
3925
4424
  cwd,
@@ -3930,18 +4429,20 @@ async function filterByChanged(input) {
3930
4429
  return targets.filter((t) => affected.has(specKey(t)));
3931
4430
  }
3932
4431
  async function collectTargets(specPath, cwd) {
4432
+ const tree = await listFeatureTree(cwd);
3933
4433
  if (specPath) {
3934
4434
  const { featureName, specName } = parseSpecPath(specPath);
3935
- if (await tryReadSpecFile(featureName, specName, cwd) === null) {
4435
+ const spec = tree.find((f) => f.featureName === featureName)?.specs.find((s) => s.specName === specName);
4436
+ if (!spec?.hasSpecFile) {
3936
4437
  error(`spec not found: ${featureName}/${specName} (under ${cwd})`);
3937
4438
  process.exit(1);
3938
4439
  }
3939
4440
  return [{
3940
4441
  featureName,
3941
- specName
4442
+ specName,
4443
+ includedBlocks: spec.includedBlocks ?? []
3942
4444
  }];
3943
4445
  }
3944
- const tree = await listFeatureTree(cwd);
3945
4446
  const out = [];
3946
4447
  for (const feature of tree) for (const spec of feature.specs) {
3947
4448
  if (!spec.hasSpecFile) continue;
@@ -3950,206 +4451,11 @@ async function collectTargets(specPath, cwd) {
3950
4451
  specName: spec.specName
3951
4452
  };
3952
4453
  if (spec.relatedPaths) t.relatedPaths = spec.relatedPaths;
3953
- if (spec.title) t.title = spec.title;
4454
+ if (spec.includedBlocks) t.includedBlocks = spec.includedBlocks;
3954
4455
  out.push(t);
3955
4456
  }
3956
4457
  return out;
3957
4458
  }
3958
- async function runChecks(targets, concurrency, model, cwd, format) {
3959
- const results = new Array(targets.length);
3960
- let cursor = 0;
3961
- const worker = async () => {
3962
- while (true) {
3963
- const idx = cursor++;
3964
- if (idx >= targets.length) return;
3965
- const target = targets[idx];
3966
- results[idx] = await checkSpec(target, model, cwd, format);
3967
- }
3968
- };
3969
- const pool = Array.from({ length: Math.min(concurrency, targets.length) }, () => worker());
3970
- await Promise.all(pool);
3971
- return results;
3972
- }
3973
- async function checkSpec(target, model, cwd, format) {
3974
- const { featureName, specName } = target;
3975
- const existing = await tryReadSpecFile(featureName, specName, cwd);
3976
- if (existing === null) return {
3977
- target,
3978
- ok: false,
3979
- issues: [],
3980
- error: `spec file disappeared after enumeration: ${featureName}/${specName}`
3981
- };
3982
- if (format === "text") info(`checking ${featureName}/${specName}`);
3983
- const { result, isError } = await invokeClaudeStreaming({
3984
- prompt: buildDriftUserPrompt(existing),
3985
- systemPrompt: buildDriftSystemPrompt(),
3986
- allowedTools: [
3987
- "Read",
3988
- "Grep",
3989
- "Glob"
3990
- ],
3991
- silenceBashLog: true,
3992
- cwd,
3993
- ...model ? { model } : {}
3994
- }, (_msg) => {});
3995
- if (isError) return {
3996
- target,
3997
- ok: false,
3998
- issues: [],
3999
- error: "Claude returned an error result"
4000
- };
4001
- const json = extractJsonBlock(result);
4002
- if (!json) return {
4003
- target,
4004
- ok: false,
4005
- issues: [],
4006
- error: "Claude did not return a json block"
4007
- };
4008
- let report;
4009
- try {
4010
- report = DraftReportSchema.parse(JSON.parse(json));
4011
- } catch (e) {
4012
- return {
4013
- target,
4014
- ok: false,
4015
- issues: [],
4016
- error: `failed to parse drift report: ${e.message}`
4017
- };
4018
- }
4019
- return {
4020
- target,
4021
- ok: true,
4022
- issues: report.issues
4023
- };
4024
- }
4025
- function emitReport(results, format, cwd) {
4026
- if (format === "json") {
4027
- emitJson(results);
4028
- return;
4029
- }
4030
- if (format === "github") {
4031
- emitGithub(results, cwd);
4032
- return;
4033
- }
4034
- emitText(results);
4035
- }
4036
- const CATEGORY_LABEL = {
4037
- assertable: "Assertability",
4038
- setups: "Setup references",
4039
- granularity: "Step granularity",
4040
- unimplemented: "Unimplemented checks"
4041
- };
4042
- const HEAVY_RULE = "═".repeat(72);
4043
- function emitText(results) {
4044
- for (const r of results) {
4045
- blank();
4046
- const heading = `══ ${r.target.featureName}/${r.target.specName} `;
4047
- const tail = "═".repeat(Math.max(3, 72 - heading.length));
4048
- process.stdout.write(`${heading}${tail}\n`);
4049
- if (r.error) {
4050
- process.stdout.write(` ERROR ${r.error}\n`);
4051
- continue;
4052
- }
4053
- const errors = r.issues.filter((i) => i.severity === "ERROR");
4054
- const warnings = r.issues.filter((i) => i.severity === "WARN");
4055
- const passed = r.issues.filter((i) => i.severity === "OK");
4056
- if (errors.length === 0 && warnings.length === 0) {
4057
- const label = passed.length === 1 ? "check" : "checks";
4058
- const detail = passed.length > 0 ? `all ${passed.length} ${label} passed` : "no issues";
4059
- process.stdout.write(` ✓ ${detail}\n`);
4060
- continue;
4061
- }
4062
- for (const issue of errors) writeFinding("ERROR", issue);
4063
- for (const issue of warnings) writeFinding("WARN", issue);
4064
- if (passed.length > 0) {
4065
- const names = passed.map((i) => CATEGORY_LABEL[i.category]).join(", ");
4066
- process.stdout.write(`\n ✓ passed (${passed.length}): ${names}\n`);
4067
- }
4068
- }
4069
- blank();
4070
- process.stdout.write(`${HEAVY_RULE}\n`);
4071
- const totals = summarize(results);
4072
- meta("specs", `${results.length} (${totals.errored} errored)`);
4073
- meta("findings", `${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`);
4074
- }
4075
- function writeFinding(level, issue) {
4076
- const stepPart = issue.stepId ? ` ${issue.stepId}` : "";
4077
- process.stdout.write(`\n ${level} ${CATEGORY_LABEL[issue.category]}${stepPart}\n`);
4078
- process.stdout.write(` ${issue.message}\n`);
4079
- if (issue.detail) process.stdout.write(` └ ${issue.detail.replace(/\n/g, "\n ")}\n`);
4080
- }
4081
- function emitJson(results) {
4082
- const payload = { specs: results.map((r) => ({
4083
- feature: r.target.featureName,
4084
- spec: r.target.specName,
4085
- ok: r.ok,
4086
- ...r.error ? { error: r.error } : {},
4087
- issues: r.issues.map((i) => ({
4088
- severity: i.severity,
4089
- category: i.category,
4090
- stepId: i.stepId,
4091
- message: i.message,
4092
- ...i.detail ? { detail: i.detail } : {}
4093
- }))
4094
- })) };
4095
- process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
4096
- }
4097
- function emitGithub(results, cwd) {
4098
- const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
4099
- for (const r of results) {
4100
- const file = githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName);
4101
- if (r.error) {
4102
- process.stdout.write(`::error file=${file}::${escapeGhMessage(r.error)}\n`);
4103
- continue;
4104
- }
4105
- for (const issue of r.issues) {
4106
- if (issue.severity === "OK") continue;
4107
- const level = issue.severity === "ERROR" ? "error" : "warning";
4108
- const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
4109
- const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
4110
- process.stdout.write(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}\n`);
4111
- }
4112
- }
4113
- }
4114
- function githubRelPath(cwd, repoRoot, featureName, specName) {
4115
- const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "test-spec.md");
4116
- const rel = relative(repoRoot, abs);
4117
- return rel.startsWith("..") ? abs : rel;
4118
- }
4119
- function escapeGhMessage(s) {
4120
- return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A");
4121
- }
4122
- function escapeGhProp(s) {
4123
- return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A").replace(/,/g, "%2C").replace(/:/g, "%3A");
4124
- }
4125
- function summarize(results) {
4126
- let error = 0;
4127
- let warn = 0;
4128
- let ok = 0;
4129
- let errored = 0;
4130
- for (const r of results) {
4131
- if (r.error) errored++;
4132
- for (const issue of r.issues) if (issue.severity === "ERROR") error++;
4133
- else if (issue.severity === "WARN") warn++;
4134
- else ok++;
4135
- }
4136
- return {
4137
- error,
4138
- warn,
4139
- ok,
4140
- errored
4141
- };
4142
- }
4143
- function determineExitCode(results, threshold) {
4144
- for (const r of results) {
4145
- if (r.error) return 1;
4146
- for (const issue of r.issues) {
4147
- if (issue.severity === "ERROR") return 1;
4148
- if (threshold === "warn" && issue.severity === "WARN") return 1;
4149
- }
4150
- }
4151
- return 0;
4152
- }
4153
4459
  function parseFormat(raw) {
4154
4460
  const v = raw ?? "text";
4155
4461
  if (v === "text" || v === "json" || v === "github") return v;
@@ -4192,8 +4498,6 @@ program.addCommand(driftCommand);
4192
4498
  program.addCommand(traceCommand);
4193
4499
  program.addCommand(generateCommand);
4194
4500
  program.addCommand(runCommand);
4195
- program.addCommand(traceSetupCommand);
4196
- program.addCommand(generateSetupCommand);
4197
4501
  program.parse();
4198
4502
  //#endregion
4199
4503
  export {};