ccqa 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/ccqa.mjs CHANGED
@@ -9,11 +9,11 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
9
9
  import { ZodError, z } from "zod";
10
10
  import { delimiter, dirname, join, relative, resolve } from "node:path";
11
11
  import { parse, stringify } from "yaml";
12
- import { execFile, spawn } from "node:child_process";
12
+ import { execFile, spawn, spawnSync } from "node:child_process";
13
13
  import { createInterface } from "node:readline";
14
14
  import { homedir, tmpdir } from "node:os";
15
- import { createInterface as createInterface$1 } from "node:readline/promises";
16
15
  import { promisify } from "node:util";
16
+ import { createInterface as createInterface$1 } from "node:readline/promises";
17
17
  //#region src/prompts/trace.ts
18
18
  function generateSessionName() {
19
19
  return `ccqa-trace-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`;
@@ -562,6 +562,71 @@ function isParamRequired(param) {
562
562
  return param.required !== false;
563
563
  }
564
564
  //#endregion
565
+ //#region src/spec/perspectives-schema.ts
566
+ /**
567
+ * `perspectives.yaml` is an inventory of the test coverage that already
568
+ * exists under `.ccqa/` — the ccqa equivalent of a hand-kept QA spreadsheet,
569
+ * but scoped deliberately to *facts about what is tested today*.
570
+ *
571
+ * It intentionally does NOT carry severity / importance / priority. Deciding
572
+ * "how badly does it hurt the customer if this breaks" is a human + PdM
573
+ * decision, not something ccqa should author or silently overwrite. Keeping
574
+ * those columns out of the schema (and `.strict()` rejecting them) makes the
575
+ * boundary explicit: perspectives is a factual stock-take, severity lives
576
+ * wherever the team decides on it.
577
+ *
578
+ * It also does NOT attempt code-vs-test gap analysis (listing untested
579
+ * areas). A flat dump of "things in code with no test" is noise without
580
+ * prioritisation; that is a separate, later concern.
581
+ */
582
+ /**
583
+ * Whether the spec has been traced / generated. Both are derived mechanically
584
+ * by the CLI from on-disk artifacts (actions.json / test.spec.ts), never
585
+ * written by Claude — these are facts and must not drift.
586
+ */
587
+ const PerspectiveStatusSchema = z.object({
588
+ traced: z.boolean(),
589
+ generated: z.boolean()
590
+ }).strict();
591
+ /**
592
+ * One test case in the inventory.
593
+ *
594
+ * - `title` / `relatedPaths` are transcribed verbatim from the spec.yaml.
595
+ * - `status` is mechanically derived (see PerspectiveStatusSchema).
596
+ * - `summary` is a 1–2 sentence description of *what the spec verifies*,
597
+ * derived from its steps by Claude.
598
+ * - `startScreen` / `testCondition` / `preconditions` mirror the columns a
599
+ * hand-kept QA table carries. They are Claude-derived from the spec's
600
+ * steps (the opening screen, the state the test assumes, and the setup
601
+ * prerequisites such as which role logs in). Optional: a spec may not
602
+ * express all of them.
603
+ * - `note` is a human-only field. Regenerating perspectives preserves it.
604
+ *
605
+ * The detailed test procedure and expected results are deliberately NOT
606
+ * duplicated here — the spec.yaml steps are the single source of truth for
607
+ * those. The Markdown view links back to the spec instead of restating them.
608
+ */
609
+ const PerspectiveSpecSchema = z.object({
610
+ specName: z.string().min(1),
611
+ title: z.string().min(1),
612
+ summary: z.string(),
613
+ startScreen: z.string().optional(),
614
+ testCondition: z.string().optional(),
615
+ preconditions: z.array(z.string().min(1)).optional(),
616
+ relatedPaths: z.array(z.string().min(1)).optional(),
617
+ status: PerspectiveStatusSchema,
618
+ note: z.string().optional()
619
+ }).strict();
620
+ const PerspectiveFeatureSchema = z.object({
621
+ featureName: z.string().min(1),
622
+ specs: z.array(PerspectiveSpecSchema)
623
+ }).strict();
624
+ /** Top-level perspectives schema. `.strict()` rejects any unknown key. */
625
+ const PerspectivesSchema = z.object({
626
+ generatedAt: z.string().optional(),
627
+ features: z.array(PerspectiveFeatureSchema)
628
+ }).strict();
629
+ //#endregion
565
630
  //#region src/types.ts
566
631
  const RouteStepSchema = z.object({
567
632
  title: z.string(),
@@ -633,7 +698,7 @@ const DraftIssueSchema = z.object({
633
698
  ]),
634
699
  stepId: z.string().nullable(),
635
700
  message: z.string(),
636
- detail: z.string().optional()
701
+ detail: z.string().nullish()
637
702
  });
638
703
  const DraftReportSchema = z.object({
639
704
  issues: z.array(DraftIssueSchema),
@@ -1205,6 +1270,8 @@ function collectIncludedBlockNames(spec) {
1205
1270
  //#region src/store/index.ts
1206
1271
  const CCQA_DIR = ".ccqa";
1207
1272
  const SPEC_FILE = "spec.yaml";
1273
+ const PERSPECTIVES_FILE = "perspectives.yaml";
1274
+ const PERSPECTIVES_MD_FILE = "perspectives.md";
1208
1275
  function getCcqaDir(cwd = process.cwd()) {
1209
1276
  return join(cwd, CCQA_DIR);
1210
1277
  }
@@ -1250,6 +1317,56 @@ async function saveSpecFile(featureName, specName, content, cwd) {
1250
1317
  await writeFile(specPath, content.endsWith("\n") ? content : content + "\n", "utf-8");
1251
1318
  return specPath;
1252
1319
  }
1320
+ /** Absolute path to the single repo-wide `.ccqa/perspectives.yaml`. */
1321
+ function getPerspectivesPath(cwd) {
1322
+ return join(getCcqaDir(cwd), PERSPECTIVES_FILE);
1323
+ }
1324
+ /**
1325
+ * Read `.ccqa/perspectives.yaml` raw. Returns null when the file does not
1326
+ * exist (first-ever generation) so callers can treat it as optional.
1327
+ */
1328
+ async function tryReadPerspectives(cwd) {
1329
+ return readFile(getPerspectivesPath(cwd), "utf-8").catch(() => null);
1330
+ }
1331
+ /**
1332
+ * Write `.ccqa/perspectives.yaml`. Mirrors `saveSpecFile`: ensures the
1333
+ * directory exists and the content ends in a trailing newline.
1334
+ */
1335
+ async function savePerspectives(content, cwd) {
1336
+ await mkdir(getCcqaDir(cwd), { recursive: true });
1337
+ const path = getPerspectivesPath(cwd);
1338
+ await writeFile(path, content.endsWith("\n") ? content : content + "\n", "utf-8");
1339
+ return path;
1340
+ }
1341
+ /**
1342
+ * Human-readable Markdown companion to perspectives.yaml. The `.yaml` is the
1343
+ * machine-readable source of truth; the `.md` is a rendered view for review.
1344
+ */
1345
+ function getPerspectivesMarkdownPath(cwd) {
1346
+ return join(getCcqaDir(cwd), PERSPECTIVES_MD_FILE);
1347
+ }
1348
+ async function savePerspectivesMarkdown(content, cwd) {
1349
+ await mkdir(getCcqaDir(cwd), { recursive: true });
1350
+ const path = getPerspectivesMarkdownPath(cwd);
1351
+ await writeFile(path, content.endsWith("\n") ? content : content + "\n", "utf-8");
1352
+ return path;
1353
+ }
1354
+ /**
1355
+ * Per-category detail view: `.ccqa/features/<feature>/perspectives.md`. The
1356
+ * root `perspectives.md` is a thin category index that links here; this file
1357
+ * carries the full per-case tables for one feature. The feature dir already
1358
+ * exists (it holds the test cases), but `mkdir -p` keeps this safe when called
1359
+ * in isolation.
1360
+ */
1361
+ function getFeaturePerspectivesMarkdownPath(featureName, cwd) {
1362
+ return join(getFeatureDir(featureName, cwd), PERSPECTIVES_MD_FILE);
1363
+ }
1364
+ async function saveFeaturePerspectivesMarkdown(featureName, content, cwd) {
1365
+ await mkdir(getFeatureDir(featureName, cwd), { recursive: true });
1366
+ const path = getFeaturePerspectivesMarkdownPath(featureName, cwd);
1367
+ await writeFile(path, content.endsWith("\n") ? content : content + "\n", "utf-8");
1368
+ return path;
1369
+ }
1253
1370
  /**
1254
1371
  * Replace (or insert) the `relatedPaths` key in the spec. Preserves every
1255
1372
  * other top-level field and the entire steps array. Returns the absolute
@@ -2188,16 +2305,60 @@ function formatUnstableDrop(drop) {
2188
2305
  return `${`${action.command}${action.assertType ? " " + action.assertType : ""}`}: contains unstable literal (${ids}) — ${samples}`;
2189
2306
  }
2190
2307
  //#endregion
2308
+ //#region src/prompts/language.ts
2309
+ /**
2310
+ * Shared language handling for every Claude-driven command. Each command
2311
+ * writes some human-readable text (drift findings, trace observations, draft
2312
+ * prose, diagnose hints, perspectives summaries), so the language policy is a
2313
+ * single cross-cutting concern rather than per-command logic.
2314
+ *
2315
+ * The value is a BCP-47 tag (e.g. "ja", "en") or the sentinel "auto". With
2316
+ * "auto" the model follows the language of the material it is given — Japanese
2317
+ * specs/codebase yield Japanese output — and `languageDirective` returns an
2318
+ * empty string so prompts stay byte-identical to the no-flag baseline.
2319
+ */
2320
+ const DEFAULT_LANGUAGE = "auto";
2321
+ /**
2322
+ * The instruction appended to a command's system prompt. Empty for "auto"
2323
+ * (and undefined / blank), so the model keeps its natural material-following
2324
+ * behaviour; otherwise it pins every human-readable field to the given tag.
2325
+ */
2326
+ function languageDirective(language) {
2327
+ const lang = (language ?? "auto").trim();
2328
+ if (lang === "" || lang === "auto") return "";
2329
+ return `\n\nIMPORTANT: Write every human-readable field, message, and explanation in **${lang}** (BCP-47 language tag), regardless of the language of the spec or codebase.`;
2330
+ }
2331
+ /**
2332
+ * Whether the CLI's own interactive prompts (the strings ccqa prints itself,
2333
+ * not the model's output) should be Japanese. Only an explicit Japanese tag
2334
+ * (`ja`, `ja-JP`, …) opts in; `auto` (the default) and every other tag keep
2335
+ * the English prompts, so an English user running with no flag is unaffected.
2336
+ */
2337
+ function useJapanesePrompts(language) {
2338
+ return /^ja\b/i.test((language ?? "").trim());
2339
+ }
2340
+ //#endregion
2341
+ //#region src/cli/options.ts
2342
+ /**
2343
+ * Shared `--language` flag. Every Claude-driven command writes some
2344
+ * human-readable text, so language is a cross-cutting concern handled the same
2345
+ * way everywhere — much like `--model`. The value is a BCP-47 tag (e.g. "ja",
2346
+ * "en") or "auto" (default), which follows the language of the material.
2347
+ */
2348
+ function addLanguageOption(command) {
2349
+ return command.option("--language <bcp47>", "Language for human-readable output (e.g. 'en', 'ja'). Default 'auto' follows the language of the spec/codebase.", DEFAULT_LANGUAGE);
2350
+ }
2351
+ //#endregion
2191
2352
  //#region src/cli/trace.ts
2192
2353
  const VALIDATION_MODES = ["lenient", "strict"];
2193
- const traceCommand = new Command("trace").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Run agent-browser, verify assertions, and record structured actions").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions with a warning but keeps them; 'strict' drops them from actions.json.", (raw) => {
2354
+ const traceCommand = addLanguageOption(new Command("trace").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Run agent-browser, verify assertions, and record structured actions").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions with a warning but keeps them; 'strict' drops them from actions.json.", (raw) => {
2194
2355
  if (VALIDATION_MODES.includes(raw)) return raw;
2195
2356
  throw new Error(`--validation-mode must be one of ${VALIDATION_MODES.join(" | ")}`);
2196
- }, "lenient").action(async (specPath, opts) => {
2357
+ }, "lenient")).action(async (specPath, opts) => {
2197
2358
  const { featureName, specName } = parseSpecPath(specPath);
2198
- await runTrace(featureName, specName, opts.model, opts.validationMode ?? "lenient");
2359
+ await runTrace(featureName, specName, opts.model, opts.validationMode ?? "lenient", opts.language);
2199
2360
  });
2200
- async function runTrace(featureName, specName, model, validationMode = "lenient") {
2361
+ async function runTrace(featureName, specName, model, validationMode = "lenient", language) {
2201
2362
  header("trace", `${featureName}/${specName}`);
2202
2363
  try {
2203
2364
  meta("agent-browser", assertAgentBrowserAvailable());
@@ -2228,7 +2389,7 @@ async function runTrace(featureName, specName, model, validationMode = "lenient"
2228
2389
  });
2229
2390
  const userPrompt = await loadTraceUserPrompt();
2230
2391
  if (userPrompt !== null) meta("user-prompt", ".ccqa/prompts/trace.user.md");
2231
- const systemPrompt = userPrompt === null ? baseSystemPrompt : `${baseSystemPrompt}\n## Project-specific guidance\n\n${userPrompt}\n`;
2392
+ const systemPrompt = (userPrompt === null ? baseSystemPrompt : `${baseSystemPrompt}\n## Project-specific guidance\n\n${userPrompt}\n`) + languageDirective(language);
2232
2393
  const prompt = buildTracePrompt(spec.title);
2233
2394
  info("Running agent-browser session...");
2234
2395
  blank();
@@ -3217,18 +3378,47 @@ function previewDiff(before, after) {
3217
3378
  return out.join("\n");
3218
3379
  }
3219
3380
  //#endregion
3381
+ //#region src/prompts/format.ts
3382
+ /**
3383
+ * Formatting helpers shared by the Claude prompt builders (diagnose, report).
3384
+ * Centralised so the prompts cannot drift apart on mechanics that must stay
3385
+ * consistent across commands.
3386
+ */
3387
+ /** Prefix every line with its 1-based line number — the form that fix suggestions cite. */
3388
+ function numberLines(script) {
3389
+ return script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
3390
+ }
3391
+ /**
3392
+ * The "## Output language" prompt section. Empty for "auto" so the prompt
3393
+ * stays byte-identical to the no-flag baseline. `fields` names the
3394
+ * human-readable JSON fields to translate; `verbatimNames` names the
3395
+ * enum-like values that must never be translated.
3396
+ */
3397
+ function outputLanguageBlock(outputLanguage, fields, verbatimNames) {
3398
+ if (outputLanguage === "auto") return "";
3399
+ return `## Output language
3400
+
3401
+ Write all human-readable fields (${fields}) in **${outputLanguage}** (BCP-47 tag).
3402
+ Selectors, file paths, identifiers, ${verbatimNames}, JSON keys, and quoted strings stay verbatim regardless of language.
3403
+
3404
+ `;
3405
+ }
3406
+ //#endregion
3220
3407
  //#region src/diagnose/prompt.ts
3221
3408
  function buildDiagnosePrompt(input) {
3222
- const { script, specYaml, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
3223
- const numbered = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
3409
+ const { script, specYaml, actions, failureLog, pageSnapshot, outputLanguage = "auto" } = input;
3410
+ const numbered = numberLines(script);
3411
+ const actionsSummary = actions.map((a, i) => {
3412
+ const parts = [`${i + 1}. ${a.command}`];
3413
+ if (a.assertType) parts.push(`assertType="${a.assertType}"`);
3414
+ if (a.selector) parts.push(`selector="${a.selector}"`);
3415
+ if (a.value) parts.push(`value="${a.value}"`);
3416
+ if (a.observation) parts.push(`→ ${a.observation}`);
3417
+ return parts.join(" ");
3418
+ }).join("\n");
3224
3419
  return `You are diagnosing a failing E2E test. The test was generated from a recorded trace of the original interaction. Compare the failing run against the original spec and recorded actions to determine WHY the test failed and what the right fix is.
3225
3420
 
3226
- ## Output language
3227
-
3228
- Write all human-readable fields (\`reasoning\`, \`reason\`) in **${outputLanguage}** (BCP-47 tag).
3229
- Selectors, file paths, identifiers, code, type names (TIMING_ISSUE, etc.), JSON keys, and quoted strings stay verbatim regardless of language.
3230
-
3231
- ## You have read-only filesystem tools
3421
+ ${outputLanguageBlock(outputLanguage, "`reasoning`, `reason`", "code, type names (TIMING_ISSUE, etc.)")}## You have read-only filesystem tools
3232
3422
 
3233
3423
  You can call \`Grep\`, \`Glob\`, and \`Read\` against the current repository before producing the JSON.
3234
3424
 
@@ -3317,14 +3507,7 @@ Pick exactly ONE category. The output JSON must follow the shape for that catego
3317
3507
  ${specYaml}
3318
3508
 
3319
3509
  ## Recorded Actions (actions.json summary)
3320
- ${actions.map((a, i) => {
3321
- const parts = [`${i + 1}. ${a.command}`];
3322
- if (a.assertType) parts.push(`assertType="${a.assertType}"`);
3323
- if (a.selector) parts.push(`selector="${a.selector}"`);
3324
- if (a.value) parts.push(`value="${a.value}"`);
3325
- if (a.observation) parts.push(`→ ${a.observation}`);
3326
- return parts.join(" ");
3327
- }).join("\n")}
3510
+ ${actionsSummary}
3328
3511
 
3329
3512
  ## Test Script (with line numbers)
3330
3513
  ${numbered}
@@ -3901,11 +4084,11 @@ function resolveMode(opts) {
3901
4084
  }
3902
4085
  //#endregion
3903
4086
  //#region src/cli/generate.ts
3904
- const generateCommand = new Command("generate").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Generate agent-browser test script from recorded trace actions. test.spec.ts is regenerated from actions.json on every run; pass --force to overwrite manual edits.").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--force", "Overwrite an existing test.spec.ts without warning").option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)").option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (specPath, opts) => {
4087
+ const generateCommand = addLanguageOption(new Command("generate").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Generate agent-browser test script from recorded trace actions. test.spec.ts is regenerated from actions.json on every run; pass --force to overwrite manual edits.").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--force", "Overwrite an existing test.spec.ts without warning").option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.")).action(async (specPath, opts) => {
3905
4088
  const { featureName, specName } = parseSpecPath(specPath);
3906
4089
  const mode = resolveMode(opts);
3907
4090
  const useSnapshot = opts.snapshot !== false;
3908
- await runGenerate(featureName, specName, parseInt(opts.maxRetries, 10), mode, opts.force ?? false, useSnapshot, opts.language ?? "en", opts.model);
4091
+ await runGenerate(featureName, specName, parseInt(opts.maxRetries, 10), mode, opts.force ?? false, useSnapshot, opts.language ?? "auto", opts.model);
3909
4092
  });
3910
4093
  async function runGenerate(featureName, specName, maxRetries, mode, force, useSnapshot, outputLanguage, model) {
3911
4094
  header("generate", `${featureName}/${specName}`);
@@ -4395,7 +4578,7 @@ const DEFAULT_CONCURRENCY$1 = 3;
4395
4578
  * `cli/run` calls this with just the failing specs after vitest.
4396
4579
  */
4397
4580
  async function analyzeDrift(input) {
4398
- const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, onSpecStart } = input;
4581
+ const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, language, onSpecStart } = input;
4399
4582
  const results = new Array(targets.length);
4400
4583
  let cursor = 0;
4401
4584
  const worker = async () => {
@@ -4407,7 +4590,8 @@ async function analyzeDrift(input) {
4407
4590
  results[idx] = await checkSpec(target, {
4408
4591
  cwd,
4409
4592
  blocks,
4410
- model
4593
+ model,
4594
+ language
4411
4595
  });
4412
4596
  }
4413
4597
  };
@@ -4426,7 +4610,7 @@ async function checkSpec(target, opts) {
4426
4610
  };
4427
4611
  const { result, isError } = await invokeClaudeStreaming({
4428
4612
  prompt: buildDriftUserPrompt(existing),
4429
- systemPrompt: buildDriftSystemPrompt(opts.blocks),
4613
+ systemPrompt: buildDriftSystemPrompt(opts.blocks) + languageDirective(opts.language),
4430
4614
  allowedTools: [
4431
4615
  "Read",
4432
4616
  "Grep",
@@ -4467,165 +4651,1187 @@ async function checkSpec(target, opts) {
4467
4651
  };
4468
4652
  }
4469
4653
  //#endregion
4470
- //#region src/drift/format.ts
4654
+ //#region src/drift/affected.ts
4655
+ const execFileP = promisify(execFile);
4471
4656
  /**
4472
- * Render drift results as a string. The CLI commands and the `run` failure
4473
- * hook are the only callers; both want the formatted output returned so
4474
- * they can prefix / interleave / pipe it as needed.
4657
+ * Resolve the base ref to diff against for `ccqa drift --changed`.
4658
+ * Precedence: explicit override > GITHUB_BASE_REF > origin/main.
4475
4659
  */
4476
- function renderDrift(results, format, cwd) {
4477
- if (format === "json") return renderJson(results);
4478
- if (format === "github") return renderGithub(results, cwd);
4479
- return renderText(results);
4660
+ function resolveBaseRef(explicit) {
4661
+ if (explicit && explicit.length > 0) return explicit;
4662
+ const ghBase = process.env["GITHUB_BASE_REF"];
4663
+ if (ghBase && ghBase.length > 0) return ghBase.startsWith("origin/") ? ghBase : `origin/${ghBase}`;
4664
+ return "origin/main";
4480
4665
  }
4481
- const HEAVY_RULE = "═".repeat(72);
4482
- function renderText(results) {
4666
+ /**
4667
+ * Run `git diff --name-status base...HEAD` from `cwd` and return one entry per
4668
+ * changed file. Renames are reported under their NEW path with status
4669
+ * "renamed" — the OLD path is dropped because the spec mapping is against the
4670
+ * post-rename layout.
4671
+ *
4672
+ * Paths are re-rooted to be relative to `cwd`, not the git repo root. In a
4673
+ * monorepo where `cwd` is a sub-package (e.g. `apps/foo`), git emits paths
4674
+ * relative to the repo root, but specs declare relatedPaths relative to
4675
+ * their own package. Changes outside `cwd` are dropped so an unrelated PR
4676
+ * can never accidentally scope a sub-package's specs in.
4677
+ */
4678
+ async function getChangedFiles(base, cwd) {
4679
+ const [{ stdout: rootOut }, { stdout: diffOut }] = await Promise.all([execFileP("git", ["rev-parse", "--show-toplevel"], { cwd }), execFileP("git", [
4680
+ "diff",
4681
+ "--name-status",
4682
+ "-M",
4683
+ `${base}...HEAD`
4684
+ ], {
4685
+ cwd,
4686
+ maxBuffer: 32 * 1024 * 1024
4687
+ })]);
4688
+ return rerootChangedFiles(parseGitDiffOutput(diffOut), rootOut.trim(), cwd);
4689
+ }
4690
+ /**
4691
+ * Convert paths in `entries` from git-repo-root relative to `cwd` relative,
4692
+ * dropping anything outside `cwd`. Exported for unit tests.
4693
+ */
4694
+ function rerootChangedFiles(entries, repoRoot, cwd) {
4695
+ const prefix = relative(repoRoot, cwd);
4696
+ if (!prefix) return entries;
4483
4697
  const out = [];
4484
- for (const r of results) {
4485
- out.push("");
4486
- const heading = `══ ${r.target.featureName}/${r.target.specName} `;
4487
- const tail = "═".repeat(Math.max(3, 72 - heading.length));
4488
- out.push(`${heading}${tail}`);
4489
- if (r.error) {
4490
- out.push(` ERROR ${r.error}`);
4698
+ for (const e of entries) {
4699
+ const rel = relative(prefix, e.path);
4700
+ if (rel.startsWith("..") || rel === "") continue;
4701
+ out.push({
4702
+ ...e,
4703
+ path: rel
4704
+ });
4705
+ }
4706
+ return out;
4707
+ }
4708
+ function parseGitDiffOutput(stdout) {
4709
+ const out = [];
4710
+ for (const line of stdout.split("\n")) {
4711
+ if (!line.trim()) continue;
4712
+ const parts = line.split(" ");
4713
+ const code = parts[0];
4714
+ if (!code) continue;
4715
+ if (code.startsWith("R")) {
4716
+ const newPath = parts[2];
4717
+ if (newPath) out.push({
4718
+ path: newPath,
4719
+ status: "renamed"
4720
+ });
4491
4721
  continue;
4492
4722
  }
4493
- const errors = r.issues.filter((i) => i.severity === "ERROR");
4494
- const warnings = r.issues.filter((i) => i.severity === "WARN");
4495
- const passed = r.issues.filter((i) => i.severity === "OK");
4496
- if (errors.length === 0 && warnings.length === 0) {
4497
- const label = passed.length === 1 ? "check" : "checks";
4498
- const detail = passed.length > 0 ? `all ${passed.length} ${label} passed` : "no issues";
4499
- out.push(` ✓ ${detail}`);
4723
+ if (code.startsWith("C")) {
4724
+ const newPath = parts[2];
4725
+ if (newPath) out.push({
4726
+ path: newPath,
4727
+ status: "added"
4728
+ });
4500
4729
  continue;
4501
4730
  }
4502
- for (const issue of errors) appendFinding(out, "ERROR", issue);
4503
- for (const issue of warnings) appendFinding(out, "WARN", issue);
4504
- if (passed.length > 0) {
4505
- const names = passed.map((i) => DRAFT_CATEGORY_LABEL[i.category]).join(", ");
4506
- out.push("");
4507
- out.push(` ✓ passed (${passed.length}): ${names}`);
4731
+ const path = parts[1];
4732
+ if (!path) continue;
4733
+ switch (code[0]) {
4734
+ case "A":
4735
+ out.push({
4736
+ path,
4737
+ status: "added"
4738
+ });
4739
+ break;
4740
+ case "M":
4741
+ case "T":
4742
+ out.push({
4743
+ path,
4744
+ status: "modified"
4745
+ });
4746
+ break;
4747
+ case "D":
4748
+ out.push({
4749
+ path,
4750
+ status: "deleted"
4751
+ });
4752
+ break;
4753
+ default: out.push({
4754
+ path,
4755
+ status: "modified"
4756
+ });
4508
4757
  }
4509
4758
  }
4510
- out.push("");
4511
- out.push(HEAVY_RULE);
4512
- const totals = summarize(results);
4513
- out.push(` specs ${results.length} (${totals.errored} errored)`);
4514
- out.push(` findings ${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`);
4515
- out.push("");
4516
- return out.join("\n");
4759
+ return out;
4517
4760
  }
4518
- function appendFinding(out, level, issue) {
4519
- const stepPart = issue.stepId ? ` ${issue.stepId}` : "";
4520
- out.push("");
4521
- out.push(` ${level} ${DRAFT_CATEGORY_LABEL[issue.category]}${stepPart}`);
4522
- out.push(` ${issue.message}`);
4523
- if (issue.detail) out.push(` └ ${issue.detail.replace(/\n/g, "\n ")}`);
4761
+ function stripLeadingDotSlash(s) {
4762
+ return s.startsWith("./") ? s.slice(2) : s;
4524
4763
  }
4525
- function renderJson(results) {
4526
- const payload = { specs: results.map((r) => ({
4527
- feature: r.target.featureName,
4528
- spec: r.target.specName,
4529
- ok: r.ok,
4530
- ...r.error ? { error: r.error } : {},
4531
- issues: r.issues.map((i) => ({
4532
- severity: i.severity,
4533
- category: i.category,
4534
- stepId: i.stepId,
4535
- message: i.message,
4536
- ...i.detail ? { detail: i.detail } : {}
4537
- }))
4538
- })) };
4539
- return `${JSON.stringify(payload, null, 2)}\n`;
4764
+ const REGEX_CACHE = /* @__PURE__ */ new Map();
4765
+ /** Compiles `pattern` to a RegExp, memoized so repeated `--changed` matches don't re-build. */
4766
+ function compileGlob(pattern) {
4767
+ const cached = REGEX_CACHE.get(pattern);
4768
+ if (cached) return cached;
4769
+ const compiled = globToRegExp(stripLeadingDotSlash(pattern));
4770
+ REGEX_CACHE.set(pattern, compiled);
4771
+ return compiled;
4540
4772
  }
4541
- function renderGithub(results, cwd) {
4542
- const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
4543
- const lines = [];
4544
- for (const r of results) {
4545
- const file = githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName);
4546
- if (r.error) {
4547
- lines.push(`::error file=${file}::${escapeGhMessage(r.error)}`);
4548
- continue;
4549
- }
4550
- for (const issue of r.issues) {
4551
- if (issue.severity === "OK") continue;
4552
- const level = issue.severity === "ERROR" ? "error" : "warning";
4553
- const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
4554
- const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
4555
- lines.push(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}`);
4773
+ function globToRegExp(pattern) {
4774
+ let re = "^";
4775
+ let i = 0;
4776
+ while (i < pattern.length) {
4777
+ const ch = pattern[i];
4778
+ if (ch === "?") {
4779
+ re += "[^/]";
4780
+ i++;
4781
+ continue;
4556
4782
  }
4783
+ if (ch !== "*") {
4784
+ re += /[.+^${}()|[\]\\]/.test(ch) ? "\\" + ch : ch;
4785
+ i++;
4786
+ continue;
4787
+ }
4788
+ if (pattern[i + 1] !== "*") {
4789
+ re += "[^/]*";
4790
+ i++;
4791
+ continue;
4792
+ }
4793
+ const hasLeadingSlash = re.endsWith("/");
4794
+ const hasTrailingSlash = pattern[i + 2] === "/";
4795
+ if (hasLeadingSlash) re = re.slice(0, -1);
4796
+ if (hasLeadingSlash || hasTrailingSlash) re += "(?:/?.*)?";
4797
+ else re += ".*";
4798
+ i += hasTrailingSlash ? 3 : 2;
4557
4799
  }
4558
- return lines.length === 0 ? "" : `${lines.join("\n")}\n`;
4559
- }
4560
- function githubRelPath(cwd, repoRoot, featureName, specName) {
4561
- const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "spec.yaml");
4562
- const rel = relative(repoRoot, abs);
4563
- return rel.startsWith("..") ? abs : rel;
4564
- }
4565
- function escapeGhMessage(s) {
4566
- return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A");
4567
- }
4568
- function escapeGhProp(s) {
4569
- return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A").replace(/,/g, "%2C").replace(/:/g, "%3A");
4570
- }
4571
- function summarize(results) {
4572
- let error = 0;
4573
- let warn = 0;
4574
- let ok = 0;
4575
- let errored = 0;
4576
- for (const r of results) {
4577
- if (r.error) errored++;
4578
- for (const issue of r.issues) if (issue.severity === "ERROR") error++;
4579
- else if (issue.severity === "WARN") warn++;
4580
- else ok++;
4581
- }
4582
- return {
4583
- error,
4584
- warn,
4585
- ok,
4586
- errored
4587
- };
4800
+ return new RegExp(re + "$");
4588
4801
  }
4589
- //#endregion
4590
- //#region src/drift/exit-code.ts
4591
4802
  /**
4592
- * Map drift results to an exit code. Spec-level errors (Claude call failed)
4593
- * always fail; otherwise ERROR severity always fails, WARN fails only when
4594
- * the threshold is `warn`.
4803
+ * Returns true if `changedPath` is covered by any of `relatedPaths`. An empty
4804
+ * `relatedPaths` returns false — callers handle the "unscoped spec" case
4805
+ * separately (treat the spec as always-affected) before calling this.
4595
4806
  */
4596
- function determineExitCode(results, threshold) {
4597
- for (const r of results) {
4598
- if (r.error) return 1;
4599
- for (const issue of r.issues) {
4600
- if (issue.severity === "ERROR") return 1;
4601
- if (threshold === "warn" && issue.severity === "WARN") return 1;
4602
- }
4603
- }
4604
- return 0;
4807
+ function isPathAffectedBy(changedPath, relatedPaths) {
4808
+ const stripped = stripLeadingDotSlash(changedPath);
4809
+ for (const pattern of relatedPaths) if (compileGlob(pattern).test(stripped)) return true;
4810
+ return false;
4605
4811
  }
4606
4812
  //#endregion
4607
4813
  //#region src/drift/auth.ts
4608
4814
  /**
4609
4815
  * Probe whether the host has any credential the Anthropic SDK can pick up:
4610
4816
  * 1. ANTHROPIC_API_KEY env var (CI / scripted use)
4611
- * 2. ~/.claude/.credentials.json (local Claude Code login)
4817
+ * 2. ~/.claude/.credentials.json (Claude Code login, file-based platforms)
4818
+ * 3. macOS Keychain item "Claude Code-credentials" (Claude Code login on
4819
+ * darwin stores the OAuth credentials in the Keychain, not on disk)
4612
4820
  *
4613
- * `run --drift` is opt-in, so the caller will only consult this after the
4614
- * user has asked for drift. We never throw — auth absence is a normal flow
4615
- * that surfaces as "drift analysis skipped".
4821
+ * Claude-driven hooks are opt-in, so the caller only consults this after the
4822
+ * user has asked for analysis. We never throw — auth absence is a normal flow
4823
+ * that surfaces as "analysis skipped".
4616
4824
  */
4617
4825
  function driftAuthAvailable() {
4618
4826
  const key = process.env["ANTHROPIC_API_KEY"];
4619
4827
  if (typeof key === "string" && key.length > 0) return { ok: true };
4620
4828
  if (existsSync(join(homedir(), ".claude", ".credentials.json"))) return { ok: true };
4829
+ if (process.platform === "darwin" && keychainHasClaudeCredentials()) return { ok: true };
4621
4830
  return {
4622
4831
  ok: false,
4623
4832
  reason: "no ANTHROPIC_API_KEY / claude login"
4624
4833
  };
4625
4834
  }
4835
+ /**
4836
+ * `security find-generic-password` without `-w` only checks the item's
4837
+ * existence (exit 0) — it never reads the secret, so no Keychain unlock
4838
+ * prompt is triggered. Resolved via PATH so tests can stub the binary.
4839
+ */
4840
+ function keychainHasClaudeCredentials() {
4841
+ try {
4842
+ return spawnSync("security", [
4843
+ "find-generic-password",
4844
+ "-s",
4845
+ "Claude Code-credentials"
4846
+ ], {
4847
+ stdio: "ignore",
4848
+ timeout: 3e3
4849
+ }).status === 0;
4850
+ } catch {
4851
+ return false;
4852
+ }
4853
+ }
4854
+ //#endregion
4855
+ //#region src/report/prompt.ts
4856
+ function buildFailureAnalysisPrompt(input) {
4857
+ const { script, specYaml, failureLog, diffPatch, changedFiles, baseRef, driftIssues, outputLanguage = "auto" } = input;
4858
+ const numbered = numberLines(script);
4859
+ return `You are analyzing a failing E2E regression test right after a source change landed. Your job is a root-cause CALL, not a fix: decide which of three categories explains the failure, using the source diff as your primary context.
4860
+
4861
+ ${outputLanguageBlock(outputLanguage, "`reasoning`, `detail`", "label names (TEST_DRIFT, etc.)")}## The three categories
4862
+
4863
+ The question that separates them: **is the behavior the spec describes still what the product intends?**
4864
+
4865
+ 1. TEST_DRIFT — what the spec verifies is unchanged; only the test code drifted from the source. Typical: a selector/aria-label/placeholder rename, a timing change, an over-tight assertion. The diff shows a change that is invisible to the user's intent but visible to the test.
4866
+ 2. SPEC_CHANGE — the thing being verified itself changed: the UI flow, the layout, the feature's intended behavior. The diff deliberately changes what the spec asserts. You MUST cite the diff hunk (file + what changed) as evidence for this label.
4867
+ 3. PRODUCT_BUG — neither of the above: the failure is not explained by the diff nor by test staleness. The product regressed.
4868
+
4869
+ If the evidence is too weak to choose, answer UNKNOWN — a wrong confident call is worse than an honest UNKNOWN, because humans grade these predictions to measure accuracy.
4870
+
4871
+ ## You have read-only filesystem tools
4872
+
4873
+ You can call \`Grep\`, \`Glob\`, and \`Read\` against the current repository (post-change state) before producing the JSON. Use them to:
4874
+ - confirm a suspected selector rename (grep for \`aria-label=\`, \`placeholder=\`, \`data-testid\`, i18n strings),
4875
+ - read the changed files in full when the truncated patch is not enough,
4876
+ - check whether the element/flow the spec describes still exists in the source.
4877
+
4878
+ You have **up to 12 tool turns**. Do NOT write, edit, run shell commands, or hit the network.
4879
+
4880
+ ## Decision guidance
4881
+
4882
+ - Diff touches only attributes/identifiers the test selects on (labels, testids, class names, timing) while the user-visible flow is intact → TEST_DRIFT.
4883
+ - Diff intentionally removes/reworks the UI or flow that a spec step verifies (component deleted, page restructured, copy redefined, feature flag flipped) → SPEC_CHANGE.
4884
+ - Diff UNINTENTIONALLY breaks behavior the spec still intends — e.g. a refactor that drops a side effect, an inverted condition, a regression hiding inside a cleanup commit — → PRODUCT_BUG, citing the diff hunk as evidence. A product bug is often introduced BY the diff; what separates it from SPEC_CHANGE is intent: does the change read as a deliberate redesign of what the spec verifies, or as collateral damage?
4885
+ - Diff is unrelated to the failing step (or there is no relevant diff) and the test was passing before → lean PRODUCT_BUG; first rule out timing/data flakiness and infrastructure errors (daemon not running, network down, missing credentials) — those read as UNKNOWN with low confidence, not PRODUCT_BUG.
4886
+ - The drift audit findings (when present) flag spec↔code mismatches; an ERROR there usually supports TEST_DRIFT or SPEC_CHANGE over PRODUCT_BUG.
4887
+
4888
+ ## Sub-diagnosis vocabulary
4889
+
4890
+ Alongside the label, report the closest fine-grained mechanic:
4891
+ - SELECTOR_DRIFT, TIMING_ISSUE, OVER_ASSERTION — usually under TEST_DRIFT
4892
+ - DATA_MISSING — missing test data/state; usually UNKNOWN or PRODUCT_BUG depending on cause
4893
+ - NONE — when nothing fits (typical for SPEC_CHANGE and PRODUCT_BUG)
4894
+
4895
+ ## Output
4896
+
4897
+ Your **final** assistant message must start with \`{\` and end with \`}\` — a single JSON object, nothing before or after. No prose preamble, no markdown fences, no tool calls in the same turn.
4898
+
4899
+ {
4900
+ "label": "TEST_DRIFT" | "SPEC_CHANGE" | "PRODUCT_BUG" | "UNKNOWN",
4901
+ "confidence": <0.0-1.0>,
4902
+ "subDiagnosis": "SELECTOR_DRIFT" | "TIMING_ISSUE" | "OVER_ASSERTION" | "DATA_MISSING" | "NONE",
4903
+ "evidence": [
4904
+ { "file": "<file:line or diff hunk reference, omit if log-only>", "detail": "<what this shows>" }
4905
+ ],
4906
+ "reasoning": "<why this label, citing the evidence>"
4907
+ }
4908
+
4909
+ ## Confidence guidance
4910
+
4911
+ - 0.9-1.0: the diff (or a file you read) directly shows the cause
4912
+ - 0.7-0.9: strong indirect evidence
4913
+ - 0.4-0.7: plausible but another category could explain it
4914
+ - < 0.4: answer UNKNOWN instead of guessing
4915
+
4916
+ Evidence rules: TEST_DRIFT and SPEC_CHANGE require at least one concrete \`file\` reference (diff hunk or file:line you actually read). PRODUCT_BUG should explain why the diff does NOT account for the failure.
4917
+
4918
+ ## Test Spec (spec.yaml)
4919
+ ${specYaml}
4920
+
4921
+ ## Test Script (with line numbers)
4922
+ ${numbered}
4923
+
4924
+ ${diffPatch ? `## Source changes since ${baseRef ?? "base"} (git diff, may be truncated)
4925
+
4926
+ ### Changed files (name-status)
4927
+ ${changedFiles ?? "(unavailable)"}
4928
+
4929
+ ### Patch
4930
+ \`\`\`diff
4931
+ ${diffPatch}
4932
+ \`\`\`
4933
+ ` : `## Source changes
4934
+
4935
+ No diff context is available (the base ref could not be resolved, or there are no changes). Classify from the failure log, the spec, and what you can read in the repository — and be correspondingly more conservative: prefer UNKNOWN over a confident SPEC_CHANGE/PRODUCT_BUG call without diff evidence.
4936
+ `}
4937
+ ${driftIssues && driftIssues.length > 0 ? `## Spec↔code drift audit findings
4938
+
4939
+ A separate read-only audit compared the spec against the current source. Treat these as hints, not verdicts:
4940
+
4941
+ ${driftIssues.map((i) => `- [${i.severity}] (${DRAFT_CATEGORY_LABEL[i.category]}${i.stepId ? `, step ${i.stepId}` : ""}) ${i.message}${i.detail ? ` — ${i.detail}` : ""}`).join("\n")}
4942
+ ` : ""}## Failure Log
4943
+ ${failureLog.slice(0, 8e3)}`;
4944
+ }
4945
+ //#endregion
4946
+ //#region src/diagnose/types.ts
4947
+ /**
4948
+ * The concrete (fixable) diagnosis tags as a value, for consumers that need
4949
+ * to enumerate them (e.g. the run report's subDiagnosis vocabulary). The
4950
+ * `satisfies` clause makes renaming a union member without updating this
4951
+ * list a compile error.
4952
+ */
4953
+ const FIXABLE_DIAGNOSIS_TYPES = [
4954
+ "SELECTOR_DRIFT",
4955
+ "TIMING_ISSUE",
4956
+ "OVER_ASSERTION",
4957
+ "DATA_MISSING"
4958
+ ];
4959
+ //#endregion
4960
+ //#region src/report/schema.ts
4961
+ /**
4962
+ * The three-way root-cause call for a failing spec, framed as drift analysis:
4963
+ * - TEST_DRIFT: what the spec verifies is unchanged; only the test code
4964
+ * drifted from the source (selector rename, timing, ...).
4965
+ * Future iterations may auto-fix these.
4966
+ * - SPEC_CHANGE: the thing being verified itself changed (UI redesign,
4967
+ * spec change). Never auto-fix — a human must re-draft.
4968
+ * - PRODUCT_BUG: neither of the above explains the failure — treat it as
4969
+ * a product regression.
4970
+ *
4971
+ * The stakeholder ask behind this module is measurement-first: the call is
4972
+ * known to be hard, so every prediction is embedded in the HTML report where
4973
+ * a human records the ground truth and the report computes the confusion
4974
+ * matrix client-side. Accuracy may start low; it must be *visible*.
4975
+ */
4976
+ const FAILURE_LABELS = [
4977
+ "TEST_DRIFT",
4978
+ "SPEC_CHANGE",
4979
+ "PRODUCT_BUG"
4980
+ ];
4981
+ const FailureLabelSchema = z.enum(FAILURE_LABELS);
4982
+ /** What the model may answer: the three labels, or UNKNOWN when evidence is weak. */
4983
+ const PREDICTED_LABELS = [...FAILURE_LABELS, "UNKNOWN"];
4984
+ const PredictedLabelSchema = z.enum(PREDICTED_LABELS);
4985
+ const SUB_DIAGNOSES = [...FIXABLE_DIAGNOSIS_TYPES, "NONE"];
4986
+ const FailureEvidenceSchema = z.object({
4987
+ file: z.string().optional(),
4988
+ detail: z.string()
4989
+ });
4990
+ /**
4991
+ * LLM output shape. Deliberately NOT .strict(): the model occasionally adds
4992
+ * keys, and rejecting the whole analysis over an extra field would collapse
4993
+ * a usable prediction into UNKNOWN. Zod's default strips unknown keys.
4994
+ */
4995
+ const FailureAnalysisSchema = z.object({
4996
+ label: PredictedLabelSchema,
4997
+ confidence: z.number().min(0).max(1),
4998
+ subDiagnosis: z.enum(SUB_DIAGNOSES).optional(),
4999
+ evidence: z.array(FailureEvidenceSchema),
5000
+ reasoning: z.string()
5001
+ });
5002
+ const ReportAssertionSchema = z.object({
5003
+ name: z.string(),
5004
+ status: z.enum([
5005
+ "passed",
5006
+ "failed",
5007
+ "skipped"
5008
+ ]),
5009
+ durationMs: z.number().nullable()
5010
+ });
5011
+ const ReportSpecResultSchema = z.object({
5012
+ feature: z.string(),
5013
+ spec: z.string(),
5014
+ status: z.enum(["passed", "failed"]),
5015
+ testCounts: z.object({
5016
+ total: z.number(),
5017
+ passed: z.number(),
5018
+ failed: z.number()
5019
+ }).nullable(),
5020
+ durationMs: z.number().nullable(),
5021
+ assertions: z.array(ReportAssertionSchema).nullable(),
5022
+ analysis: FailureAnalysisSchema.nullable(),
5023
+ analysisSkipped: z.string().nullable(),
5024
+ driftIssues: z.array(DraftIssueSchema).nullable(),
5025
+ failureLogExcerpt: z.string().nullable(),
5026
+ diffExcerpt: z.string().nullable(),
5027
+ specYaml: z.string().nullable()
5028
+ });
5029
+ z.object({
5030
+ schemaVersion: z.literal(1),
5031
+ createdAt: z.string(),
5032
+ runId: z.string().nullable(),
5033
+ git: z.object({
5034
+ head: z.string().nullable(),
5035
+ base: z.string().nullable()
5036
+ }),
5037
+ model: z.string().nullable(),
5038
+ promptVersion: z.string(),
5039
+ results: z.array(ReportSpecResultSchema)
5040
+ });
5041
+ /** Shape of the "export labels" download produced by the report's client-side JS. */
5042
+ const LabelEntrySchema = z.object({
5043
+ feature: z.string(),
5044
+ spec: z.string(),
5045
+ predicted: PredictedLabelSchema,
5046
+ label: FailureLabelSchema,
5047
+ note: z.string().optional()
5048
+ });
5049
+ z.object({
5050
+ schemaVersion: z.literal(1),
5051
+ runId: z.string().nullable(),
5052
+ promptVersion: z.string(),
5053
+ exportedAt: z.string(),
5054
+ labels: z.array(LabelEntrySchema)
5055
+ });
5056
+ //#endregion
5057
+ //#region src/report/analyze.ts
5058
+ /**
5059
+ * Classify one failing spec into TEST_DRIFT / SPEC_CHANGE / PRODUCT_BUG /
5060
+ * UNKNOWN. Same resilience contract as diagnose(): read-only tools, JSON-only
5061
+ * final message, and any parse failure degrades to UNKNOWN with confidence 0
5062
+ * rather than throwing — the report must always render.
5063
+ */
5064
+ async function analyzeFailure(input, options = {}) {
5065
+ const { result: raw, isError } = await invokeClaudeStreaming({
5066
+ prompt: buildFailureAnalysisPrompt(input),
5067
+ allowedTools: [
5068
+ "Read",
5069
+ "Grep",
5070
+ "Glob"
5071
+ ],
5072
+ silenceBashLog: true,
5073
+ maxTurns: 12,
5074
+ ...options.model ? { model: options.model } : {},
5075
+ ...options.cwd ? { cwd: options.cwd } : {}
5076
+ }, () => {});
5077
+ if (isError || !raw) return {
5078
+ analysis: unknownAnalysis(isError ? "Claude returned an error result" : "Claude returned no output"),
5079
+ raw: raw ?? "",
5080
+ sdkError: isError
5081
+ };
5082
+ for (const candidate of extractJsonCandidates(raw)) {
5083
+ let parsed;
5084
+ try {
5085
+ parsed = JSON.parse(candidate);
5086
+ } catch {
5087
+ continue;
5088
+ }
5089
+ const normalised = normaliseFailureAnalysis(parsed);
5090
+ if (normalised) return {
5091
+ analysis: normalised,
5092
+ raw,
5093
+ sdkError: false
5094
+ };
5095
+ }
5096
+ return {
5097
+ analysis: unknownAnalysis(`analysis returned no parseable JSON: ${truncate$2(raw, 500)}`),
5098
+ raw,
5099
+ sdkError: false
5100
+ };
5101
+ }
5102
+ function unknownAnalysis(reasoning) {
5103
+ return {
5104
+ label: "UNKNOWN",
5105
+ confidence: 0,
5106
+ subDiagnosis: "NONE",
5107
+ evidence: [],
5108
+ reasoning
5109
+ };
5110
+ }
5111
+ const LABELS = new Set(PREDICTED_LABELS);
5112
+ const SUB_SET = new Set(SUB_DIAGNOSES);
5113
+ /**
5114
+ * Manual, lenient normalisation (mirrors diagnose's normaliseResult): a
5115
+ * missing/extra field should degrade gracefully, not reject the whole
5116
+ * prediction — only an unrecognisable label makes the candidate unusable.
5117
+ */
5118
+ function normaliseFailureAnalysis(parsed) {
5119
+ if (!isObject(parsed)) return null;
5120
+ const label = parsed["label"];
5121
+ if (typeof label !== "string" || !LABELS.has(label)) return null;
5122
+ const confidence = typeof parsed["confidence"] === "number" ? clamp(parsed["confidence"], 0, 1) : 0;
5123
+ const reasoning = typeof parsed["reasoning"] === "string" ? parsed["reasoning"] : "";
5124
+ const rawSub = parsed["subDiagnosis"];
5125
+ const subDiagnosis = typeof rawSub === "string" && SUB_SET.has(rawSub) ? rawSub : "NONE";
5126
+ const evidence = [];
5127
+ if (Array.isArray(parsed["evidence"])) for (const item of parsed["evidence"]) {
5128
+ if (!isObject(item)) continue;
5129
+ const detail = typeof item["detail"] === "string" ? item["detail"] : null;
5130
+ if (detail === null) continue;
5131
+ const file = typeof item["file"] === "string" ? item["file"] : void 0;
5132
+ evidence.push(file !== void 0 ? {
5133
+ file,
5134
+ detail
5135
+ } : { detail });
5136
+ }
5137
+ return {
5138
+ label,
5139
+ confidence,
5140
+ subDiagnosis,
5141
+ evidence,
5142
+ reasoning
5143
+ };
5144
+ }
5145
+ /**
5146
+ * Capture the PR diff used as context for failure analysis. `--relative`
5147
+ * re-roots paths to `cwd` and drops changes outside it, matching how
5148
+ * relatedPaths are declared in a monorepo sub-package.
5149
+ *
5150
+ * Errors (unknown base ref, not a git repo, ...) are returned, not thrown:
5151
+ * the report is still worth generating without diff context.
5152
+ */
5153
+ async function capturePrDiff(base, cwd) {
5154
+ try {
5155
+ const [{ stdout: head }, { stdout: patch }, { stdout: nameStatus }] = await Promise.all([
5156
+ execFileP("git", [
5157
+ "rev-parse",
5158
+ "--short",
5159
+ "HEAD"
5160
+ ], { cwd }),
5161
+ execFileP("git", [
5162
+ "diff",
5163
+ "-M",
5164
+ "--relative",
5165
+ `${base}...HEAD`
5166
+ ], {
5167
+ cwd,
5168
+ maxBuffer: 64 * 1024 * 1024
5169
+ }),
5170
+ execFileP("git", [
5171
+ "diff",
5172
+ "--name-status",
5173
+ "-M",
5174
+ "--relative",
5175
+ `${base}...HEAD`
5176
+ ], {
5177
+ cwd,
5178
+ maxBuffer: 32 * 1024 * 1024
5179
+ })
5180
+ ]);
5181
+ return {
5182
+ ok: true,
5183
+ diff: {
5184
+ patch,
5185
+ nameStatus: nameStatus.trim(),
5186
+ head: head.trim()
5187
+ }
5188
+ };
5189
+ } catch (e) {
5190
+ return {
5191
+ ok: false,
5192
+ error: e.message.split("\n")[0] ?? "git diff failed"
5193
+ };
5194
+ }
5195
+ }
5196
+ /**
5197
+ * Split a unified diff into per-file sections on `diff --git` boundaries.
5198
+ * The path is taken from the `b/` side so renames/edits key on the
5199
+ * post-change layout — the same side relatedPaths are written against.
5200
+ */
5201
+ const DIFF_HEADER = /^diff --git a\/(.+) b\/(.+)$/;
5202
+ function splitPatchByFile(patch) {
5203
+ const sections = [];
5204
+ const lines = patch.split("\n");
5205
+ let current = null;
5206
+ const flush = () => {
5207
+ if (current) sections.push({
5208
+ path: current.path,
5209
+ body: current.lines.join("\n")
5210
+ });
5211
+ current = null;
5212
+ };
5213
+ for (const line of lines) {
5214
+ const m = DIFF_HEADER.exec(line);
5215
+ if (m) {
5216
+ flush();
5217
+ current = {
5218
+ path: m[2],
5219
+ lines: [line]
5220
+ };
5221
+ } else if (current) current.lines.push(line);
5222
+ }
5223
+ flush();
5224
+ return sections;
5225
+ }
5226
+ /**
5227
+ * Scope a full patch down to the files a spec depends on, then truncate so
5228
+ * the analysis prompt stays bounded. `relatedPaths` null/empty means the
5229
+ * spec is unscoped — keep the whole patch (still truncated). Callers scoping
5230
+ * the same patch for many specs can pass pre-split sections instead.
5231
+ */
5232
+ function scopePatchForSpec(patch, relatedPaths, caps = {}) {
5233
+ const perFile = caps.perFile ?? 8192;
5234
+ const total = caps.total ?? 49152;
5235
+ let sections = typeof patch === "string" ? splitPatchByFile(patch) : patch;
5236
+ if (relatedPaths && relatedPaths.length > 0) {
5237
+ const scoped = sections.filter((s) => isPathAffectedBy(s.path, relatedPaths));
5238
+ if (scoped.length > 0) sections = scoped;
5239
+ }
5240
+ const parts = [];
5241
+ let used = 0;
5242
+ let droppedFiles = 0;
5243
+ for (const s of sections) {
5244
+ if (used >= total) {
5245
+ droppedFiles++;
5246
+ continue;
5247
+ }
5248
+ let body = s.body;
5249
+ if (body.length > perFile) body = `${body.slice(0, perFile)}\n[truncated: ${body.length - perFile} more chars of ${s.path}]`;
5250
+ if (used + body.length > total) body = `${body.slice(0, total - used)}\n[truncated: total patch cap reached]`;
5251
+ parts.push(body);
5252
+ used += body.length;
5253
+ }
5254
+ if (droppedFiles > 0) parts.push(`[truncated: ${droppedFiles} more changed file(s) omitted]`);
5255
+ return parts.join("\n");
5256
+ }
5257
+ //#endregion
5258
+ //#region src/report/render.ts
5259
+ /**
5260
+ * Render the run report as ONE self-contained HTML file (inline CSS/JS, no
5261
+ * network). It is meant to be uploaded as a CI artifact like Playwright's
5262
+ * HTML report and opened locally; the layout deliberately mirrors that
5263
+ * report's conventions — header stats that double as filters, a search box,
5264
+ * collapsible per-spec cards with a step list and durations, automatic
5265
+ * light/dark theme.
5266
+ *
5267
+ * The measurement loop lives client-side: each analyzed failure gets
5268
+ * ground-truth radio buttons, and a vanilla-JS block recomputes accuracy /
5269
+ * confusion matrix / per-class precision-recall on every change. Labels
5270
+ * persist in localStorage and can be exported/imported as JSON
5271
+ * (LabelsExportSchema) so the grading work survives the browser session.
5272
+ */
5273
+ function renderRunReport(data) {
5274
+ const failed = data.results.filter((r) => r.status === "failed");
5275
+ const analyzed = failed.filter((r) => r.analysis !== null);
5276
+ const passedCount = data.results.length - failed.length;
5277
+ const totalDuration = data.results.reduce((sum, r) => sum + (r.durationMs ?? 0), 0);
5278
+ const dataJson = JSON.stringify(data).replace(/</g, "\\u003c");
5279
+ return `<!DOCTYPE html>
5280
+ <html lang="en">
5281
+ <head>
5282
+ <meta charset="utf-8">
5283
+ <meta name="viewport" content="width=device-width, initial-scale=1">
5284
+ <title>ccqa run report</title>
5285
+ <style>${CSS}</style>
5286
+ </head>
5287
+ <body>
5288
+ <header>
5289
+ <div class="header-inner">
5290
+ <div class="header-top">
5291
+ <h1>ccqa run report</h1>
5292
+ <div class="meta">
5293
+ <span title="generated at">${esc(formatDate(data.createdAt))}</span>
5294
+ ${totalDuration > 0 ? `<span>${formatDuration$1(totalDuration)}</span>` : ""}
5295
+ ${data.runId ? `<span>CI run ${esc(data.runId)}</span>` : ""}
5296
+ ${data.git.head ? `<span><code>${esc(data.git.head)}</code>${data.git.base ? ` vs <code>${esc(data.git.base)}</code>` : ""}</span>` : ""}
5297
+ <span class="dim">prompt v${esc(data.promptVersion)}</span>
5298
+ </div>
5299
+ </div>
5300
+ <div class="toolbar">
5301
+ <div class="chips" id="filter-chips">
5302
+ <button type="button" class="chip active" data-filter="all">All <span class="count">${data.results.length}</span></button>
5303
+ <button type="button" class="chip chip-pass" data-filter="passed">${passedCount} passed</button>
5304
+ <button type="button" class="chip chip-fail" data-filter="failed">${failed.length} failed</button>
5305
+ </div>
5306
+ <input type="search" id="search" placeholder="Filter by name…" autocomplete="off">
5307
+ </div>
5308
+ </div>
5309
+ </header>
5310
+
5311
+ <div class="page">
5312
+ ${analyzed.length > 0 ? metricsPanel() : ""}
5313
+
5314
+ <main id="spec-list">
5315
+ ${data.results.map((r, i) => renderResult(r, i)).join("\n")}
5316
+ </main>
5317
+ <p class="empty-note" id="no-match" hidden>No specs match the current filter.</p>
5318
+ </div>
5319
+
5320
+ <script type="application/json" id="ccqa-report-data">${dataJson}<\/script>
5321
+ <script>${CLIENT_JS}<\/script>
5322
+ </body>
5323
+ </html>
5324
+ `;
5325
+ }
5326
+ function metricsPanel() {
5327
+ return `<section class="panel" id="measure-panel">
5328
+ <div class="panel-head">
5329
+ <h2>Prediction accuracy</h2>
5330
+ <div class="measure-actions">
5331
+ <button type="button" id="export-labels">Export labels (JSON)</button>
5332
+ <label class="import-label">Import labels<input type="file" id="import-labels" accept="application/json"></label>
5333
+ </div>
5334
+ </div>
5335
+ <p class="hint">Grade each failed case below with its true cause; the matrix updates live. Labels are saved in this browser (localStorage) — export them to keep or merge.</p>
5336
+ <div id="metrics"></div>
5337
+ </section>`;
5338
+ }
5339
+ function renderResult(r, index) {
5340
+ const id = `${r.feature}/${r.spec}`;
5341
+ const duration = r.durationMs != null && r.durationMs > 0 ? `<span class="duration">${formatDuration$1(r.durationMs)}</span>` : "";
5342
+ const counts = r.testCounts ? `<span class="counts">${r.testCounts.passed}/${r.testCounts.total}</span>` : "";
5343
+ const predictionChip = r.status === "failed" && r.analysis ? `<span class="badge ${r.analysis.label}">${r.analysis.label}</span>` : "";
5344
+ return `<details class="spec ${r.status}" data-status="${r.status}" data-case-id="${esc(id)}"${r.status === "failed" ? " open" : ""}>
5345
+ <summary>
5346
+ ${statusIcon(r.status)}
5347
+ <span class="spec-name">${esc(id)}</span>
5348
+ ${predictionChip}
5349
+ <span class="spacer"></span>
5350
+ ${counts}
5351
+ ${duration}
5352
+ </summary>
5353
+ <div class="spec-body">
5354
+ ${renderAssertions(r)}
5355
+ ${r.status === "failed" ? r.analysis ? renderAnalysis(r, index) : renderSkipped(r) : ""}
5356
+ ${renderDriftIssues(r)}
5357
+ ${collapsible("Failure log", r.failureLogExcerpt)}
5358
+ ${collapsible("Source diff (scoped)", r.diffExcerpt, "diff")}
5359
+ ${collapsible("spec.yaml", r.specYaml)}
5360
+ </div>
5361
+ </details>`;
5362
+ }
5363
+ function statusIcon(status) {
5364
+ if (status === "passed") return `<span class="status-icon pass" aria-label="passed">✓</span>`;
5365
+ if (status === "failed") return `<span class="status-icon fail" aria-label="failed">✕</span>`;
5366
+ return `<span class="status-icon skip" aria-label="skipped">◌</span>`;
5367
+ }
5368
+ function renderAssertions(r) {
5369
+ if (!r.assertions || r.assertions.length === 0) return "";
5370
+ return `<ul class="steps">${r.assertions.map((a) => {
5371
+ const dur = a.durationMs != null ? `<span class="duration">${formatDuration$1(a.durationMs)}</span>` : "";
5372
+ return `<li>${statusIcon(a.status)}<span class="step-name">${esc(a.name)}</span><span class="spacer"></span>${dur}</li>`;
5373
+ }).join("")}</ul>`;
5374
+ }
5375
+ function renderAnalysis(r, index) {
5376
+ const a = r.analysis;
5377
+ const pct = Math.round(a.confidence * 100);
5378
+ const evidence = a.evidence.length > 0 ? `<ul class="evidence">${a.evidence.map((e) => `<li>${e.file ? `<code>${esc(e.file)}</code> — ` : ""}${esc(e.detail)}</li>`).join("")}</ul>` : "";
5379
+ return `<div class="analysis">
5380
+ <div class="prediction">
5381
+ <span class="badge ${a.label}">${a.label}</span>
5382
+ <span class="confidence" title="confidence"><span class="confidence-bar"><span style="width:${pct}%"></span></span>${pct}%</span>
5383
+ ${a.subDiagnosis && a.subDiagnosis !== "NONE" ? `<span class="sub">${esc(a.subDiagnosis)}</span>` : ""}
5384
+ </div>
5385
+ <p class="reasoning">${esc(a.reasoning)}</p>
5386
+ ${evidence}
5387
+ <div class="truth">
5388
+ <span class="truth-title">True cause</span>
5389
+ ${FAILURE_LABELS.map((label) => `<label class="truth-option ${label}"><input type="radio" name="label--${index}" value="${label}"><span>${label}</span></label>`).join("\n ")}
5390
+ <input type="text" class="note" placeholder="note (optional)" data-case-index="${index}">
5391
+ </div>
5392
+ </div>`;
5393
+ }
5394
+ function renderSkipped(r) {
5395
+ return `<div class="analysis skipped">analysis skipped${r.analysisSkipped ? `: ${esc(r.analysisSkipped)}` : ""}</div>`;
5396
+ }
5397
+ function renderDriftIssues(r) {
5398
+ if (!r.driftIssues || r.driftIssues.length === 0) return "";
5399
+ const items = r.driftIssues.map((i) => `<li><span class="severity ${i.severity}">${i.severity}</span> (${esc(DRAFT_CATEGORY_LABEL[i.category])}${i.stepId ? `, step ${esc(i.stepId)}` : ""}) ${esc(i.message)}${i.detail ? ` — ${esc(i.detail)}` : ""}</li>`).join("");
5400
+ return `<details class="drift"><summary>Spec↔code drift audit (${r.driftIssues.length})</summary><ul>${items}</ul></details>`;
5401
+ }
5402
+ function collapsible(title, content, kind = "") {
5403
+ if (!content) return "";
5404
+ return `<details class="raw ${kind}"><summary>${esc(title)}</summary><pre>${esc(content)}</pre></details>`;
5405
+ }
5406
+ const ESC_MAP = {
5407
+ "&": "&amp;",
5408
+ "<": "&lt;",
5409
+ ">": "&gt;",
5410
+ "\"": "&quot;",
5411
+ "'": "&#39;"
5412
+ };
5413
+ function esc(s) {
5414
+ return s.replace(/[&<>"']/g, (c) => ESC_MAP[c]);
5415
+ }
5416
+ function formatDuration$1(ms) {
5417
+ if (ms < 1e3) return `${Math.round(ms)}ms`;
5418
+ if (ms < 6e4) return `${(ms / 1e3).toFixed(1)}s`;
5419
+ return `${Math.floor(ms / 6e4)}m ${Math.round(ms % 6e4 / 1e3)}s`;
5420
+ }
5421
+ function formatDate(iso) {
5422
+ return iso.replace("T", " ").replace(/\.\d+Z$/, " UTC");
5423
+ }
5424
+ const CSS = `
5425
+ :root {
5426
+ color-scheme: light dark;
5427
+ --bg: #f4f5f7;
5428
+ --surface: #ffffff;
5429
+ --surface-2: #f8f9fa;
5430
+ --border: #e1e4e8;
5431
+ --text: #1f2328;
5432
+ --text-dim: #656d76;
5433
+ --accent: #1f6feb;
5434
+ --pass: #1a7f37;
5435
+ --pass-bg: #dafbe1;
5436
+ --fail: #cf222e;
5437
+ --fail-bg: #ffebe9;
5438
+ --skip: #9a6700;
5439
+ --code-bg: #0d1117;
5440
+ --code-text: #e6edf3;
5441
+ --shadow: 0 1px 3px rgba(31, 35, 40, 0.06);
5442
+ }
5443
+ @media (prefers-color-scheme: dark) {
5444
+ :root {
5445
+ --bg: #0d1117;
5446
+ --surface: #161b22;
5447
+ --surface-2: #1c2129;
5448
+ --border: #30363d;
5449
+ --text: #e6edf3;
5450
+ --text-dim: #8b949e;
5451
+ --accent: #58a6ff;
5452
+ --pass: #3fb950;
5453
+ --pass-bg: rgba(63, 185, 80, 0.15);
5454
+ --fail: #f85149;
5455
+ --fail-bg: rgba(248, 81, 73, 0.15);
5456
+ --skip: #d29922;
5457
+ --code-bg: #010409;
5458
+ --code-text: #e6edf3;
5459
+ --shadow: none;
5460
+ }
5461
+ }
5462
+ * { box-sizing: border-box; }
5463
+ body {
5464
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Hiragino Sans", "Noto Sans JP", sans-serif;
5465
+ margin: 0; background: var(--bg); color: var(--text); font-size: 14px;
5466
+ }
5467
+ header {
5468
+ position: sticky; top: 0; z-index: 10;
5469
+ background: var(--surface); border-bottom: 1px solid var(--border);
5470
+ }
5471
+ .header-inner { max-width: 1080px; margin: 0 auto; padding: 14px 24px 10px; }
5472
+ .header-top { display: flex; align-items: baseline; gap: 18px; flex-wrap: wrap; }
5473
+ h1 { font-size: 17px; margin: 0; font-weight: 650; }
5474
+ h2 { font-size: 14px; margin: 0; font-weight: 650; }
5475
+ .meta { font-size: 12px; color: var(--text-dim); display: flex; gap: 14px; flex-wrap: wrap; }
5476
+ .meta code { background: var(--surface-2); border: 1px solid var(--border); padding: 0 5px; border-radius: 4px; font-size: 11px; }
5477
+ .dim { color: var(--text-dim); }
5478
+ .toolbar { display: flex; align-items: center; gap: 12px; margin-top: 10px; flex-wrap: wrap; }
5479
+ .chips { display: flex; gap: 6px; }
5480
+ .chip {
5481
+ font: inherit; font-size: 12.5px; font-weight: 600; cursor: pointer;
5482
+ padding: 3px 12px; border-radius: 999px; border: 1px solid var(--border);
5483
+ background: var(--surface); color: var(--text-dim);
5484
+ }
5485
+ .chip .count { opacity: 0.7; }
5486
+ .chip.active { background: var(--text); color: var(--surface); border-color: var(--text); }
5487
+ .chip-pass.active { background: var(--pass); border-color: var(--pass); color: #fff; }
5488
+ .chip-fail.active { background: var(--fail); border-color: var(--fail); color: #fff; }
5489
+ #search {
5490
+ font: inherit; font-size: 13px; flex: 1; min-width: 180px; max-width: 320px; margin-left: auto;
5491
+ padding: 5px 12px; border: 1px solid var(--border); border-radius: 6px;
5492
+ background: var(--surface-2); color: var(--text);
5493
+ }
5494
+ #search:focus { outline: 2px solid var(--accent); outline-offset: -1px; }
5495
+ .page { max-width: 1080px; margin: 16px auto; padding: 0 24px; }
5496
+ .panel {
5497
+ background: var(--surface); border: 1px solid var(--border); border-radius: 8px;
5498
+ padding: 14px 18px; margin-bottom: 16px; box-shadow: var(--shadow);
5499
+ }
5500
+ .panel-head { display: flex; align-items: center; justify-content: space-between; gap: 12px; flex-wrap: wrap; }
5501
+ .hint { font-size: 12px; color: var(--text-dim); margin: 6px 0 10px; }
5502
+ .spec {
5503
+ background: var(--surface); border: 1px solid var(--border); border-radius: 8px;
5504
+ margin-bottom: 8px; box-shadow: var(--shadow);
5505
+ }
5506
+ .spec > summary {
5507
+ display: flex; align-items: center; gap: 10px; padding: 10px 16px;
5508
+ cursor: pointer; list-style: none; user-select: none;
5509
+ }
5510
+ .spec > summary::-webkit-details-marker { display: none; }
5511
+ .spec > summary::before {
5512
+ content: "▸"; color: var(--text-dim); font-size: 11px;
5513
+ transition: transform 0.12s ease; flex: 0 0 auto;
5514
+ }
5515
+ .spec[open] > summary::before { transform: rotate(90deg); }
5516
+ .spec-name { font-weight: 600; font-size: 13.5px; }
5517
+ .spacer { flex: 1; }
5518
+ .counts { font-size: 12px; color: var(--text-dim); }
5519
+ .duration { font-size: 12px; color: var(--text-dim); font-variant-numeric: tabular-nums; }
5520
+ .status-icon { font-weight: 700; font-size: 13px; flex: 0 0 auto; }
5521
+ .status-icon.pass { color: var(--pass); }
5522
+ .status-icon.fail { color: var(--fail); }
5523
+ .status-icon.skip { color: var(--skip); }
5524
+ .spec-body { padding: 2px 16px 12px 36px; border-top: 1px solid var(--border); }
5525
+ .steps { list-style: none; margin: 10px 0; padding: 0; }
5526
+ .steps li {
5527
+ display: flex; align-items: center; gap: 8px; padding: 3px 8px;
5528
+ font-size: 13px; border-radius: 5px;
5529
+ }
5530
+ .steps li:hover { background: var(--surface-2); }
5531
+ .step-name { overflow-wrap: anywhere; }
5532
+ .analysis {
5533
+ border: 1px solid var(--border); border-left: 3px solid var(--accent);
5534
+ border-radius: 6px; background: var(--surface-2);
5535
+ padding: 10px 14px; margin: 10px 0;
5536
+ }
5537
+ .analysis.skipped { color: var(--text-dim); font-size: 13px; font-style: italic; border-left-color: var(--border); }
5538
+ .prediction { display: flex; align-items: center; gap: 12px; flex-wrap: wrap; }
5539
+ .badge {
5540
+ font-size: 11.5px; font-weight: 700; letter-spacing: 0.02em;
5541
+ padding: 2px 10px; border-radius: 4px; color: #fff; flex: 0 0 auto;
5542
+ }
5543
+ .badge.TEST_DRIFT { background: #b45309; }
5544
+ .badge.SPEC_CHANGE { background: #1d4ed8; }
5545
+ .badge.PRODUCT_BUG { background: #b91c1c; }
5546
+ .badge.UNKNOWN { background: #6b7280; }
5547
+ .confidence { display: inline-flex; align-items: center; gap: 7px; font-size: 12.5px; font-weight: 600; color: var(--text-dim); }
5548
+ .confidence-bar {
5549
+ display: inline-block; width: 64px; height: 6px; border-radius: 999px;
5550
+ background: var(--border); overflow: hidden;
5551
+ }
5552
+ .confidence-bar > span { display: block; height: 100%; background: var(--accent); border-radius: 999px; }
5553
+ .sub { font-size: 11px; background: var(--surface); border: 1px solid var(--border); color: var(--text-dim); padding: 1px 8px; border-radius: 999px; }
5554
+ .reasoning { font-size: 13px; margin: 9px 0; white-space: pre-wrap; line-height: 1.55; }
5555
+ .evidence { font-size: 12.5px; color: var(--text-dim); margin: 6px 0; padding-left: 18px; line-height: 1.5; }
5556
+ .evidence code { background: var(--surface); border: 1px solid var(--border); padding: 0 5px; border-radius: 4px; font-size: 11px; }
5557
+ .truth {
5558
+ display: flex; align-items: center; gap: 10px; flex-wrap: wrap;
5559
+ background: var(--surface); border: 1px dashed var(--border); border-radius: 6px;
5560
+ padding: 8px 12px; margin-top: 10px; font-size: 12.5px;
5561
+ }
5562
+ .truth-title { font-weight: 650; color: var(--text-dim); }
5563
+ .truth-option {
5564
+ display: inline-flex; align-items: center; gap: 5px; cursor: pointer;
5565
+ border: 1px solid var(--border); border-radius: 999px; padding: 2px 10px;
5566
+ }
5567
+ .truth-option:has(input:checked) { border-color: var(--accent); background: var(--surface-2); font-weight: 650; }
5568
+ .note { flex: 1; min-width: 150px; font: inherit; font-size: 12px; padding: 4px 9px; border: 1px solid var(--border); border-radius: 5px; background: var(--surface-2); color: var(--text); }
5569
+ details.raw, details.drift { margin: 7px 0; font-size: 13px; }
5570
+ details.raw summary, details.drift summary { cursor: pointer; color: var(--text-dim); }
5571
+ details.raw pre {
5572
+ background: var(--code-bg); color: var(--code-text);
5573
+ font-size: 11.5px; line-height: 1.5; padding: 12px 14px; border-radius: 6px;
5574
+ overflow-x: auto; white-space: pre-wrap; word-break: break-word; margin: 6px 0;
5575
+ }
5576
+ .severity { font-size: 10.5px; font-weight: 700; padding: 0 6px; border-radius: 4px; margin-right: 4px; }
5577
+ .severity.ERROR { background: var(--fail-bg); color: var(--fail); }
5578
+ .severity.WARN { background: rgba(212, 167, 44, 0.18); color: var(--skip); }
5579
+ .severity.OK { background: var(--pass-bg); color: var(--pass); }
5580
+ .drift ul { padding-left: 18px; font-size: 12.5px; line-height: 1.55; }
5581
+ table.matrix { border-collapse: collapse; font-size: 12.5px; margin: 10px 16px 10px 0; display: inline-table; vertical-align: top; }
5582
+ table.matrix th, table.matrix td { border: 1px solid var(--border); padding: 4px 12px; text-align: center; }
5583
+ table.matrix th { background: var(--surface-2); font-weight: 600; }
5584
+ table.matrix td { font-variant-numeric: tabular-nums; }
5585
+ table.matrix td.hit { background: var(--pass-bg); font-weight: 700; }
5586
+ table.matrix td.miss-nonzero { background: var(--fail-bg); }
5587
+ .stats { font-size: 13px; }
5588
+ .stats .big { font-size: 17px; font-weight: 700; }
5589
+ .measure-actions { display: flex; gap: 14px; align-items: center; font-size: 12.5px; }
5590
+ .measure-actions button {
5591
+ font: inherit; font-size: 12.5px; padding: 4px 13px; cursor: pointer;
5592
+ border: 1px solid var(--border); border-radius: 6px; background: var(--surface); color: var(--text);
5593
+ }
5594
+ .measure-actions button:hover { background: var(--surface-2); }
5595
+ .import-label { cursor: pointer; color: var(--text-dim); }
5596
+ .import-label input { display: none; }
5597
+ .empty-note { color: var(--text-dim); text-align: center; font-size: 13px; }
5598
+ `;
5599
+ const CLIENT_JS = `
5600
+ (function () {
5601
+ var dataEl = document.getElementById('ccqa-report-data');
5602
+ if (!dataEl) return;
5603
+ var data = JSON.parse(dataEl.textContent);
5604
+ var LABELS = ${JSON.stringify(FAILURE_LABELS)};
5605
+ var PRED_LABELS = LABELS.concat(['UNKNOWN']);
5606
+ var storageKey = 'ccqa-report:' + (data.runId || data.createdAt);
5607
+
5608
+ // ---- filtering ------------------------------------------------------
5609
+ var activeFilter = 'all';
5610
+ var searchQuery = '';
5611
+
5612
+ function applyFilters() {
5613
+ var sections = document.querySelectorAll('.spec');
5614
+ var visible = 0;
5615
+ sections.forEach(function (el) {
5616
+ var statusOk = activeFilter === 'all' || el.getAttribute('data-status') === activeFilter;
5617
+ var name = (el.getAttribute('data-case-id') || '').toLowerCase();
5618
+ var searchOk = !searchQuery || name.indexOf(searchQuery) >= 0;
5619
+ var show = statusOk && searchOk;
5620
+ el.style.display = show ? '' : 'none';
5621
+ if (show) visible++;
5622
+ });
5623
+ var note = document.getElementById('no-match');
5624
+ if (note) note.hidden = visible > 0;
5625
+ }
5626
+
5627
+ var chips = document.querySelectorAll('#filter-chips .chip');
5628
+ chips.forEach(function (chip) {
5629
+ chip.addEventListener('click', function () {
5630
+ activeFilter = chip.getAttribute('data-filter') || 'all';
5631
+ chips.forEach(function (c) { c.classList.toggle('active', c === chip); });
5632
+ applyFilters();
5633
+ });
5634
+ });
5635
+
5636
+ var search = document.getElementById('search');
5637
+ if (search) {
5638
+ search.addEventListener('input', function () {
5639
+ searchQuery = search.value.trim().toLowerCase();
5640
+ applyFilters();
5641
+ });
5642
+ }
5643
+
5644
+ // ---- measurement ----------------------------------------------------
5645
+ // cases: analyzed failures only — they carry a prediction we can grade.
5646
+ var cases = [];
5647
+ for (var i = 0; i < data.results.length; i++) {
5648
+ var r = data.results[i];
5649
+ if (r.status === 'failed' && r.analysis) {
5650
+ cases.push({ index: i, feature: r.feature, spec: r.spec, predicted: r.analysis.label });
5651
+ }
5652
+ }
5653
+
5654
// Restore previously saved labels/notes for this run. localStorage is
// user-controlled: JSON.parse can legally yield null, an array, or a
// primitive, none of which can serve as the key -> entry dictionary the
// rest of this script indexes into, so only a plain object is accepted.
var state = {};
try {
  var stored = JSON.parse(localStorage.getItem(storageKey) || '{}');
  if (stored && typeof stored === 'object' && !Array.isArray(stored)) state = stored;
} catch (e) { state = {}; }
5656
+
5657
+ function save() {
5658
+ try { localStorage.setItem(storageKey, JSON.stringify(state)); } catch (e) {}
5659
+ }
5660
+
5661
+ function caseKey(c) { return c.feature + '/' + c.spec; }
5662
+
5663
+ function applyStateToInputs() {
5664
+ cases.forEach(function (c) {
5665
+ var entry = state[caseKey(c)];
5666
+ if (!entry) return;
5667
+ // Guard: only known labels may flow into the attribute selector below
5668
+ // (localStorage is user-controlled; anything else is dropped).
5669
+ if (entry.label && LABELS.indexOf(entry.label) >= 0) {
5670
+ var radio = document.querySelector('input[name="label--' + c.index + '"][value="' + entry.label + '"]');
5671
+ if (radio) radio.checked = true;
5672
+ }
5673
+ var note = document.querySelector('.note[data-case-index="' + c.index + '"]');
5674
+ if (note && entry.note) note.value = entry.note;
5675
+ });
5676
+ }
5677
+
5678
+ function renderMetrics() {
5679
+ var target = document.getElementById('metrics');
5680
+ if (!target) return;
5681
+
5682
+ var m = {};
5683
+ PRED_LABELS.forEach(function (p) {
5684
+ m[p] = {};
5685
+ LABELS.forEach(function (a) { m[p][a] = 0; });
5686
+ });
5687
+
5688
+ var labeled = 0;
5689
+ var correct = 0;
5690
+ cases.forEach(function (c) {
5691
+ var entry = state[caseKey(c)];
5692
+ if (!entry || !entry.label || LABELS.indexOf(entry.label) < 0) return;
5693
+ labeled++;
5694
+ m[c.predicted][entry.label]++;
5695
+ if (c.predicted === entry.label) correct++;
5696
+ });
5697
+
5698
+ var html = '';
5699
+ html += '<div class="stats"><span class="big">' +
5700
+ (labeled === 0 ? '–' : Math.round((correct / labeled) * 100) + '%') +
5701
+ '</span> accuracy · ' + labeled + ' labeled / ' + cases.length + ' analyzed failures' +
5702
+ (cases.length - labeled > 0 ? ' · <strong>' + (cases.length - labeled) + ' unlabeled</strong>' : '') +
5703
+ '</div>';
5704
+
5705
+ html += '<table class="matrix"><thead><tr><th>predicted \\\\ actual</th>';
5706
+ LABELS.forEach(function (a) { html += '<th>' + a + '</th>'; });
5707
+ html += '</tr></thead><tbody>';
5708
+ PRED_LABELS.forEach(function (p) {
5709
+ html += '<tr><th>' + p + '</th>';
5710
+ LABELS.forEach(function (a) {
5711
+ var v = m[p][a];
5712
+ var cls = p === a ? 'hit' : (v > 0 ? 'miss-nonzero' : '');
5713
+ html += '<td class="' + cls + '">' + v + '</td>';
5714
+ });
5715
+ html += '</tr>';
5716
+ });
5717
+ html += '</tbody></table>';
5718
+
5719
+ html += '<table class="matrix"><thead><tr><th>class</th><th>precision</th><th>recall</th><th>F1</th><th>support</th></tr></thead><tbody>';
5720
+ LABELS.forEach(function (cls) {
5721
+ var tp = m[cls][cls];
5722
+ var predictedAs = 0;
5723
+ LABELS.forEach(function (a) { predictedAs += m[cls][a]; });
5724
+ var actualAs = 0;
5725
+ PRED_LABELS.forEach(function (p) { actualAs += m[p][cls]; });
5726
+ var precision = predictedAs > 0 ? tp / predictedAs : null;
5727
+ var recall = actualAs > 0 ? tp / actualAs : null;
5728
+ var f1 = precision !== null && recall !== null && precision + recall > 0
5729
+ ? (2 * precision * recall) / (precision + recall) : null;
5730
+ html += '<tr><th>' + cls + '</th><td>' + fmt(precision) + '</td><td>' + fmt(recall) +
5731
+ '</td><td>' + fmt(f1) + '</td><td>' + actualAs + '</td></tr>';
5732
+ });
5733
+ html += '</tbody></table>';
5734
+
5735
+ target.innerHTML = html;
5736
+ }
5737
+
5738
+ function fmt(v) { return v === null ? '–' : (Math.round(v * 100) / 100).toFixed(2); }
5739
+
5740
+ function findCaseByIndex(index) {
5741
+ for (var i = 0; i < cases.length; i++) {
5742
+ if (cases[i].index === index) return cases[i];
5743
+ }
5744
+ return null;
5745
+ }
5746
+
5747
+ document.addEventListener('change', function (e) {
5748
+ var t = e.target;
5749
+ if (t && t.name && t.name.indexOf('label--') === 0) {
5750
+ var index = parseInt(t.name.slice('label--'.length), 10);
5751
+ var c = findCaseByIndex(index);
5752
+ if (!c) return;
5753
+ var key = caseKey(c);
5754
+ state[key] = state[key] || {};
5755
+ state[key].label = t.value;
5756
+ save();
5757
+ renderMetrics();
5758
+ }
5759
+ });
5760
+
5761
+ document.addEventListener('input', function (e) {
5762
+ var t = e.target;
5763
+ if (t && t.classList && t.classList.contains('note')) {
5764
+ var index = parseInt(t.getAttribute('data-case-index'), 10);
5765
+ var c = findCaseByIndex(index);
5766
+ if (!c) return;
5767
+ var key = caseKey(c);
5768
+ state[key] = state[key] || {};
5769
+ state[key].note = t.value;
5770
+ save();
5771
+ }
5772
+ });
5773
+
5774
+ var exportBtn = document.getElementById('export-labels');
5775
+ if (exportBtn) {
5776
+ exportBtn.addEventListener('click', function () {
5777
+ var labels = [];
5778
+ cases.forEach(function (c) {
5779
+ var entry = state[caseKey(c)];
5780
+ if (!entry || !entry.label) return;
5781
+ var item = { feature: c.feature, spec: c.spec, predicted: c.predicted, label: entry.label };
5782
+ if (entry.note) item.note = entry.note;
5783
+ labels.push(item);
5784
+ });
5785
+ var payload = {
5786
+ schemaVersion: 1,
5787
+ runId: data.runId,
5788
+ promptVersion: data.promptVersion,
5789
+ exportedAt: new Date().toISOString(),
5790
+ labels: labels
5791
+ };
5792
+ var blob = new Blob([JSON.stringify(payload, null, 2)], { type: 'application/json' });
5793
+ var a = document.createElement('a');
5794
+ a.href = URL.createObjectURL(blob);
5795
+ a.download = 'ccqa-labels-' + (data.runId || data.createdAt).replace(/[^A-Za-z0-9_-]/g, '_') + '.json';
5796
+ a.click();
5797
+ URL.revokeObjectURL(a.href);
5798
+ });
5799
+ }
5800
+
5801
// ---- import -----------------------------------------------------------
// Merge labels from a previously exported JSON file into the saved state.
// The file is user-supplied, so every entry is validated before it is
// stored: feature/spec must be strings, the label must be one of LABELS,
// and the note must be a non-empty string. Invalid entries are skipped
// instead of being persisted (and later re-exported) as-is.
var importInput = document.getElementById('import-labels');
if (importInput) {
  importInput.addEventListener('change', function () {
    var file = importInput.files && importInput.files[0];
    if (!file) return;
    var reader = new FileReader();
    reader.onload = function () {
      try {
        var payload = JSON.parse(String(reader.result));
        var items = payload && Array.isArray(payload.labels) ? payload.labels : [];
        items.forEach(function (item) {
          if (!item || typeof item.feature !== 'string' || typeof item.spec !== 'string') return;
          var hasLabel = LABELS.indexOf(item.label) >= 0;
          var hasNote = typeof item.note === 'string' && item.note !== '';
          if (!hasLabel && !hasNote) return;
          var key = item.feature + '/' + item.spec;
          state[key] = state[key] || {};
          if (hasLabel) state[key].label = item.label;
          if (hasNote) state[key].note = item.note;
        });
        save();
        applyStateToInputs();
        renderMetrics();
      } catch (e) {
        alert('Could not parse labels JSON: ' + e.message);
      }
    };
    reader.readAsText(file);
    // Clear the selection so choosing the same file again fires 'change'.
    // The File object handed to the reader stays valid after the reset.
    importInput.value = '';
  });
}
5826
+
5827
+ applyStateToInputs();
5828
+ renderMetrics();
5829
+ })();
5830
+ `;
4626
5831
  //#endregion
4627
5832
  //#region src/cli/run.ts
4628
5833
  const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
5834
+ const DEFAULT_REPORT_DIR = "ccqa-report";
4629
5835
  async function resolveVitestConfig() {
4630
5836
  try {
4631
5837
  await access(USER_VITEST_CONFIG);
@@ -4634,7 +5840,7 @@ async function resolveVitestConfig() {
4634
5840
  return bundledVitestConfigPath();
4635
5841
  }
4636
5842
  }
4637
- const runCommand = new Command("run").argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all").description("Run generated agent-browser test scripts. Pass --drift to invoke a Claude-driven drift analysis on each failing spec (skipped silently when no test fails). Requires ANTHROPIC_API_KEY or a local Claude login.").option("--drift", "On vitest failure, run drift analysis on the failing specs").option("--drift-strict", "Treat drift ERROR findings as a run failure (exit 1 even if vitest passed). Implies --drift.").option("--format <fmt>", "Output format for the drift block: text | json | github", "text").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Used by --drift only. Overrides CCQA_MODEL.").action(async (target, opts) => {
5843
+ const runCommand = addLanguageOption(new Command("run").argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all").description("Run generated agent-browser test scripts. Pass --drift-report to also write a self-contained HTML run report: each failing spec gets a drift audit plus a root-cause call (TEST_DRIFT / SPEC_CHANGE / PRODUCT_BUG), and the report lets a human grade the calls to measure their accuracy. Requires ANTHROPIC_API_KEY or a local Claude login for the analysis part.").option("--drift-report [dir]", `Write an HTML run report with drift analysis of failures (default dir: ${DEFAULT_REPORT_DIR}/)`).option("--drift-base <ref>", "Base ref the source diff is taken against for failure analysis (default: GITHUB_BASE_REF, then origin/main)").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Used by --drift-report only. Overrides CCQA_MODEL.")).action(async (target, opts) => {
4638
5844
  await runTests(target, opts);
4639
5845
  });
4640
5846
  async function runTests(target, opts) {
@@ -4649,6 +5855,7 @@ async function runTests(target, opts) {
4649
5855
  const summaries = [];
4650
5856
  let overallExitCode = 0;
4651
5857
  const vitestConfig = await resolveVitestConfig();
5858
+ const captureOutput = Boolean(opts.driftReport);
4652
5859
  try {
4653
5860
  for (let i = 0; i < specs.length; i++) {
4654
5861
  const { featureName, specName } = specs[i];
@@ -4669,7 +5876,8 @@ async function runTests(target, opts) {
4669
5876
  "--reporter=json",
4670
5877
  `--outputFile.json=${reportFile}`
4671
5878
  ]);
4672
- await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]);
5879
+ const tail = captureOutput ? new TailBuffer(OUTPUT_TAIL_CAP) : null;
5880
+ await Promise.all([streamFiltered(proc.stdout, process.stdout, tail), streamFiltered(proc.stderr, process.stderr, tail)]);
4673
5881
  const exitCode = await proc.exited;
4674
5882
  if (exitCode !== 0) overallExitCode = exitCode;
4675
5883
  const report = await readReport(reportFile);
@@ -4678,12 +5886,13 @@ async function runTests(target, opts) {
4678
5886
  specName,
4679
5887
  scriptFile,
4680
5888
  report,
4681
- exitCode
5889
+ exitCode,
5890
+ outputTail: tail ? tail.toString() : null
4682
5891
  });
4683
5892
  blank();
4684
5893
  }
4685
5894
  printSummary(summaries);
4686
- overallExitCode = await maybeRunDrift(summaries, opts, overallExitCode);
5895
+ await maybeWriteDriftReport(summaries, opts);
4687
5896
  } finally {
4688
5897
  await rm(tmpDir, {
4689
5898
  recursive: true,
@@ -4696,73 +5905,208 @@ function failedSpec(s) {
4696
5905
  if (s.exitCode !== 0) return true;
4697
5906
  return (s.report?.numFailedTests ?? 0) > 0;
4698
5907
  }
4699
- function parseDriftFormat(raw) {
4700
- const v = raw ?? "text";
4701
- if (v === "text" || v === "json" || v === "github") return v;
4702
- error(`invalid --format: ${v} (expected text|json|github)`);
4703
- process.exit(2);
4704
- }
4705
5908
  /**
4706
- * Choose which specs to drift-check. `--drift` is a fail-supplement: only the
4707
- * specs that failed get a drift analysis (the goal is to *explain* a vitest
4708
- * failure). `--drift-strict` is an audit: even passing specs are checked,
4709
- * because the CI need is "fail loud if the spec lags behind the source",
4710
- * which can absolutely happen while vitest is still green against a stale
4711
- * staging environment.
5909
+ * Opt-in post-vitest report hook. With `--drift-report`, a self-contained
5910
+ * HTML report is ALWAYS written (a green run is still a useful run summary);
5911
+ * failing specs additionally get a spec↔code drift audit and a three-way
5912
+ * root-cause call with the PR diff as context. The hook never changes the
5913
+ * exit code — the run's outcome is determined by vitest alone — and when
5914
+ * Claude auth is unavailable only the analysis is skipped, not the report.
4712
5915
  */
4713
- function selectDriftTargets(summaries, opts) {
4714
- if (opts.driftStrict) return summaries;
4715
- if (opts.drift) return summaries.filter(failedSpec);
4716
- return [];
4717
- }
4718
- /**
4719
- * Opt-in post-vitest drift hook. With `--drift`, fires only when at least
4720
- * one spec failed (supplemental signal). With `--drift-strict`, fires
4721
- * unconditionally so a spec/source divergence is caught even when vitest
4722
- * passed. Skips silently when auth is unavailable so the run's exit code
4723
- * is determined by vitest alone.
4724
- */
4725
- async function maybeRunDrift(summaries, opts, currentExitCode) {
4726
- const candidates = selectDriftTargets(summaries, opts);
4727
- if (candidates.length === 0) return currentExitCode;
5916
+ async function maybeWriteDriftReport(summaries, opts) {
5917
+ if (!opts.driftReport) return;
5918
+ const outDir = typeof opts.driftReport === "string" ? opts.driftReport : DEFAULT_REPORT_DIR;
5919
+ const cwd = process.cwd();
4728
5920
  const auth = driftAuthAvailable();
4729
- if (!auth.ok) {
4730
- info(`drift analysis skipped (${auth.reason})`);
4731
- return currentExitCode;
5921
+ const failed = summaries.filter(failedSpec);
5922
+ if (!auth.ok && failed.length > 0) info(`failure analysis skipped (${auth.reason})`);
5923
+ const baseRef = resolveBaseRef(opts.driftBase);
5924
+ let diff = {
5925
+ ok: false,
5926
+ error: "diff not captured (no failures)"
5927
+ };
5928
+ if (failed.length > 0) {
5929
+ diff = await capturePrDiff(baseRef, cwd);
5930
+ if (!diff.ok) info(`drift-report: source diff unavailable (${diff.error}) — analyzing without diff context`);
5931
+ }
5932
+ const tree = failed.length > 0 ? await listFeatureTree(cwd) : [];
5933
+ const specInfoByKey = new Map(tree.flatMap((f) => f.specs.map((sp) => [`${f.featureName}/${sp.specName}`, sp])));
5934
+ const findSpecInfo = (s) => specInfoByKey.get(`${s.featureName}/${s.specName}`) ?? null;
5935
+ let driftResults = [];
5936
+ if (auth.ok && failed.length > 0) {
5937
+ const targets = failed.map((s) => {
5938
+ const spec = findSpecInfo(s);
5939
+ if (!spec) return null;
5940
+ const t = {
5941
+ featureName: s.featureName,
5942
+ specName: s.specName
5943
+ };
5944
+ if (spec.relatedPaths) t.relatedPaths = spec.relatedPaths;
5945
+ if (spec.includedBlocks) t.includedBlocks = spec.includedBlocks;
5946
+ return t;
5947
+ }).filter((t) => t !== null);
5948
+ if (targets.length > 0) driftResults = await analyzeDrift({
5949
+ targets,
5950
+ cwd,
5951
+ blocks: await loadAvailableBlocks(cwd),
5952
+ concurrency: Math.min(3, targets.length),
5953
+ ...opts.model ? { model: opts.model } : {},
5954
+ ...opts.language ? { language: opts.language } : {},
5955
+ onSpecStart: (t) => info(`drift audit: ${t.featureName}/${t.specName}`)
5956
+ });
4732
5957
  }
4733
- const format = parseDriftFormat(opts.format);
4734
- const cwd = process.cwd();
4735
- const tree = await listFeatureTree(cwd);
4736
- const targets = candidates.map((s) => {
4737
- const spec = tree.find((f) => f.featureName === s.featureName)?.specs.find((sp) => sp.specName === s.specName);
4738
- if (!spec) return null;
4739
- const t = {
4740
- featureName: s.featureName,
4741
- specName: s.specName
5958
+ const patchSections = diff.ok && diff.diff.patch.length > 0 ? splitPatchByFile(diff.diff.patch) : null;
5959
+ let printedHeader = false;
5960
+ const results = [];
5961
+ for (const s of summaries) {
5962
+ const assertions = collectAssertions(s);
5963
+ const base = {
5964
+ feature: s.featureName,
5965
+ spec: s.specName,
5966
+ testCounts: s.report ? {
5967
+ total: s.report.numTotalTests,
5968
+ passed: s.report.numPassedTests,
5969
+ failed: s.report.numFailedTests
5970
+ } : null,
5971
+ durationMs: assertions ? assertions.reduce((sum, a) => sum + (a.durationMs ?? 0), 0) : null,
5972
+ assertions
4742
5973
  };
4743
- if (spec.relatedPaths) t.relatedPaths = spec.relatedPaths;
4744
- if (spec.includedBlocks) t.includedBlocks = spec.includedBlocks;
4745
- return t;
4746
- }).filter((t) => t !== null);
4747
- if (targets.length === 0) {
4748
- info("drift analysis skipped (no spec.yaml found for failing specs)");
4749
- return currentExitCode;
4750
- }
4751
- const results = await analyzeDrift({
4752
- targets,
4753
- cwd,
4754
- blocks: await loadAvailableBlocks(cwd),
4755
- concurrency: Math.min(3, targets.length),
4756
- ...opts.model ? { model: opts.model } : {},
4757
- onSpecStart: (t) => {
4758
- if (format === "text") info(`drift: checking ${t.featureName}/${t.specName}`);
5974
+ if (!failedSpec(s)) {
5975
+ results.push({
5976
+ ...base,
5977
+ status: "passed",
5978
+ analysis: null,
5979
+ analysisSkipped: null,
5980
+ driftIssues: null,
5981
+ failureLogExcerpt: null,
5982
+ diffExcerpt: null,
5983
+ specYaml: null
5984
+ });
5985
+ continue;
5986
+ }
5987
+ const specYaml = await tryReadSpecFile(s.featureName, s.specName, cwd);
5988
+ const relatedPaths = findSpecInfo(s)?.relatedPaths ?? null;
5989
+ const diffExcerpt = patchSections ? scopePatchForSpec(patchSections, relatedPaths) : null;
5990
+ const driftResult = driftResults.find((r) => r.target.featureName === s.featureName && r.target.specName === s.specName);
5991
+ const driftIssues = driftResult?.ok ? driftResult.issues : null;
5992
+ const failureLog = buildFailureLog(s);
5993
+ let analysis = null;
5994
+ let analysisSkipped = null;
5995
+ if (!auth.ok) analysisSkipped = auth.reason;
5996
+ else if (specYaml === null) analysisSkipped = "no spec.yaml found for this spec";
5997
+ else {
5998
+ const script = await readScriptSafe(s.scriptFile);
5999
+ info(`failure analysis: ${s.featureName}/${s.specName}`);
6000
+ const outcome = await analyzeFailure({
6001
+ script,
6002
+ specYaml,
6003
+ failureLog,
6004
+ diffPatch: diffExcerpt,
6005
+ changedFiles: diff.ok ? diff.diff.nameStatus : null,
6006
+ baseRef: diff.ok ? baseRef : null,
6007
+ driftIssues,
6008
+ ...opts.language ? { outputLanguage: opts.language } : {}
6009
+ }, {
6010
+ ...opts.model ? { model: opts.model } : {},
6011
+ cwd
6012
+ });
6013
+ analysis = outcome.analysis;
6014
+ if (!printedHeader) {
6015
+ process.stdout.write(`\n${C.cyan}${C.bold}──────── failure analysis ────────${C.reset}\n`);
6016
+ printedHeader = true;
6017
+ }
6018
+ const pct = Math.round(outcome.analysis.confidence * 100);
6019
+ const firstLine = outcome.analysis.reasoning.split("\n")[0] ?? "";
6020
+ process.stdout.write(`${C.red}✖${C.reset} ${C.bold}${s.featureName}/${s.specName}${C.reset} → ${C.bold}${outcome.analysis.label}${C.reset} (${pct}%)${firstLine ? ` ${C.dim}${firstLine}${C.reset}` : ""}\n`);
4759
6021
  }
6022
+ results.push({
6023
+ ...base,
6024
+ status: "failed",
6025
+ analysis,
6026
+ analysisSkipped,
6027
+ driftIssues,
6028
+ failureLogExcerpt: failureLog.length > 0 ? failureLog : null,
6029
+ diffExcerpt,
6030
+ specYaml
6031
+ });
6032
+ }
6033
+ const data = {
6034
+ schemaVersion: 1,
6035
+ createdAt: (/* @__PURE__ */ new Date()).toISOString(),
6036
+ runId: process.env["GITHUB_RUN_ID"] ?? null,
6037
+ git: {
6038
+ head: diff.ok ? diff.diff.head : null,
6039
+ base: diff.ok ? baseRef : null
6040
+ },
6041
+ model: opts.model ?? null,
6042
+ promptVersion: "2",
6043
+ results
6044
+ };
6045
+ const reportPath = join(outDir, "index.html");
6046
+ await mkdir(outDir, { recursive: true });
6047
+ await writeFile(reportPath, renderRunReport(data), "utf8");
6048
+ info(`run report written to ${reportPath}`);
6049
+ }
6050
+ function collectAssertions(s) {
6051
+ if (!s.report) return null;
6052
+ const out = [];
6053
+ for (const file of s.report.testResults) for (const a of file.assertionResults) out.push({
6054
+ name: a.fullName,
6055
+ status: a.status === "passed" || a.status === "failed" ? a.status : "skipped",
6056
+ durationMs: a.duration ?? null
4760
6057
  });
4761
- if (format === "text") process.stdout.write(`\n${C.cyan}${C.bold}──────── drift analysis ────────${C.reset}\n`);
4762
- process.stdout.write(renderDrift(results, format, cwd));
4763
- if (opts.driftStrict && determineExitCode(results, "error") !== 0) return currentExitCode || 1;
4764
- return currentExitCode;
6058
+ return out;
6059
+ }
6060
/**
 * Build the failure log text for one spec summary. The same text is passed
 * to the failure-analysis prompt and embedded in the report.
 *
 * The structured failures from the JSON report are listed first: for each
 * failed assertion a `✖ <fullName>` line followed by its failureMessages.
 * The captured output tail, if any, is appended afterwards under a
 * separator line as secondary context.
 *
 * @param s Spec summary; reads `s.report` (may be null) and `s.outputTail`.
 * @returns Newline-joined log, or "" when there is nothing to report.
 */
function buildFailureLog(s) {
	const lines = [];
	const files = s.report ? s.report.testResults : [];
	for (const { assertionResults } of files) {
		for (const assertion of assertionResults) {
			if (assertion.status === "failed") {
				lines.push(`✖ ${assertion.fullName}`, ...(assertion.failureMessages ?? []));
			}
		}
	}
	const outputTail = s.outputTail?.trim();
	if (outputTail) lines.push("--- vitest output (tail) ---", outputTail);
	return lines.join("\n");
}
6081
/**
 * Best-effort read of a file as UTF-8 text.
 *
 * @param path Path handed to `readFile`.
 * @returns The file contents, or "" when the read fails for any reason.
 */
async function readScriptSafe(path) {
	let source = "";
	try {
		source = await readFile(path, "utf8");
	} catch {
		// Deliberate fallback: any read failure yields the empty string.
	}
	return source;
}
6088
+ /** Cap on the per-spec output tail kept for the report / analysis prompt. */
6089
+ const OUTPUT_TAIL_CAP = 64 * 1024;
6090
/**
 * Bounded accumulator that keeps the LAST `cap` characters appended. Vitest
 * puts the failure summary at the end of its output, so the tail is the part
 * worth keeping when a noisy spec overflows the cap.
 *
 * append() lets the buffer grow to 2 × cap before compacting it back to cap,
 * so long streams do not re-slice on every call. Because compaction leaves
 * exactly `cap` characters behind, the buffer length alone cannot tell
 * whether data was lost; a separate flag records it so toString() always
 * prefixes the truncation marker once anything has been dropped.
 */
var TailBuffer = class {
	buf = "";
	cap;
	/** True once append() has discarded characters from the front. */
	#dropped = false;
	/** @param cap Maximum number of trailing characters to retain. */
	constructor(cap) {
		this.cap = cap;
	}
	/** Append `s`, compacting to the last `cap` characters past 2 × cap. */
	append(s) {
		this.buf += s;
		if (this.buf.length > this.cap * 2) {
			this.buf = this.#tail();
			this.#dropped = true;
		}
	}
	/**
	 * @returns The retained text; prefixed with `[...output truncated...]`
	 * whenever earlier output was (or is being) cut off.
	 */
	toString() {
		if (!this.#dropped && this.buf.length <= this.cap) return this.buf;
		return `[...output truncated...]\n${this.#tail()}`;
	}
	/**
	 * Last `cap` characters of the buffer. Uses an explicit start index
	 * because `slice(-0)` would return the whole string when cap is 0.
	 */
	#tail() {
		return this.buf.length > this.cap ? this.buf.slice(this.buf.length - this.cap) : this.buf;
	}
};
4766
6110
  async function readReport(path) {
4767
6111
  try {
4768
6112
  const raw = await readFile(path, "utf8");
@@ -4834,7 +6178,7 @@ function formatDuration(ms) {
4834
6178
  return `${(ms / 1e3).toFixed(2)}s`;
4835
6179
  }
4836
6180
  const NOISE_LINE_PATTERNS = [/^JSON report written to /];
4837
- async function streamFiltered(source, sink) {
6181
+ async function streamFiltered(source, sink, capture) {
4838
6182
  source.setEncoding("utf8");
4839
6183
  let buffer = "";
4840
6184
  for await (const chunk of source) {
@@ -4843,11 +6187,17 @@ async function streamFiltered(source, sink) {
4843
6187
  while (nl !== -1) {
4844
6188
  const line = buffer.slice(0, nl);
4845
6189
  buffer = buffer.slice(nl + 1);
4846
- if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) sink.write(line + "\n");
6190
+ if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) {
6191
+ sink.write(line + "\n");
6192
+ capture?.append(line + "\n");
6193
+ }
4847
6194
  nl = buffer.indexOf("\n");
4848
6195
  }
4849
6196
  }
4850
- if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) sink.write(buffer);
6197
+ if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) {
6198
+ sink.write(buffer);
6199
+ capture?.append(buffer);
6200
+ }
4851
6201
  }
4852
6202
  async function resolveSpecs(target) {
4853
6203
  if (!target) return listAllSpecs();
@@ -4866,7 +6216,7 @@ async function resolveSpecs(target) {
4866
6216
  //#endregion
4867
6217
  //#region src/cli/draft.ts
4868
6218
  const CATEGORY_LABEL = DRAFT_CATEGORY_LABEL;
4869
- const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a spec.yaml with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
6219
+ const draftCommand = addLanguageOption(new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a spec.yaml with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false)).action(async (specPath, opts) => {
4870
6220
  await ensureCcqaDir();
4871
6221
  let featureName;
4872
6222
  let specName;
@@ -4882,6 +6232,7 @@ const draftCommand = new Command("draft").argument("[feature/spec]", "Optional s
4882
6232
  });
4883
6233
  async function runDraft(featureName, specName, opts, prefilledIntent) {
4884
6234
  header("draft", `${featureName}/${specName}`);
6235
+ const ja = useJapanesePrompts(opts.language);
4885
6236
  const oneShot = opts.instruction !== void 0;
4886
6237
  let useIntentOnce = prefilledIntent !== null && !oneShot;
4887
6238
  while (true) {
@@ -4892,7 +6243,7 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
4892
6243
  else if (useIntentOnce && isFirstRun) {
4893
6244
  userInput = prefilledIntent ?? "";
4894
6245
  useIntentOnce = false;
4895
- } else userInput = await prompt(isFirstRun ? "What do you want to test? > " : "How would you like to refine? (empty = re-validate) > ");
6246
+ } else userInput = await prompt(isFirstRun ? ja ? "何をテストしたいですか? > " : "What do you want to test? > " : ja ? "どのように修正しますか? (空欄で再検証) > " : "How would you like to refine? (empty = re-validate) > ");
4896
6247
  if (isFirstRun && !userInput.trim()) {
4897
6248
  error("intent required for the first draft (no spec exists yet)");
4898
6249
  process.exit(1);
@@ -4902,11 +6253,12 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
4902
6253
  specName,
4903
6254
  existing,
4904
6255
  userInput: userInput.trim(),
4905
- autoApply: opts.apply === true
6256
+ autoApply: opts.apply === true,
6257
+ language: opts.language
4906
6258
  });
4907
6259
  if (oneShot) process.exit(turnResult.hasError && !turnResult.applied ? 1 : 0);
4908
6260
  blank();
4909
- if (/^y/i.test(await prompt("Are you done with this draft? [y/N] "))) {
6261
+ if (/^y/i.test(await prompt(ja ? "このドラフトは完了ですか? [y/N] " : "Are you done with this draft? [y/N] "))) {
4910
6262
  info("draft session complete.");
4911
6263
  hint(`run 'ccqa trace ${featureName}/${specName}' to record actions`);
4912
6264
  process.exit(0);
@@ -4914,9 +6266,9 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
4914
6266
  }
4915
6267
  }
4916
6268
  async function runOneTurn(input) {
4917
- const { featureName, specName, existing, userInput, autoApply } = input;
6269
+ const { featureName, specName, existing, userInput, autoApply, language } = input;
4918
6270
  const isFirstRun = existing === null;
4919
- const systemPrompt = buildDraftSystemPrompt(await loadAvailableBlocks());
6271
+ const systemPrompt = buildDraftSystemPrompt(await loadAvailableBlocks()) + languageDirective(language);
4920
6272
  const userPrompt = buildDraftPrompt({
4921
6273
  mode: isFirstRun ? "create" : "refine",
4922
6274
  existing: existing ?? "",
@@ -4979,7 +6331,7 @@ async function runOneTurn(input) {
4979
6331
  info("--- proposed changes ---");
4980
6332
  printUnifiedDiff(original, report.patch);
4981
6333
  blank();
4982
- if (!(autoApply ? true : /^y/i.test(await prompt("Apply this patch? [y/N] ")))) {
6334
+ if (!(autoApply ? true : /^y/i.test(await prompt(useJapanesePrompts(language) ? "このパッチを適用しますか? [y/N] " : "Apply this patch? [y/N] ")))) {
4983
6335
  info("aborted — no changes applied.");
4984
6336
  return {
4985
6337
  hasError,
@@ -5071,8 +6423,9 @@ function writeFinding(issue) {
5071
6423
  if (issue.detail) process.stdout.write(` └ ${issue.detail.replace(/\n/g, "\n ")}\n`);
5072
6424
  }
5073
6425
  async function proposeNaming(opts) {
6426
+ const ja = useJapanesePrompts(opts.language);
5074
6427
  const oneShot = opts.instruction !== void 0;
5075
- const intent = oneShot ? opts.instruction ?? "" : await prompt("What do you want to test? > ");
6428
+ const intent = oneShot ? opts.instruction ?? "" : await prompt(ja ? "何をテストしたいですか? > " : "What do you want to test? > ");
5076
6429
  if (!intent.trim()) {
5077
6430
  error("intent required to propose a feature/spec name");
5078
6431
  process.exit(1);
@@ -5124,13 +6477,13 @@ async function proposeNaming(opts) {
5124
6477
  naming: final,
5125
6478
  intent: intent.trim()
5126
6479
  };
5127
- const answer = await prompt(`Use this name? [y/N/edit] > `);
6480
+ const answer = await prompt(ja ? "この名前を使いますか? [y/N/edit] > " : "Use this name? [y/N/edit] > ");
5128
6481
  if (/^y/i.test(answer)) return {
5129
6482
  naming: final,
5130
6483
  intent: intent.trim()
5131
6484
  };
5132
6485
  if (/^e/i.test(answer)) {
5133
- const manual = await prompt("Enter feature/spec (e.g. tasks/create-and-complete) > ");
6486
+ const manual = await prompt(ja ? "feature/spec を入力 (例 tasks/create-and-complete) > " : "Enter feature/spec (e.g. tasks/create-and-complete) > ");
5134
6487
  const parts = manual.split("/");
5135
6488
  if (parts.length !== 2 || !parts[0] || !parts[1]) {
5136
6489
  error(`invalid spec path: "${manual}". Expected "<feature>/<spec>"`);
@@ -5230,163 +6583,141 @@ function truncate(s, n) {
5230
6583
  return s.slice(s.length - n);
5231
6584
  }
5232
6585
  //#endregion
5233
- //#region src/drift/affected.ts
5234
- const execFileP = promisify(execFile);
5235
- /**
5236
- * Resolve the base ref to diff against for `ccqa drift --changed`.
5237
- * Precedence: explicit override > GITHUB_BASE_REF > origin/main.
5238
- */
5239
- function resolveBaseRef(explicit) {
5240
- if (explicit && explicit.length > 0) return explicit;
5241
- const ghBase = process.env["GITHUB_BASE_REF"];
5242
- if (ghBase && ghBase.length > 0) return ghBase.startsWith("origin/") ? ghBase : `origin/${ghBase}`;
5243
- return "origin/main";
5244
- }
5245
- /**
5246
- * Run `git diff --name-status base...HEAD` from `cwd` and return one entry per
5247
- * changed file. Renames are reported under their NEW path with status
5248
- * "renamed" — the OLD path is dropped because the spec mapping is against the
5249
- * post-rename layout.
5250
- *
5251
- * Paths are re-rooted to be relative to `cwd`, not the git repo root. In a
5252
- * monorepo where `cwd` is a sub-package (e.g. `apps/foo`), git emits paths
5253
- * relative to the repo root, but specs declare relatedPaths relative to
5254
- * their own package. Changes outside `cwd` are dropped so an unrelated PR
5255
- * can never accidentally scope a sub-package's specs in.
5256
- */
5257
- async function getChangedFiles(base, cwd) {
5258
- const [{ stdout: rootOut }, { stdout: diffOut }] = await Promise.all([execFileP("git", ["rev-parse", "--show-toplevel"], { cwd }), execFileP("git", [
5259
- "diff",
5260
- "--name-status",
5261
- "-M",
5262
- `${base}...HEAD`
5263
- ], {
5264
- cwd,
5265
- maxBuffer: 32 * 1024 * 1024
5266
- })]);
5267
- return rerootChangedFiles(parseGitDiffOutput(diffOut), rootOut.trim(), cwd);
5268
- }
6586
+ //#region src/drift/format.ts
5269
6587
  /**
5270
- * Convert paths in `entries` from git-repo-root relative to `cwd` relative,
5271
- * dropping anything outside `cwd`. Exported for unit tests.
6588
+ * Render drift results as a string. The CLI commands and the `run` failure
6589
+ * hook are the only callers; both want the formatted output returned so
6590
+ * they can prefix / interleave / pipe it as needed.
5272
6591
  */
5273
- function rerootChangedFiles(entries, repoRoot, cwd) {
5274
- const prefix = relative(repoRoot, cwd);
5275
- if (!prefix) return entries;
5276
- const out = [];
5277
- for (const e of entries) {
5278
- const rel = relative(prefix, e.path);
5279
- if (rel.startsWith("..") || rel === "") continue;
5280
- out.push({
5281
- ...e,
5282
- path: rel
5283
- });
5284
- }
5285
- return out;
6592
function renderDrift(results, format, cwd) {
	// "json" and "github" have dedicated renderers; any other format value
	// falls through to the plain-text renderer.
	switch (format) {
		case "json":
			return renderJson(results);
		case "github":
			// Only the GitHub renderer needs `cwd` (to build file paths).
			return renderGithub(results, cwd);
		default:
			return renderText(results);
	}
}
5287
- function parseGitDiffOutput(stdout) {
6597
+ const HEAVY_RULE = "═".repeat(72);
6598
+ function renderText(results) {
5288
6599
  const out = [];
5289
- for (const line of stdout.split("\n")) {
5290
- if (!line.trim()) continue;
5291
- const parts = line.split(" ");
5292
- const code = parts[0];
5293
- if (!code) continue;
5294
- if (code.startsWith("R")) {
5295
- const newPath = parts[2];
5296
- if (newPath) out.push({
5297
- path: newPath,
5298
- status: "renamed"
5299
- });
6600
+ for (const r of results) {
6601
+ out.push("");
6602
+ const heading = `══ ${r.target.featureName}/${r.target.specName} `;
6603
+ const tail = "═".repeat(Math.max(3, 72 - heading.length));
6604
+ out.push(`${heading}${tail}`);
6605
+ if (r.error) {
6606
+ out.push(` ERROR ${r.error}`);
5300
6607
  continue;
5301
6608
  }
5302
- if (code.startsWith("C")) {
5303
- const newPath = parts[2];
5304
- if (newPath) out.push({
5305
- path: newPath,
5306
- status: "added"
5307
- });
6609
+ const errors = r.issues.filter((i) => i.severity === "ERROR");
6610
+ const warnings = r.issues.filter((i) => i.severity === "WARN");
6611
+ const passed = r.issues.filter((i) => i.severity === "OK");
6612
+ if (errors.length === 0 && warnings.length === 0) {
6613
+ const label = passed.length === 1 ? "check" : "checks";
6614
+ const detail = passed.length > 0 ? `all ${passed.length} ${label} passed` : "no issues";
6615
+ out.push(` ✓ ${detail}`);
5308
6616
  continue;
5309
6617
  }
5310
- const path = parts[1];
5311
- if (!path) continue;
5312
- switch (code[0]) {
5313
- case "A":
5314
- out.push({
5315
- path,
5316
- status: "added"
5317
- });
5318
- break;
5319
- case "M":
5320
- case "T":
5321
- out.push({
5322
- path,
5323
- status: "modified"
5324
- });
5325
- break;
5326
- case "D":
5327
- out.push({
5328
- path,
5329
- status: "deleted"
5330
- });
5331
- break;
5332
- default: out.push({
5333
- path,
5334
- status: "modified"
5335
- });
6618
+ for (const issue of errors) appendFinding(out, "ERROR", issue);
6619
+ for (const issue of warnings) appendFinding(out, "WARN", issue);
6620
+ if (passed.length > 0) {
6621
+ const names = passed.map((i) => DRAFT_CATEGORY_LABEL[i.category]).join(", ");
6622
+ out.push("");
6623
+ out.push(` ✓ passed (${passed.length}): ${names}`);
6624
+ }
6625
+ }
6626
+ out.push("");
6627
+ out.push(HEAVY_RULE);
6628
+ const totals = summarize(results);
6629
+ out.push(` specs ${results.length} (${totals.errored} errored)`);
6630
+ out.push(` findings ${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`);
6631
+ out.push("");
6632
+ return out.join("\n");
6633
+ }
6634
+ function appendFinding(out, level, issue) {
6635
+ const stepPart = issue.stepId ? ` ${issue.stepId}` : "";
6636
+ out.push("");
6637
+ out.push(` ${level} ${DRAFT_CATEGORY_LABEL[issue.category]}${stepPart}`);
6638
+ out.push(` ${issue.message}`);
6639
+ if (issue.detail) out.push(` └ ${issue.detail.replace(/\n/g, "\n ")}`);
6640
+ }
6641
/**
 * Serialise drift results as pretty-printed JSON (2-space indent) followed
 * by a trailing newline.
 *
 * @param results per-spec drift results (`target`, `ok`, optional `error`, `issues`)
 * @returns JSON text of the shape `{ specs: [...] }`
 */
function renderJson(results) {
	const specs = [];
	for (const r of results) {
		const record = {
			feature: r.target.featureName,
			spec: r.target.specName,
			ok: r.ok
		};
		// `error` and `detail` are only emitted when they carry a truthy value.
		if (r.error) record.error = r.error;
		record.issues = r.issues.map((i) => {
			const issue = {
				severity: i.severity,
				category: i.category,
				stepId: i.stepId,
				message: i.message
			};
			if (i.detail) issue.detail = i.detail;
			return issue;
		});
		specs.push(record);
	}
	return `${JSON.stringify({ specs }, null, 2)}\n`;
}
6657
/**
 * Render drift results as GitHub Actions workflow-command lines
 * (`::error …` / `::warning …`), one per spec-level error or non-OK finding.
 *
 * @param results per-spec drift results
 * @param cwd directory that contains the `.ccqa` folder
 * @returns the command lines joined by newlines with a trailing newline,
 *          or an empty string when there is nothing to report
 */
function renderGithub(results, cwd) {
	const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
	const lines = [];
	for (const r of results) {
		// Property values must be escaped as well: a `,` or `:` in the path
		// (e.g. an absolute Windows path) would otherwise end the property early.
		const file = escapeGhProp(githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName));
		if (r.error) {
			// A spec-level error replaces the per-issue output for that spec.
			lines.push(`::error file=${file}::${escapeGhMessage(r.error)}`);
			continue;
		}
		for (const issue of r.issues) {
			if (issue.severity === "OK") continue;
			const level = issue.severity === "ERROR" ? "error" : "warning";
			const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
			const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
			lines.push(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}`);
		}
	}
	return lines.length === 0 ? "" : `${lines.join("\n")}\n`;
}
5340
- function stripLeadingDotSlash(s) {
5341
- return s.startsWith("./") ? s.slice(2) : s;
6676
/**
 * Path of a spec's `spec.yaml` for use in an annotation: relative to
 * `repoRoot` when the file lives inside it, otherwise the absolute path.
 *
 * @param cwd directory that contains the `.ccqa` folder
 * @param repoRoot directory the returned path should be relative to
 * @param featureName feature directory name
 * @param specName test-case directory name
 * @returns repo-relative path, or the absolute path when outside `repoRoot`
 */
function githubRelPath(cwd, repoRoot, featureName, specName) {
	const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "spec.yaml");
	const rel = relative(repoRoot, abs);
	// Only a real `..` path segment means "outside the repo"; a directory whose
	// name merely begins with two dots (e.g. `..pkg`) is still inside it.
	const escapesRoot = rel === ".." || rel.startsWith("../") || rel.startsWith("..\\");
	return escapesRoot ? abs : rel;
}
5343
- const REGEX_CACHE = /* @__PURE__ */ new Map();
5344
- /** Compiles `pattern` to a RegExp, memoized so repeated `--changed` matches don't re-build. */
5345
- function compileGlob(pattern) {
5346
- const cached = REGEX_CACHE.get(pattern);
5347
- if (cached) return cached;
5348
- const compiled = globToRegExp(stripLeadingDotSlash(pattern));
5349
- REGEX_CACHE.set(pattern, compiled);
5350
- return compiled;
6681
/**
 * Percent-encode `%`, carriage return and line feed in the message part of
 * a workflow command.
 *
 * @param s raw message text
 * @returns the encoded text
 */
function escapeGhMessage(s) {
	const encoded = {
		"%": "%25",
		"\r": "%0D",
		"\n": "%0A"
	};
	// One pass is enough: each source character maps to exactly one sequence.
	return s.replace(/[%\r\n]/g, (ch) => encoded[ch]);
}
5352
- function globToRegExp(pattern) {
5353
- let re = "^";
5354
- let i = 0;
5355
- while (i < pattern.length) {
5356
- const ch = pattern[i];
5357
- if (ch === "?") {
5358
- re += "[^/]";
5359
- i++;
5360
- continue;
5361
- }
5362
- if (ch !== "*") {
5363
- re += /[.+^${}()|[\]\\]/.test(ch) ? "\\" + ch : ch;
5364
- i++;
5365
- continue;
5366
- }
5367
- if (pattern[i + 1] !== "*") {
5368
- re += "[^/]*";
5369
- i++;
5370
- continue;
5371
- }
5372
- const hasLeadingSlash = re.endsWith("/");
5373
- const hasTrailingSlash = pattern[i + 2] === "/";
5374
- if (hasLeadingSlash) re = re.slice(0, -1);
5375
- if (hasLeadingSlash || hasTrailingSlash) re += "(?:/?.*)?";
5376
- else re += ".*";
5377
- i += hasTrailingSlash ? 3 : 2;
6684
/**
 * Percent-encode a workflow-command property value: `%`, carriage return,
 * line feed, `,` and `:`.
 *
 * @param s raw property value
 * @returns the encoded value
 */
function escapeGhProp(s) {
	const encoded = {
		"%": "%25",
		"\r": "%0D",
		"\n": "%0A",
		",": "%2C",
		":": "%3A"
	};
	// One pass is enough: each source character maps to exactly one sequence.
	return s.replace(/[%\r\n,:]/g, (ch) => encoded[ch]);
}
6687
/**
 * Tally findings across all results.
 *
 * @param results per-spec drift results
 * @returns `{ error, warn, ok, errored }`: issue counts by severity
 *          (anything that is neither "ERROR" nor "WARN" counts as ok) and
 *          the number of results with a truthy `error`
 */
function summarize(results) {
	const totals = {
		error: 0,
		warn: 0,
		ok: 0,
		errored: 0
	};
	for (const r of results) {
		if (r.error) totals.errored += 1;
		for (const { severity } of r.issues) {
			switch (severity) {
				case "ERROR":
					totals.error += 1;
					break;
				case "WARN":
					totals.warn += 1;
					break;
				default:
					totals.ok += 1;
			}
		}
	}
	return totals;
}
6705
+ //#endregion
6706
+ //#region src/drift/exit-code.ts
5381
6707
  /**
5382
- * Returns true if `changedPath` is covered by any of `relatedPaths`. An empty
5383
- * `relatedPaths` returns false callers handle the "unscoped spec" case
5384
- * separately (treat the spec as always-affected) before calling this.
6708
+ * Map drift results to an exit code. Spec-level errors (Claude call failed)
6709
+ * always fail; otherwise ERROR severity always fails, WARN fails only when
6710
+ * the threshold is `warn`.
5385
6711
  */
5386
- function isPathAffectedBy(changedPath, relatedPaths) {
5387
- const stripped = stripLeadingDotSlash(changedPath);
5388
- for (const pattern of relatedPaths) if (compileGlob(pattern).test(stripped)) return true;
5389
- return false;
6712
function determineExitCode(results, threshold) {
	// WARN findings only count as failures when the threshold is "warn".
	const failingSeverities = threshold === "warn" ? ["ERROR", "WARN"] : ["ERROR"];
	const failed = results.some((r) => {
		// A spec-level error fails without looking at the issues.
		if (r.error) return true;
		return r.issues.some((issue) => failingSeverities.includes(issue.severity));
	});
	return failed ? 1 : 0;
}
5391
6722
  //#endregion
5392
6723
  //#region src/drift/route-new-files.ts
@@ -5503,7 +6834,7 @@ Return the spec keys that might be affected by any of the new files. Conservativ
5503
6834
  //#endregion
5504
6835
  //#region src/cli/drift.ts
5505
6836
  const DEFAULT_CONCURRENCY = 3;
5506
- const driftCommand = new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each spec.yaml is still in sync with the current codebase (CI-friendly, no patches applied).").option("--format <fmt>", "Output format: text | json | github", "text").option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error").option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`).option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().").option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.").option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.").action(async (specPath, opts) => {
6837
+ const driftCommand = addLanguageOption(new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each spec.yaml is still in sync with the current codebase (CI-friendly, no patches applied).").option("--format <fmt>", "Output format: text | json | github", "text").option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error").option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`).option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().").option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.").option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.")).action(async (specPath, opts) => {
5507
6838
  const format = parseFormat(opts.format);
5508
6839
  const threshold = parseSeverity(opts.severity);
5509
6840
  const concurrency = parseConcurrency(opts.concurrency);
@@ -5538,6 +6869,7 @@ const driftCommand = new Command("drift").argument("[feature/spec]", "Optional s
5538
6869
  blocks,
5539
6870
  concurrency,
5540
6871
  ...opts.model ? { model: opts.model } : {},
6872
+ ...opts.language ? { language: opts.language } : {},
5541
6873
  onSpecStart: (t) => {
5542
6874
  if (format === "text") info(`checking ${t.featureName}/${t.specName}`);
5543
6875
  }
@@ -5650,6 +6982,446 @@ function parseConcurrency(raw) {
5650
6982
  return n;
5651
6983
  }
5652
6984
  //#endregion
6985
+ //#region src/prompts/perspectives.ts
6986
+ /**
6987
+ * Build the system prompt. By default the descriptive fields follow the
6988
+ * spec's own language (Japanese specs → Japanese fields). An explicit
6989
+ * `--language` is applied by the CLI via `languageDirective`, appended to
6990
+ * this prompt, so the language handling lives in one shared place.
6991
+ */
6992
+ function buildPerspectivesSystemPrompt() {
6993
+ return `You produce a factual inventory of the E2E test coverage that already exists in a ccqa project.
6994
+
6995
+ Think of it as a QA coverage stock-take: for each existing test case, fill in a few short, neutral descriptive fields derived from its steps. Nothing more.
6996
+
6997
+ ## Hard boundaries (do NOT cross)
6998
+
6999
+ - Do NOT assign severity, importance, priority, or risk. Whether a failure hurts the customer is a human + PdM decision; you are not authoring that here.
7000
+ - Do NOT do gap analysis. Do NOT list untested areas, missing coverage, or things the code has but the tests lack.
7001
+ - Do NOT evaluate whether the feature is good, complete, or correct.
7002
+ - Do NOT propose new test cases.
7003
+ - Do NOT restate the full step-by-step procedure or the per-step expected results — the spec.yaml is the source of truth for those and the inventory links to it.
7004
+ - Do NOT touch status, relatedPaths, feature names, or spec names — the CLI already fixed those.
7005
+
7006
+ ## Fields to write (per spec)
7007
+
7008
+ - \`summary\`: 1–2 sentences, factual and neutral. What the test exercises and what it ultimately asserts, derived from the spec's \`steps\` (\`instruction\` / \`expected\`).
7009
+ - \`startScreen\`: the screen/URL the test first lands on after setup (e.g. "Dashboard (/dashboard)"). Derive from the first non-login \`instruction\`. Omit if genuinely unclear.
7010
+ - \`testCondition\`: the state/precondition the scenario assumes, phrased as a condition (e.g. "Logged in as an admin", "Unauthenticated user"). Omit if none.
7011
+ - \`preconditions\`: array of short setup prerequisites (e.g. which role logs in, required prior state). Derive from \`include: login\` params and the opening steps. Empty/omit if none.
7012
+
7013
+ ## How to write
7014
+
7015
+ - Same language as the spec's title (if titles are Japanese, write these fields in Japanese).
7016
+ - Keep each field short. These are index entries, not the test itself.
7017
+ - You may use Read/Grep/Glob sparingly to clarify domain vocabulary, but the steps are the primary source. Do not over-explore.
7018
+
7019
+ ## Output contract (STRICT)
7020
+
7021
+ Output exactly ONE fenced \`\`\`json code block, and nothing else outside it. No prose before or after.
7022
+
7023
+ Schema:
7024
+
7025
+ \`\`\`json
7026
+ {
7027
+ "summaries": [
7028
+ {
7029
+ "featureName": "<verbatim from input>",
7030
+ "specName": "<verbatim from input>",
7031
+ "summary": "<1–2 sentence factual description of what this test verifies>",
7032
+ "startScreen": "<opening screen/URL, or omit>",
7033
+ "testCondition": "<assumed state phrased as a condition, or omit>",
7034
+ "preconditions": ["<setup prerequisite>", "..."]
7035
+ }
7036
+ ]
7037
+ }
7038
+ \`\`\`
7039
+
7040
+ Return one entry per spec given in the input. Echo featureName and specName verbatim so the CLI can match them. \`startScreen\`, \`testCondition\`, and \`preconditions\` are optional — omit a field (or use an empty array for preconditions) when the spec does not express it.
7041
+ `;
7042
+ }
7043
+ function buildPerspectivesPrompt(specs, instruction) {
7044
+ return `## Existing test cases to summarise
7045
+
7046
+ ${specs.map((s) => `### ${s.featureName}/${s.specName}
7047
+ title: ${s.title}
7048
+
7049
+ \`\`\`yaml
7050
+ ${s.specYaml.trimEnd()}
7051
+ \`\`\`
7052
+ `).join("\n")}
7053
+ ${instruction?.trim() ? `## Extra guidance from the user\n\n${instruction.trim()}\n\n` : ""}## Task
7054
+
7055
+ For each test case above, write a 1–2 sentence factual \`summary\` of what it verifies, derived from its steps. Return one entry per spec in the JSON contract. Do not assign severity, do gap analysis, or invent new cases.
7056
+ `;
7057
+ }
7058
+ //#endregion
7059
+ //#region src/cli/perspectives.ts
7060
/**
 * `ccqa perspectives` CLI command. The command is passed through
 * `addLanguageOption` and delegates its action to `runPerspectives`.
 */
const perspectivesCommand = addLanguageOption(
	new Command("perspectives")
		.description("Generate/update .ccqa/perspectives.yaml — a factual inventory of existing test coverage (no severity, no gap analysis)")
		.option("--instruction <text>", "Hint to steer how summaries are written")
		.option("--apply", "Auto-apply without [y/N] confirmation", false)
		.option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID")
).action(async (opts) => {
	await runPerspectives(opts);
});
7063
/**
 * Implementation of `ccqa perspectives`.
 *
 * Builds the skeleton from the feature tree, asks Claude for per-spec
 * summaries, merges in notes kept from the existing perspectives file,
 * validates the result and, once confirmed (or with `opts.apply`), writes
 * the YAML plus the index and per-feature Markdown files.
 *
 * Exits the process with code 1 when no summaries could be obtained or the
 * assembled object fails schema validation.
 *
 * @param opts CLI options; reads `language`, `apply`, and forwards the
 *             object to `requestSummaries`
 */
async function runPerspectives(opts) {
	header("perspectives", ".ccqa/perspectives.yaml");
	await ensureCcqaDir();
	const tree = await listFeatureTree();
	const skeleton = await buildSkeleton(tree);
	const specCount = skeleton.reduce((count, feature) => count + feature.specs.length, 0);
	if (specCount === 0) {
		info("no test cases found under .ccqa/features — nothing to inventory.");
		return;
	}

	const previousYaml = await tryReadPerspectives() ?? "";
	const preservedNotes = extractNotes(previousYaml);
	const specBodies = await loadSpecBodies(skeleton);
	meta("language", opts.language ?? "auto");
	info(`Summarising ${specCount} test case(s) across ${skeleton.length} feature(s)...`);
	const summaries = await requestSummaries(specBodies, opts);
	if (summaries === null) process.exit(1);

	const assembled = mergePerspectives(skeleton, summaries, preservedNotes);
	let validated;
	try {
		validated = PerspectivesSchema.parse(assembled);
	} catch (e) {
		error(`refused to write: assembled perspectives failed validation (${e.message})`);
		process.exit(1);
	}

	const nextYaml = stringify(validated, { lineWidth: 0 });
	// Compare without the timestamp line, which differs on every run.
	if (withoutGeneratedAt(previousYaml) === withoutGeneratedAt(nextYaml)) {
		blank();
		info("perspectives already up to date — no changes.");
		return;
	}

	blank();
	info("--- proposed changes (perspectives.yaml) ---");
	printUnifiedDiff(previousYaml, nextYaml);
	blank();

	// Only prompt when --apply was not given.
	let confirmed = opts.apply === true;
	if (!confirmed) {
		const question = useJapanesePrompts(opts.language) ? "perspectives.yaml + .md を書き込みますか? [y/N] " : "Write perspectives.yaml + .md? [y/N] ";
		confirmed = /^y/i.test(await prompt(question));
	}
	if (!confirmed) {
		info("aborted — no changes written.");
		return;
	}

	meta("saved", await savePerspectives(nextYaml));
	const labels = labelsFor(opts.language);
	meta("saved", await savePerspectivesMarkdown(renderIndexMarkdown(validated, labels)));
	for (const feature of validated.features) {
		const markdown = renderFeatureMarkdown(feature, labels);
		meta("saved", await saveFeaturePerspectivesMarkdown(feature.featureName, markdown));
	}
}
7106
+ /**
7107
+ * Turn the feature tree into the skeleton perspectives features: title +
7108
+ * relatedPaths transcribed from each spec, status derived mechanically from
7109
+ * on-disk artifacts. `summary` is left empty here; Claude fills it later.
7110
+ * Specs without a spec file (`hasSpecFile` false) are skipped; when no title can be read from spec.yaml, the spec name is used as the title.
7111
+ */
7112
async function buildSkeleton(tree) {
	// One skeleton entry per spec: title from spec.yaml, status from disk,
	// empty summary to be filled in later.
	const toEntry = async (featureName, node) => {
		const spec = await readSpecMeta(featureName, node.specName);
		const status = await deriveStatus(featureName, node.specName);
		const entry = {
			specName: node.specName,
			title: spec.title,
			summary: "",
			status
		};
		if (node.relatedPaths) entry.relatedPaths = node.relatedPaths;
		return entry;
	};
	const byKey = (key) => (a, b) => a[key].localeCompare(b[key]);

	const features = await Promise.all(tree.map(async (feature) => {
		const withSpecFile = feature.specs.filter((s) => s.hasSpecFile);
		const specs = await Promise.all(withSpecFile.map((s) => toEntry(feature.featureName, s)));
		return {
			featureName: feature.featureName,
			specs
		};
	}));

	// Drop features left without specs, then order specs and features by name.
	const nonEmpty = features.filter((f) => f.specs.length > 0);
	const sortedSpecs = nonEmpty.map((f) => ({
		featureName: f.featureName,
		specs: [...f.specs].sort(byKey("specName"))
	}));
	return sortedSpecs.sort(byKey("featureName"));
}
7135
+ /**
7136
+ * `(featureName, specName)` → human note, parsed from an existing
7137
+ * perspectives.yaml. Notes are preserved across regeneration; everything
7138
+ * else (title, status, summary) is recomputed. Returns an empty map when the
7139
+ * input is empty or unparsable — note preservation is best-effort and never
7140
+ * blocks regeneration.
7141
+ */
7142
function extractNotes(existingRaw) {
	const notes = /* @__PURE__ */ new Map();
	if (existingRaw.trim() === "") return notes;

	let doc;
	try {
		doc = parse(existingRaw);
	} catch {
		// Unparsable YAML: keep going with no preserved notes.
		return notes;
	}

	const checked = PerspectivesSchema.safeParse(doc);
	if (!checked.success) return notes;

	for (const { featureName, specs } of checked.data.features) {
		for (const { specName, note } of specs) {
			// Absent and empty notes are not carried over.
			if (note === void 0 || note === "") continue;
			notes.set(noteKey(featureName, specName), note);
		}
	}
	return notes;
}
7156
+ /**
7157
+ * Merge the mechanical skeleton with Claude's summaries and the preserved
7158
+ * notes into the final perspectives object. Summaries are matched by
7159
+ * (featureName, specName); an unmatched spec keeps its empty summary.
7160
+ */
7161
function mergePerspectives(skeleton, summaries, noteMap) {
	// Index the summaries by "<feature>/<spec>"; a later duplicate wins.
	const summaryByKey = /* @__PURE__ */ new Map();
	for (const s of summaries) {
		summaryByKey.set(noteKey(s.featureName, s.specName), s);
	}

	const mergeSpec = (featureName, spec) => {
		const key = noteKey(featureName, spec.specName);
		const generated = summaryByKey.get(key);
		const merged = {
			...spec,
			summary: generated?.summary ?? spec.summary
		};
		// Optional fields are copied only when they carry content.
		if (generated?.startScreen) merged.startScreen = generated.startScreen;
		if (generated?.testCondition) merged.testCondition = generated.testCondition;
		if (generated?.preconditions && generated.preconditions.length > 0) merged.preconditions = generated.preconditions;
		const note = noteMap.get(key);
		if (note !== void 0) merged.note = note;
		return merged;
	};

	const features = skeleton.map(({ featureName, specs }) => ({
		featureName,
		specs: specs.map((spec) => mergeSpec(featureName, spec))
	}));
	const generatedAt = (/* @__PURE__ */ new Date()).toISOString();
	return {
		generatedAt,
		features
	};
}
7186
+ /**
7187
+ * Strip the top-level `generatedAt:` line so two serialised perspectives can
7188
+ * be compared for substantive equality without the always-fresh timestamp
7189
+ * defeating the "already up to date" check. Exported for unit testing.
7190
+ */
7191
function withoutGeneratedAt(yamlText) {
	const kept = [];
	for (const line of yamlText.split("\n")) {
		// Only a line that starts at column 0 with the key is dropped.
		if (!line.startsWith("generatedAt:")) kept.push(line);
	}
	return kept.join("\n").trim();
}
7194
/**
 * Build the `"<featureName>/<specName>"` lookup key used for the summary
 * and note maps.
 */
function noteKey(featureName, specName) {
	return "".concat(featureName, "/", specName);
}
7197
/**
 * Read the title of a spec from its spec.yaml.
 *
 * @returns `{ title }`, where `title` falls back to `specName` when the
 *          file cannot be read, fails to parse, or has no non-empty string
 *          `title`
 */
async function readSpecMeta(featureName, specName) {
	const fallback = { title: specName };
	const raw = await tryReadSpecFile(featureName, specName);
	if (raw === null) return fallback;

	let title;
	try {
		title = parse(raw).title;
	} catch {
		// Best effort: an unparsable (or empty) document just uses the fallback.
		return fallback;
	}
	const usable = typeof title === "string" && title.length > 0;
	return usable ? { title } : fallback;
}
7206
/**
 * Derive a spec's status from what exists on disk.
 *
 * @returns `{ traced, generated }`: `traced` is true when `actions.json`
 *          can be stat'ed in the spec directory; `generated` is true when
 *          `getTestScript` returns something other than null
 */
async function deriveStatus(featureName, specName) {
	const actionsPath = join(getSpecDir(featureName, specName), "actions.json");
	let traced = true;
	try {
		await stat(actionsPath);
	} catch {
		// Any stat failure is treated as "not traced".
		traced = false;
	}
	const script = await getTestScript(featureName, specName);
	return {
		traced,
		generated: script !== null
	};
}
7212
/**
 * Load the raw spec.yaml text for every spec in the skeleton.
 *
 * @param skeleton features with their skeleton spec entries
 * @returns one `{ featureName, specName, title, specYaml }` per spec, in
 *          skeleton order; `specYaml` is "" when the file could not be read
 */
async function loadSpecBodies(skeleton) {
	const readBody = async (featureName, { specName, title }) => {
		const raw = await tryReadSpecFile(featureName, specName);
		return {
			featureName,
			specName,
			title,
			specYaml: raw ?? ""
		};
	};
	const pending = [];
	for (const feature of skeleton) {
		for (const spec of feature.specs) pending.push(readBody(feature.featureName, spec));
	}
	return await Promise.all(pending);
}
7223
/**
 * Ask Claude for the per-spec summaries and parse its reply.
 *
 * Writes a tool-usage summary line to stdout once the call finishes.
 *
 * @param specs spec bodies to summarise
 * @param opts CLI options; reads `instruction`, `language` and `model`
 * @returns the parsed summaries, or null when Claude reports an error,
 *          returns no json block, or the payload cannot be parsed
 */
async function requestSummaries(specs, opts) {
	const toolCounts = {};
	const startedAt = Date.now();

	// Count tool_use blocks per tool name from the assistant messages.
	const countToolUse = (msg) => {
		if (msg.type !== "assistant") return;
		for (const block of msg.message.content ?? []) {
			if (block.type !== "tool_use") continue;
			toolCounts[block.name] = (toolCounts[block.name] ?? 0) + 1;
		}
	};

	const request = {
		prompt: buildPerspectivesPrompt(specs, opts.instruction),
		systemPrompt: buildPerspectivesSystemPrompt() + languageDirective(opts.language),
		allowedTools: [
			"Read",
			"Grep",
			"Glob"
		],
		silenceBashLog: true
	};
	if (opts.model) request.model = opts.model;

	const { result, isError } = await invokeClaudeStreaming(request, countToolUse);
	process.stdout.write(`${formatToolSummary(toolCounts, Date.now() - startedAt)}\n`);

	if (isError) {
		error("Claude returned an error result");
		return null;
	}
	const json = extractJsonBlock(result);
	if (!json) {
		error("Claude did not return a json block");
		return null;
	}
	return parseSummaries(json);
}
7252
/**
 * Decode the `{ summaries: [...] }` JSON contract into typed entries.
 *
 * An item is kept only when `featureName`, `specName` and `summary` are all
 * strings; anything else is skipped without logging. The optional
 * `startScreen` / `testCondition` are copied only when they are non-empty
 * strings, and `preconditions` only when at least one non-empty string
 * survives filtering.
 *
 * @param {string} json - Raw JSON text.
 * @returns {Array<object> | null} The accepted entries, or `null` (after
 *   logging) when the text is not valid JSON, is not an object, or has no
 *   `summaries` array. Exported for unit testing.
 */
function parseSummaries(json) {
	let payload;
	try {
		payload = JSON.parse(json);
	} catch (e) {
		error(`failed to parse summaries JSON: ${e.message}`);
		return null;
	}
	if (payload === null || typeof payload !== "object") {
		error("summaries payload is not an object");
		return null;
	}
	const { summaries } = payload;
	if (!Array.isArray(summaries)) {
		error("summaries payload missing a `summaries` array");
		return null;
	}
	const isNonEmptyString = (v) => typeof v === "string" && v.length > 0;
	return summaries.flatMap((item) => {
		const rec = item ?? {};
		const { featureName, specName, summary } = rec;
		const hasRequired = typeof featureName === "string" && typeof specName === "string" && typeof summary === "string";
		if (!hasRequired) return [];
		const entry = {
			featureName,
			specName,
			summary
		};
		if (isNonEmptyString(rec.startScreen)) entry.startScreen = rec.startScreen;
		if (isNonEmptyString(rec.testCondition)) entry.testCondition = rec.testCondition;
		if (Array.isArray(rec.preconditions)) {
			const kept = rec.preconditions.filter(isNonEmptyString);
			if (kept.length > 0) entry.preconditions = kept;
		}
		return [entry];
	});
}
7294
/**
 * Japanese headings and row labels for the generated perspectives Markdown.
 * Frozen because the same object is handed out as the default label set.
 */
const LABELS_JA = Object.freeze({
	indexTitle: "テスト観点インデックス (perspectives)",
	caseCol: "ケース",
	itemCol: "項目",
	valueCol: "内容",
	summary: "検証内容",
	preconditions: "前提条件",
	startScreen: "開始画面",
	relatedCode: "関連コード"
});
/** English counterpart of `LABELS_JA`; same keys, frozen for the same reason. */
const LABELS_EN = Object.freeze({
	indexTitle: "Test Perspectives (perspectives)",
	caseCol: "Case",
	itemCol: "Item",
	valueCol: "Value",
	summary: "Verifies",
	preconditions: "Preconditions",
	startScreen: "Start screen",
	relatedCode: "Related code"
});
/**
 * Pick the label set for a `--language` value. Only an explicit English tag
 * (`en`, `en-US`, `en_US`, `en_US.UTF-8`, …) switches to English labels;
 * `auto`, `ja`, a missing value, and anything else keep Japanese.
 *
 * @param {string | undefined} language - Raw language value; surrounding
 *   whitespace and letter case are ignored.
 * @returns {typeof LABELS_JA} `LABELS_EN` or `LABELS_JA`.
 */
function labelsFor(language) {
	const tag = typeof language === "string" ? language.trim() : "";
	// `\b` would treat `_` as a word character and so reject `en_US`; the
	// lookahead only refuses a following letter or digit (`english`, `en1`).
	return /^en(?![a-z0-9])/i.test(tag) ? LABELS_EN : LABELS_JA;
}
7323
/**
 * Build the link target for a spec.yaml as seen from the **root**
 * `.ccqa/perspectives.md`, i.e. relative to the `.ccqa/` directory. The root
 * index uses it for its per-case spec links.
 *
 * @param {string} featureName
 * @param {string} specName
 * @returns {string} `features/<feature>/test-cases/<spec>/spec.yaml`
 */
function specRelPathFromRoot(featureName, specName) {
	const featureDir = `features/${featureName}`;
	const specDir = `${featureDir}/test-cases/${specName}`;
	return `${specDir}/spec.yaml`;
}
7330
/**
 * Build the link target for a category detail file as seen from the **root**
 * `.ccqa/perspectives.md`. The detail file is written to
 * `.ccqa/features/<feature>/perspectives.md` (see
 * `getFeaturePerspectivesMarkdownPath`), so the `features/` segment has to be
 * part of the link — without it the category heading link 404s.
 *
 * @param {string} featureName
 * @returns {string} `features/<feature>/perspectives.md`
 */
function featureDetailRelPathFromRoot(featureName) {
	const featureDir = `features/${featureName}`;
	return `${featureDir}/perspectives.md`;
}
7339
/**
 * Build the link target for a spec.yaml as seen from the **category** detail
 * file `.ccqa/features/<feature>/perspectives.md`. The spec sits next to that
 * file under `test-cases/<spec>/`, so a plain relative path resolves both on
 * GitHub and in a local editor.
 *
 * @param {string} specName
 * @returns {string} `test-cases/<spec>/spec.yaml`
 */
function specRelPathFromCategory(specName) {
	const specDir = `test-cases/${specName}`;
	return `${specDir}/spec.yaml`;
}
7348
/**
 * Render the root `.ccqa/perspectives.md`: a category-grouped index of which
 * cases exist. Every feature becomes a heading that links to its own detail
 * `perspectives.md`, followed by a two-column table with one row per case —
 * the case title and a link to that case's spec.yaml. Per-case detail is not
 * rendered here, so the root stays a scannable "what is tested, and where"
 * overview.
 *
 * Pure and deterministic, so the index rendering is easy to unit-test.
 *
 * @param perspectives - Object whose `features` array lists each feature and
 *   its `specs`.
 * @param labels - Label set supplying `indexTitle` and `caseCol`.
 * @returns {string} The Markdown document, lines joined with `\n`.
 */
function renderIndexMarkdown(perspectives, labels = LABELS_JA) {
	const sections = perspectives.features.flatMap((feature) => {
		const { featureName } = feature;
		const detailLink = featureDetailRelPathFromRoot(featureName);
		const rows = feature.specs.map((spec) => {
			const specLink = specRelPathFromRoot(featureName, spec.specName);
			return `| ${mdCell(spec.title)} | [spec](${specLink}) |`;
		});
		return [
			`## [${featureName}](${detailLink})`,
			"",
			`| ${labels.caseCol} | spec |`,
			"| --- | --- |",
			...rows,
			""
		];
	});
	return [
		`# ${labels.indexTitle}`,
		"",
		...sections
	].join("\n");
}
7376
/**
 * Render one category's `.ccqa/features/<feature>/perspectives.md`: a title
 * heading followed by the output of `renderSpecMarkdown` for every case in
 * the category, in order. Nothing else is emitted between or around the
 * per-case sections.
 *
 * Pure and deterministic, so the per-case rendering is easy to unit-test.
 *
 * @param feature - Object with `featureName` and a `specs` array.
 * @param labels - Label set forwarded to `renderSpecMarkdown`.
 * @returns {string} The Markdown document, lines joined with `\n`.
 */
function renderFeatureMarkdown(feature, labels = LABELS_JA) {
	const caseLines = feature.specs.flatMap((spec) => renderSpecMarkdown(spec, labels));
	return [
		`# ${feature.featureName}`,
		"",
		...caseLines
	].join("\n");
}
7393
/**
 * Render one spec as a single vertical (item | content) Markdown table for a
 * category file. Rows appear in this order, each only when it has a value:
 * verification summary, preconditions, start screen, the spec link (always),
 * related code, note. No prose is emitted around the table.
 *
 * The spec link is relative to the category file so it resolves both on
 * GitHub and in a local editor. Related-code paths stay inline code rather
 * than links: their base (the cwd that hosts `.ccqa/`) is not reliably
 * recoverable here, and many are globs that no link could open anyway.
 *
 * @param spec - Reads `title`, `specName` and the optional `summary`,
 *   `preconditions`, `startScreen`, `relatedPaths`, `note`.
 * @param labels - Label set for the column headers and row names.
 * @returns {string[]} The Markdown lines, ending with one blank line.
 *   Exported for focused unit testing.
 */
function renderSpecMarkdown(spec, labels = LABELS_JA) {
	const lines = [];
	lines.push(`## ${spec.title}`);
	lines.push("");
	lines.push(`| ${labels.itemCol} | ${labels.valueCol} |`);
	lines.push("| --- | --- |");
	if (spec.summary) lines.push(`| ${labels.summary} | ${mdCell(spec.summary)} |`);
	if (spec.preconditions && spec.preconditions.length > 0) {
		const items = spec.preconditions.map((p) => mdCell(p));
		lines.push(`| ${labels.preconditions} | ${items.join("<br>")} |`);
	}
	if (spec.startScreen) lines.push(`| ${labels.startScreen} | ${mdCell(spec.startScreen)} |`);
	const specPath = specRelPathFromCategory(spec.specName);
	lines.push(`| spec | [${specPath}](${specPath}) |`);
	if (spec.relatedPaths && spec.relatedPaths.length > 0) {
		// A table row is split on `|` before inline spans are parsed, so a
		// glob such as `src/@(a|b)/*.ts` must be escaped even inside backticks.
		const paths = spec.relatedPaths.map((p) => `\`${mdCell(p)}\``);
		lines.push(`| ${labels.relatedCode} | ${paths.join("<br>")} |`);
	}
	if (spec.note) lines.push(`| 📝 note | ${mdCell(spec.note)} |`);
	lines.push("");
	return lines;
}
7420
/**
 * Escape pipes and flatten line breaks so a value stays inside one Markdown
 * table cell.
 *
 * @param {string} value - Raw cell text.
 * @returns {string} The text with every `|` written as `\|` and every line
 *   break (`\r\n`, `\r` or `\n`) replaced by a single space.
 */
function mdCell(value) {
	// `\r\n` is listed first so a Windows line ending becomes one space and
	// no stray carriage return is left behind in the cell.
	return value.replace(/\|/g, "\\|").replace(/\r\n|\r|\n/g, " ");
}
7424
+ //#endregion
5653
7425
  //#region src/cli/index.ts
5654
7426
  const packageJsonPath = resolvePackageJson();
5655
7427
  const { version } = JSON.parse(readFileSync(packageJsonPath, "utf8"));
@@ -5667,6 +7439,7 @@ const program = new Command();
5667
7439
  program.name("ccqa").description("E2E test CLI using Claude Code + agent-browser").version(version);
5668
7440
  program.addCommand(draftCommand);
5669
7441
  program.addCommand(driftCommand);
7442
+ program.addCommand(perspectivesCommand);
5670
7443
  program.addCommand(traceCommand);
5671
7444
  program.addCommand(generateCommand);
5672
7445
  program.addCommand(runCommand);