npm - ccqa - Versions diffs - 0.5.1 → 0.7.0 - Mend

ccqa 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/bin/ccqa.mjs CHANGED Viewed

@@ -9,11 +9,11 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
 import { ZodError, z } from "zod";
 import { delimiter, dirname, join, relative, resolve } from "node:path";
 import { parse, stringify } from "yaml";
-import { execFile, spawn } from "node:child_process";
+import { execFile, spawn, spawnSync } from "node:child_process";
 import { createInterface } from "node:readline";
 import { homedir, tmpdir } from "node:os";
-import { createInterface as createInterface$1 } from "node:readline/promises";
 import { promisify } from "node:util";
+import { createInterface as createInterface$1 } from "node:readline/promises";
 //#region src/prompts/trace.ts
 function generateSessionName() {
 	return `ccqa-trace-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`;
@@ -562,6 +562,71 @@ function isParamRequired(param) {
 	return param.required !== false;
 }
 //#endregion
+//#region src/spec/perspectives-schema.ts
+/**
+* `perspectives.yaml` is an inventory of the test coverage that already
+* exists under `.ccqa/` — the ccqa equivalent of a hand-kept QA spreadsheet,
+* but scoped deliberately to *facts about what is tested today*.
+*
+* It intentionally does NOT carry severity / importance / priority. Deciding
+* "how badly does it hurt the customer if this breaks" is a human + PdM
+* decision, not something ccqa should author or silently overwrite. Keeping
+* those columns out of the schema (and `.strict()` rejecting them) makes the
+* boundary explicit: perspectives is a factual stock-take, severity lives
+* wherever the team decides on it.
+*
+* It also does NOT attempt code-vs-test gap analysis (listing untested
+* areas). A flat dump of "things in code with no test" is noise without
+* prioritisation; that is a separate, later concern.
+*/
+/**
+* Whether the spec has been traced / generated. Both are derived mechanically
+* by the CLI from on-disk artifacts (actions.json / test.spec.ts), never
+* written by Claude — these are facts and must not drift.
+*/
+const PerspectiveStatusSchema = z.object({
+	traced: z.boolean(), // required; no default is applied
+	generated: z.boolean() // required; no default is applied
+}).strict(); // strict(): any key other than traced/generated fails validation
+/**
+* One test case in the inventory.
+*
+* - `title` / `relatedPaths` are transcribed verbatim from the spec.yaml.
+* - `status` is mechanically derived (see PerspectiveStatusSchema).
+* - `summary` is a 1–2 sentence description of *what the spec verifies*,
+*   derived from its steps by Claude.
+* - `startScreen` / `testCondition` / `preconditions` mirror the columns a
+*   hand-kept QA table carries. They are Claude-derived from the spec's
+*   steps (the opening screen, the state the test assumes, and the setup
+*   prerequisites such as which role logs in). Optional: a spec may not
+*   express all of them.
+* - `note` is a human-only field. Regenerating perspectives preserves it.
+*
+* The detailed test procedure and expected results are deliberately NOT
+* duplicated here — the spec.yaml steps are the single source of truth for
+* those. The Markdown view links back to the spec instead of restating them.
+*/
+const PerspectiveSpecSchema = z.object({
+	specName: z.string().min(1), // must be non-empty
+	title: z.string().min(1), // must be non-empty
+	summary: z.string(), // required, but an empty string is accepted (no min length)
+	startScreen: z.string().optional(),
+	testCondition: z.string().optional(),
+	preconditions: z.array(z.string().min(1)).optional(), // may be omitted; each entry must be non-empty
+	relatedPaths: z.array(z.string().min(1)).optional(), // may be omitted; each entry must be non-empty
+	status: PerspectiveStatusSchema, // required nested object
+	note: z.string().optional()
+}).strict(); // strict(): unknown keys are rejected
+const PerspectiveFeatureSchema = z.object({ // one feature plus the spec entries listed under it
+	featureName: z.string().min(1), // must be non-empty
+	specs: z.array(PerspectiveSpecSchema) // required; an empty array is accepted
+}).strict(); // strict(): unknown keys are rejected
+/** Top-level perspectives schema. `.strict()` rejects any unknown key. */
+const PerspectivesSchema = z.object({
+	generatedAt: z.string().optional(), // any string passes; no timestamp format is enforced by this schema
+	features: z.array(PerspectiveFeatureSchema) // required; an empty array is accepted
+}).strict();
+//#endregion
 //#region src/types.ts
 const RouteStepSchema = z.object({
 	title: z.string(),
@@ -633,7 +698,7 @@ const DraftIssueSchema = z.object({
 	]),
 	stepId: z.string().nullable(),
 	message: z.string(),
-	detail: z.string().optional()
+	detail: z.string().nullish()
 });
 const DraftReportSchema = z.object({
 	issues: z.array(DraftIssueSchema),
@@ -1205,6 +1270,8 @@ function collectIncludedBlockNames(spec) {
 //#region src/store/index.ts
 const CCQA_DIR = ".ccqa"; // directory name that getCcqaDir joins onto cwd
 const SPEC_FILE = "spec.yaml";
+const PERSPECTIVES_FILE = "perspectives.yaml"; // file name used by getPerspectivesPath
+const PERSPECTIVES_MD_FILE = "perspectives.md"; // shared by the root-level and the per-feature Markdown paths
 function getCcqaDir(cwd = process.cwd()) { // an omitted/undefined cwd falls back to the process working directory
 	return join(cwd, CCQA_DIR);
 }
@@ -1250,6 +1317,56 @@ async function saveSpecFile(featureName, specName, content, cwd) {
 	await writeFile(specPath, content.endsWith("\n") ? content : content + "\n", "utf-8");
 	return specPath;
 }
+/** Absolute path to the single repo-wide `.ccqa/perspectives.yaml`. */
+function getPerspectivesPath(cwd) {
+	return join(getCcqaDir(cwd), PERSPECTIVES_FILE); // an undefined cwd triggers getCcqaDir's process.cwd() default
+}
+/**
+* Read `.ccqa/perspectives.yaml` raw. Returns null when the file does not
+* exist (first-ever generation) so callers can treat it as optional.
+*/
+async function tryReadPerspectives(cwd) {
+	return readFile(getPerspectivesPath(cwd), "utf-8").catch(() => null); // NOTE(review): every rejection becomes null, not only a missing file (e.g. a permission error would too) — confirm that is intended
+}
+/**
+* Write `.ccqa/perspectives.yaml`. Mirrors `saveSpecFile`: ensures the
+* directory exists and the content ends in a trailing newline.
+*/
+async function savePerspectives(content, cwd) {
+	await mkdir(getCcqaDir(cwd), { recursive: true }); // make sure the .ccqa directory is there before writing
+	const path = getPerspectivesPath(cwd);
+	await writeFile(path, content.endsWith("\n") ? content : content + "\n", "utf-8"); // a "\n" is appended only when content does not already end with one
+	return path; // the path that was written
+}
+/**
+* Human-readable Markdown companion to perspectives.yaml. The `.yaml` is the
+* machine-readable source of truth; the `.md` is a rendered view for review.
+*/
+function getPerspectivesMarkdownPath(cwd) {
+	return join(getCcqaDir(cwd), PERSPECTIVES_MD_FILE); // sits in the same directory as perspectives.yaml
+}
+async function savePerspectivesMarkdown(content, cwd) { // same write sequence as savePerspectives, targeting the .md path
+	await mkdir(getCcqaDir(cwd), { recursive: true }); // make sure the .ccqa directory is there before writing
+	const path = getPerspectivesMarkdownPath(cwd);
+	await writeFile(path, content.endsWith("\n") ? content : content + "\n", "utf-8"); // a "\n" is appended only when content does not already end with one
+	return path; // the path that was written
+}
+/**
+* Per-category detail view: `.ccqa/features/<feature>/perspectives.md`. The
+* root `perspectives.md` is a thin category index that links here; this file
+* carries the full per-case tables for one feature. The feature dir already
+* exists (it holds the test cases), but `mkdir -p` keeps this safe when called
+* in isolation.
+*/
+function getFeaturePerspectivesMarkdownPath(featureName, cwd) {
+	return join(getFeatureDir(featureName, cwd), PERSPECTIVES_MD_FILE); // same file name as the root-level view, placed inside the feature directory
+}
+async function saveFeaturePerspectivesMarkdown(featureName, content, cwd) {
+	await mkdir(getFeatureDir(featureName, cwd), { recursive: true }); // make sure the feature directory is there before writing
+	const path = getFeaturePerspectivesMarkdownPath(featureName, cwd);
+	await writeFile(path, content.endsWith("\n") ? content : content + "\n", "utf-8"); // a "\n" is appended only when content does not already end with one
+	return path; // the path that was written
+}
 /**
 * Replace (or insert) the `relatedPaths` key in the spec. Preserves every
 * other top-level field and the entire steps array. Returns the absolute
@@ -2188,16 +2305,60 @@ function formatUnstableDrop(drop) {
 	return `${`${action.command}${action.assertType ? " " + action.assertType : ""}`}: contains unstable literal (${ids}) — ${samples}`;
 }
 //#endregion
+//#region src/prompts/language.ts
+/**
+* Shared language handling for every Claude-driven command. Each command
+* writes some human-readable text (drift findings, trace observations, draft
+* prose, diagnose hints, perspectives summaries), so the language policy is a
+* single cross-cutting concern rather than per-command logic.
+*
+* The value is a BCP-47 tag (e.g. "ja", "en") or the sentinel "auto". With
+* "auto" the model follows the language of the material it is given — Japanese
+* specs/codebase yield Japanese output — and `languageDirective` returns an
+* empty string so prompts stay byte-identical to the no-flag baseline.
+*/
+const DEFAULT_LANGUAGE = "auto"; // also passed as the --language default value in addLanguageOption
+/**
+* The instruction appended to a command's system prompt. Empty for "auto"
+* (and undefined / blank), so the model keeps its natural material-following
+* behaviour; otherwise it pins every human-readable field to the given tag.
+*/
+function languageDirective(language) {
+	const lang = (language ?? "auto").trim(); // null/undefined behave like "auto"; surrounding whitespace is ignored
+	if (lang === "" || lang === "auto") return ""; // the comparison is case-sensitive, so e.g. "Auto" is treated as a language tag
+	return `\n\nIMPORTANT: Write every human-readable field, message, and explanation in **${lang}** (BCP-47 language tag), regardless of the language of the spec or codebase.`; // starts with a blank line so it can be concatenated straight onto an existing prompt
+}
+/**
+* Whether the CLI's own interactive prompts (the strings ccqa prints itself,
+* not the model's output) should be Japanese. Only an explicit Japanese tag
+* (`ja`, `ja-JP`, …) opts in; `auto` (the default) and every other tag keep
+* the English prompts, so an English user running with no flag is unaffected.
+*/
+function useJapanesePrompts(language) {
+	return /^ja\b/i.test((language ?? "").trim()); // case-insensitive; \b needs "ja" to be followed by a non-word character or the end, so "ja-JP" matches while "jav" and "ja_JP" do not
+}
+//#endregion
+//#region src/cli/options.ts
+/**
+* Shared `--language` flag. Every Claude-driven command writes some
+* human-readable text, so language is a cross-cutting concern handled the same
+* way everywhere — much like `--model`. The value is a BCP-47 tag (e.g. "ja",
+* "en") or "auto" (default), which follows the language of the material.
+*/
+function addLanguageOption(command) {
+	return command.option("--language <bcp47>", "Language for human-readable output (e.g. 'en', 'ja'). Default 'auto' follows the language of the spec/codebase.", DEFAULT_LANGUAGE); // hands back the result of command.option(...) so the caller can keep chaining (.action(...) is chained on it by the command definitions)
+}
+//#endregion
 //#region src/cli/trace.ts
 const VALIDATION_MODES = ["lenient", "strict"];
-const traceCommand = new Command("trace").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Run agent-browser, verify assertions, and record structured actions").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions with a warning but keeps them; 'strict' drops them from actions.json.", (raw) => {
+const traceCommand = addLanguageOption(new Command("trace").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Run agent-browser, verify assertions, and record structured actions").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions with a warning but keeps them; 'strict' drops them from actions.json.", (raw) => {
 	if (VALIDATION_MODES.includes(raw)) return raw;
 	throw new Error(`--validation-mode must be one of ${VALIDATION_MODES.join(" | ")}`);
-}, "lenient").action(async (specPath, opts) => {
+}, "lenient")).action(async (specPath, opts) => {
 	const { featureName, specName } = parseSpecPath(specPath);
-	await runTrace(featureName, specName, opts.model, opts.validationMode ?? "lenient");
+	await runTrace(featureName, specName, opts.model, opts.validationMode ?? "lenient", opts.language);
 });
-async function runTrace(featureName, specName, model, validationMode = "lenient") {
+async function runTrace(featureName, specName, model, validationMode = "lenient", language) {
 	header("trace", `${featureName}/${specName}`);
 	try {
 		meta("agent-browser", assertAgentBrowserAvailable());
@@ -2228,7 +2389,7 @@ async function runTrace(featureName, specName, model, validationMode = "lenient"
 	});
 	const userPrompt = await loadTraceUserPrompt();
 	if (userPrompt !== null) meta("user-prompt", ".ccqa/prompts/trace.user.md");
-	const systemPrompt = userPrompt === null ? baseSystemPrompt : `${baseSystemPrompt}\n## Project-specific guidance\n\n${userPrompt}\n`;
+	const systemPrompt = (userPrompt === null ? baseSystemPrompt : `${baseSystemPrompt}\n## Project-specific guidance\n\n${userPrompt}\n`) + languageDirective(language);
 	const prompt = buildTracePrompt(spec.title);
 	info("Running agent-browser session...");
 	blank();
@@ -3217,18 +3378,47 @@ function previewDiff(before, after) {
 	return out.join("\n");
 }
 //#endregion
+//#region src/prompts/format.ts
+/**
+* Formatting helpers shared by the Claude prompt builders (diagnose, report).
+* Centralised so the prompts cannot drift apart on mechanics that must stay
+* consistent across commands.
+*/
+/** Prefix every line with its 1-based number, the form fix suggestions cite. */
+function numberLines(script) {
+	return script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n"); // splits on "\n" only, so a "\r" from CRLF input stays at the end of each numbered line
+}
+/**
+* The "## Output language" prompt section. Empty for "auto" so the prompt
+* stays byte-identical to the no-flag baseline. `fields` names the
+* human-readable JSON fields to translate; `verbatimNames` names the
+* enum-like values that must never be translated.
+*/
+function outputLanguageBlock(outputLanguage, fields, verbatimNames) {
+	if (outputLanguage === "auto") return ""; // NOTE(review): unlike languageDirective there is no trim()/blank handling here — "" or " auto " would be interpolated into the heading below; the prompt builders only default an undefined value to "auto"
+	return `## Output language
+Write all human-readable fields (${fields}) in **${outputLanguage}** (BCP-47 tag).
+Selectors, file paths, identifiers, ${verbatimNames}, JSON keys, and quoted strings stay verbatim regardless of language.
+`;
+}
+//#endregion
 //#region src/diagnose/prompt.ts
 function buildDiagnosePrompt(input) {
-	const { script, specYaml, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
-	const numbered = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
+	const { script, specYaml, actions, failureLog, pageSnapshot, outputLanguage = "auto" } = input;
+	const numbered = numberLines(script);
+	const actionsSummary = actions.map((a, i) => {
+		const parts = [`${i + 1}. ${a.command}`];
+		if (a.assertType) parts.push(`assertType="${a.assertType}"`);
+		if (a.selector) parts.push(`selector="${a.selector}"`);
+		if (a.value) parts.push(`value="${a.value}"`);
+		if (a.observation) parts.push(`→ ${a.observation}`);
+		return parts.join(" ");
+	}).join("\n");
 	return `You are diagnosing a failing E2E test. The test was generated from a recorded trace of the original interaction. Compare the failing run against the original spec and recorded actions to determine WHY the test failed and what the right fix is.
-## Output language
-Write all human-readable fields (\`reasoning\`, \`reason\`) in **${outputLanguage}** (BCP-47 tag).
-Selectors, file paths, identifiers, code, type names (TIMING_ISSUE, etc.), JSON keys, and quoted strings stay verbatim regardless of language.
-## You have read-only filesystem tools
+${outputLanguageBlock(outputLanguage, "`reasoning`, `reason`", "code, type names (TIMING_ISSUE, etc.)")}## You have read-only filesystem tools
 You can call \`Grep\`, \`Glob\`, and \`Read\` against the current repository before producing the JSON.
@@ -3317,14 +3507,7 @@ Pick exactly ONE category. The output JSON must follow the shape for that catego
 ${specYaml}
 ## Recorded Actions (actions.json summary)
-${actions.map((a, i) => {
-		const parts = [`${i + 1}. ${a.command}`];
-		if (a.assertType) parts.push(`assertType="${a.assertType}"`);
-		if (a.selector) parts.push(`selector="${a.selector}"`);
-		if (a.value) parts.push(`value="${a.value}"`);
-		if (a.observation) parts.push(`→ ${a.observation}`);
-		return parts.join(" ");
-	}).join("\n")}
+${actionsSummary}
 ## Test Script (with line numbers)
 ${numbered}
@@ -3901,11 +4084,11 @@ function resolveMode(opts) {
 }
 //#endregion
 //#region src/cli/generate.ts
-const generateCommand = new Command("generate").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Generate agent-browser test script from recorded trace actions. test.spec.ts is regenerated from actions.json on every run; pass --force to overwrite manual edits.").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--force", "Overwrite an existing test.spec.ts without warning").option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)").option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (specPath, opts) => {
+const generateCommand = addLanguageOption(new Command("generate").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Generate agent-browser test script from recorded trace actions. test.spec.ts is regenerated from actions.json on every run; pass --force to overwrite manual edits.").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--force", "Overwrite an existing test.spec.ts without warning").option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.")).action(async (specPath, opts) => {
 	const { featureName, specName } = parseSpecPath(specPath);
 	const mode = resolveMode(opts);
 	const useSnapshot = opts.snapshot !== false;
-	await runGenerate(featureName, specName, parseInt(opts.maxRetries, 10), mode, opts.force ?? false, useSnapshot, opts.language ?? "en", opts.model);
+	await runGenerate(featureName, specName, parseInt(opts.maxRetries, 10), mode, opts.force ?? false, useSnapshot, opts.language ?? "auto", opts.model);
 });
 async function runGenerate(featureName, specName, maxRetries, mode, force, useSnapshot, outputLanguage, model) {
 	header("generate", `${featureName}/${specName}`);
@@ -4395,7 +4578,7 @@ const DEFAULT_CONCURRENCY$1 = 3;
 * `cli/run` calls this with just the failing specs after vitest.
 */
 async function analyzeDrift(input) {
-	const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, onSpecStart } = input;
+	const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, language, onSpecStart } = input;
 	const results = new Array(targets.length);
 	let cursor = 0;
 	const worker = async () => {
@@ -4407,7 +4590,8 @@ async function analyzeDrift(input) {
 			results[idx] = await checkSpec(target, {
 				cwd,
 				blocks,
-				model
+				model,
+				language
 			});
 		}
 	};
@@ -4426,7 +4610,7 @@ async function checkSpec(target, opts) {
 	};
 	const { result, isError } = await invokeClaudeStreaming({
 		prompt: buildDriftUserPrompt(existing),
-		systemPrompt: buildDriftSystemPrompt(opts.blocks),
+		systemPrompt: buildDriftSystemPrompt(opts.blocks) + languageDirective(opts.language),
 		allowedTools: [
 			"Read",
 			"Grep",
@@ -4467,165 +4651,1187 @@ async function checkSpec(target, opts) {
 	};
 }
 //#endregion
-//#region src/drift/format.ts
+//#region src/drift/affected.ts
+const execFileP = promisify(execFile); // promise-returning execFile; getChangedFiles destructures { stdout } from its result
 /**
-* Render drift results as a string. The CLI commands and the `run` failure
-* hook are the only callers; both want the formatted output returned so
-* they can prefix / interleave / pipe it as needed.
+* Resolve the base ref to diff against for `ccqa drift --changed`.
+* Precedence: explicit override > GITHUB_BASE_REF > origin/main.
 */
-function renderDrift(results, format, cwd) {
-	if (format === "json") return renderJson(results);
-	if (format === "github") return renderGithub(results, cwd);
-	return renderText(results);
+function resolveBaseRef(explicit) {
+	if (explicit && explicit.length > 0) return explicit; // an explicit value is returned untouched (no "origin/" prefixing)
+	const ghBase = process.env["GITHUB_BASE_REF"];
+	if (ghBase && ghBase.length > 0) return ghBase.startsWith("origin/") ? ghBase : `origin/${ghBase}`; // the env value gets an "origin/" prefix unless it already has one
+	return "origin/main"; // fallback when neither source provides a value
 }
-const HEAVY_RULE = "═".repeat(72);
-function renderText(results) {
+/**
+* Run `git diff --name-status base...HEAD` from `cwd` and return one entry per
+* changed file. Renames are reported under their NEW path with status
+* "renamed" — the OLD path is dropped because the spec mapping is against the
+* post-rename layout.
+*
+* Paths are re-rooted to be relative to `cwd`, not the git repo root. In a
+* monorepo where `cwd` is a sub-package (e.g. `apps/foo`), git emits paths
+* relative to the repo root, but specs declare relatedPaths relative to
+* their own package. Changes outside `cwd` are dropped so an unrelated PR
+* can never accidentally scope a sub-package's specs in.
+*/
+async function getChangedFiles(base, cwd) {
+	const [{ stdout: rootOut }, { stdout: diffOut }] = await Promise.all([execFileP("git", ["rev-parse", "--show-toplevel"], { cwd }), execFileP("git", [ // the two git calls do not depend on each other, so they are started together; a failure of either rejects the whole call
+		"diff",
+		"--name-status",
+		"-M", // git's rename-detection flag
+		`${base}...HEAD`
+	], {
+		cwd,
+		maxBuffer: 32 * 1024 * 1024 // 32 MiB
+	})]);
+	return rerootChangedFiles(parseGitDiffOutput(diffOut), rootOut.trim(), cwd); // trim() strips surrounding whitespace (such as a trailing newline) from the printed repo root
+}
+/**
+* Convert paths in `entries` from git-repo-root relative to `cwd` relative,
+* dropping anything outside `cwd`. Exported for unit tests.
+*/
+function rerootChangedFiles(entries, repoRoot, cwd) {
+	const prefix = relative(repoRoot, cwd); // empty string when cwd is the repo root itself
+	if (!prefix) return entries; // nothing to re-root: the same array is handed back, not a copy
 	const out = [];
-	for (const r of results) {
-		out.push("");
-		const heading = `══ ${r.target.featureName}/${r.target.specName} `;
-		const tail = "═".repeat(Math.max(3, 72 - heading.length));
-		out.push(`${heading}${tail}`);
-		if (r.error) {
-			out.push(`  ERROR  ${r.error}`);
+	for (const e of entries) {
+		const rel = relative(prefix, e.path); // path of the entry as seen from the cwd sub-directory
+		if (rel.startsWith("..") || rel === "") continue; // outside cwd (or cwd itself) is dropped. NOTE(review): this prefix test also drops in-tree names that merely begin with ".." (e.g. "..foo") — confirm acceptable
+		out.push({
+			...e, // every other field of the entry is carried over unchanged
+			path: rel
+		});
+	}
+	return out;
+}
+function parseGitDiffOutput(stdout) {
+	const out = [];
+	for (const line of stdout.split("\n")) {
+		if (!line.trim()) continue; // skip blank / whitespace-only lines
+		const parts = line.split("	"); // fields are tab-separated: status code first, then the path(s)
+		const code = parts[0];
+		if (!code) continue;
+		if (code.startsWith("R")) { // rename row: two paths follow the code; parts[1] is ignored and parts[2] is used as the new path
+			const newPath = parts[2];
+			if (newPath) out.push({
+				path: newPath,
+				status: "renamed"
+			});
 			continue;
 		}
-		const errors = r.issues.filter((i) => i.severity === "ERROR");
-		const warnings = r.issues.filter((i) => i.severity === "WARN");
-		const passed = r.issues.filter((i) => i.severity === "OK");
-		if (errors.length === 0 && warnings.length === 0) {
-			const label = passed.length === 1 ? "check" : "checks";
-			const detail = passed.length > 0 ? `all ${passed.length} ${label} passed` : "no issues";
-			out.push(`  ✓  ${detail}`);
+		if (code.startsWith("C")) { // copy row: handled like a rename, but the new path is reported as "added"
+			const newPath = parts[2];
+			if (newPath) out.push({
+				path: newPath,
+				status: "added"
+			});
 			continue;
 		}
-		for (const issue of errors) appendFinding(out, "ERROR", issue);
-		for (const issue of warnings) appendFinding(out, "WARN", issue);
-		if (passed.length > 0) {
-			const names = passed.map((i) => DRAFT_CATEGORY_LABEL[i.category]).join(", ");
-			out.push("");
-			out.push(`  ✓  passed (${passed.length}): ${names}`);
+		const path = parts[1]; // every remaining status carries a single path
+		if (!path) continue;
+		switch (code[0]) { // only the first letter of the status code decides the mapping
+			case "A":
+				out.push({
+					path,
+					status: "added"
+				});
+				break;
+			case "M":
+			case "T": // "T" is grouped with "M" and reported as "modified"
+				out.push({
+					path,
+					status: "modified"
+				});
+				break;
+			case "D":
+				out.push({
+					path,
+					status: "deleted"
+				});
+				break;
+			default: out.push({ // any other status letter is also reported as "modified"
+				path,
+				status: "modified"
+			});
 		}
 	}
-	out.push("");
-	out.push(HEAVY_RULE);
-	const totals = summarize(results);
-	out.push(`  specs    ${results.length} (${totals.errored} errored)`);
-	out.push(`  findings ${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`);
-	out.push("");
-	return out.join("\n");
+	return out;
 }
-function appendFinding(out, level, issue) {
-	const stepPart = issue.stepId ? ` ${issue.stepId}` : "";
-	out.push("");
-	out.push(`  ${level}  ${DRAFT_CATEGORY_LABEL[issue.category]}${stepPart}`);
-	out.push(`    ${issue.message}`);
-	if (issue.detail) out.push(`    └ ${issue.detail.replace(/\n/g, "\n      ")}`);
+function stripLeadingDotSlash(s) {
+	return s.startsWith("./") ? s.slice(2) : s; // removes a single leading "./"; "././x" becomes "./x"
 }
-function renderJson(results) {
-	const payload = { specs: results.map((r) => ({
-		feature: r.target.featureName,
-		spec: r.target.specName,
-		ok: r.ok,
-		...r.error ? { error: r.error } : {},
-		issues: r.issues.map((i) => ({
-			severity: i.severity,
-			category: i.category,
-			stepId: i.stepId,
-			message: i.message,
-			...i.detail ? { detail: i.detail } : {}
-		}))
-	})) };
-	return `${JSON.stringify(payload, null, 2)}\n`;
+const REGEX_CACHE = /* @__PURE__ */ new Map(); // pattern string as given -> compiled RegExp; nothing in the code shown here removes entries
+/** Compiles `pattern` to a RegExp, memoized so repeated `--changed` matches don't re-build. */
+function compileGlob(pattern) {
+	const cached = REGEX_CACHE.get(pattern);
+	if (cached) return cached;
+	const compiled = globToRegExp(stripLeadingDotSlash(pattern)); // a leading "./" is stripped before compiling ...
+	REGEX_CACHE.set(pattern, compiled); // ... while the cache key stays the pattern exactly as passed in
+	return compiled;
 }
-function renderGithub(results, cwd) {
-	const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
-	const lines = [];
-	for (const r of results) {
-		const file = githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName);
-		if (r.error) {
-			lines.push(`::error file=${file}::${escapeGhMessage(r.error)}`);
-			continue;
-		}
-		for (const issue of r.issues) {
-			if (issue.severity === "OK") continue;
-			const level = issue.severity === "ERROR" ? "error" : "warning";
-			const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
-			const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
-			lines.push(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}`);
+function globToRegExp(pattern) {
+	let re = "^"; // regex source is accumulated as a string and anchored at both ends
+	let i = 0;
+	while (i < pattern.length) {
+		const ch = pattern[i];
+		if (ch === "?") { // "?" matches exactly one character that is not "/"
+			re += "[^/]";
+			i++;
+			continue;
 		}
+		if (ch !== "*") { // ordinary character: copied through, backslash-escaped when it is a regex metacharacter
+			re += /[.+^${}()|[\]\\]/.test(ch) ? "\\" + ch : ch;
+			i++;
+			continue;
+		}
+		if (pattern[i + 1] !== "*") { // single "*": any run of characters that does not cross a "/"
+			re += "[^/]*";
+			i++;
+			continue;
+		}
+		const hasLeadingSlash = re.endsWith("/"); // the "**" is directly preceded by a "/" already emitted into re
+		const hasTrailingSlash = pattern[i + 2] === "/";
+		if (hasLeadingSlash) re = re.slice(0, -1); // the emitted "/" is removed and folded into the optional group below
+		if (hasLeadingSlash || hasTrailingSlash) re += "(?:/?.*)?"; // NOTE(review): the separator is optional inside this group, so "src/**" compiles to ^src(?:/?.*)?$ and also matches "srcfoo/x" (likewise "**/a.ts" matches "ba.ts") — confirm this looseness is intended
+		else re += ".*"; // "**" with no adjacent slash: any characters, "/" included
+		i += hasTrailingSlash ? 3 : 2; // consume "**" and, when present, the "/" right after it
 	}
-	return lines.length === 0 ? "" : `${lines.join("\n")}\n`;
-}
-function githubRelPath(cwd, repoRoot, featureName, specName) {
-	const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "spec.yaml");
-	const rel = relative(repoRoot, abs);
-	return rel.startsWith("..") ? abs : rel;
-}
-function escapeGhMessage(s) {
-	return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A");
-}
-function escapeGhProp(s) {
-	return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A").replace(/,/g, "%2C").replace(/:/g, "%3A");
-}
-function summarize(results) {
-	let error = 0;
-	let warn = 0;
-	let ok = 0;
-	let errored = 0;
-	for (const r of results) {
-		if (r.error) errored++;
-		for (const issue of r.issues) if (issue.severity === "ERROR") error++;
-		else if (issue.severity === "WARN") warn++;
-		else ok++;
-	}
-	return {
-		error,
-		warn,
-		ok,
-		errored
-	};
+	return new RegExp(re + "$"); // no flags: matching is case-sensitive
 }
-//#endregion
-//#region src/drift/exit-code.ts
 /**
-* Map drift results to an exit code. Spec-level errors (Claude call failed)
-* always fail; otherwise ERROR severity always fails, WARN fails only when
-* the threshold is `warn`.
+* Returns true if `changedPath` is covered by any of `relatedPaths`. An empty
+* `relatedPaths` returns false — callers handle the "unscoped spec" case
+* separately (treat the spec as always-affected) before calling this.
 */
-function determineExitCode(results, threshold) {
-	for (const r of results) {
-		if (r.error) return 1;
-		for (const issue of r.issues) {
-			if (issue.severity === "ERROR") return 1;
-			if (threshold === "warn" && issue.severity === "WARN") return 1;
-		}
-	}
-	return 0;
+function isPathAffectedBy(changedPath, relatedPaths) {
+	const stripped = stripLeadingDotSlash(changedPath); // drop a leading "./" from the path, as compileGlob does for each pattern
+	for (const pattern of relatedPaths) if (compileGlob(pattern).test(stripped)) return true; // stops at the first pattern that matches
+	return false; // also the result when relatedPaths is empty
 }
 //#endregion
 //#region src/drift/auth.ts
 /**
 * Probe whether the host has any credential the Anthropic SDK can pick up:
 *   1. ANTHROPIC_API_KEY env var (CI / scripted use)
-*   2. ~/.claude/.credentials.json (local Claude Code login)
+*   2. ~/.claude/.credentials.json (Claude Code login, file-based platforms)
+*   3. macOS Keychain item "Claude Code-credentials" (Claude Code login on
+*      darwin stores the OAuth credentials in the Keychain, not on disk)
 *
-* `run --drift` is opt-in, so the caller will only consult this after the
-* user has asked for drift. We never throw — auth absence is a normal flow
-* that surfaces as "drift analysis skipped".
+* Claude-driven hooks are opt-in, so the caller only consults this after the
+* user has asked for analysis. We never throw — auth absence is a normal flow
+* that surfaces as "analysis skipped".
 */
 function driftAuthAvailable() {
 	const key = process.env["ANTHROPIC_API_KEY"];
 	if (typeof key === "string" && key.length > 0) return { ok: true }; // an empty-string env var does not count as a credential
 	if (existsSync(join(homedir(), ".claude", ".credentials.json"))) return { ok: true }; // existence check only; the file is not opened or validated
+	if (process.platform === "darwin" && keychainHasClaudeCredentials()) return { ok: true }; // the Keychain probe runs only on macOS and only after the env-var and file checks found nothing
 	return {
 		ok: false,
 		reason: "no ANTHROPIC_API_KEY / claude login"
 	};
 }
+/**
+* `security find-generic-password` without `-w` only checks the item's
+* existence (exit 0) — it never reads the secret, so no Keychain unlock
+* prompt is triggered. Resolved via PATH so tests can stub the binary.
+*/
+function keychainHasClaudeCredentials() {
+	try {
+		return spawnSync("security", [
+			"find-generic-password",
+			"-s", // the next argument is the item name to look up
+			"Claude Code-credentials"
+		], {
+			stdio: "ignore", // output is discarded; only the exit status is inspected
+			timeout: 3e3 // 3000 (milliseconds per Node's spawnSync option)
+		}).status === 0; // any status other than 0 yields false
+	} catch {
+		return false; // any thrown error is treated as "no credentials"
+	}
+}
+//#endregion
+//#region src/report/prompt.ts
/**
* Builds the prompt that asks the model to classify one failing spec as
* TEST_DRIFT, SPEC_CHANGE, PRODUCT_BUG or UNKNOWN.
*
* @param input - `script` (numbered via `numberLines`), `specYaml`,
* `failureLog` (only its first 8000 characters are embedded), optional
* `diffPatch` / `changedFiles` / `baseRef` for the source-change section,
* optional `driftIssues` for the audit section, and `outputLanguage`
* (defaults to "auto").
* @returns The complete prompt text.
*/
function buildFailureAnalysisPrompt(input) {
	const { script, specYaml, failureLog, diffPatch, changedFiles, baseRef, driftIssues, outputLanguage = "auto" } = input;
	const numberedScript = numberLines(script);
	const languageBlock = outputLanguageBlock(outputLanguage, "`reasoning`, `detail`", "label names (TEST_DRIFT, etc.)");
	// With a patch the model gets the diff itself; without one it is told to be more conservative.
	let sourceChanges;
	if (diffPatch) sourceChanges = `## Source changes since ${baseRef ?? "base"} (git diff, may be truncated)
### Changed files (name-status)
${changedFiles ?? "(unavailable)"}
### Patch
\`\`\`diff
${diffPatch}
\`\`\`
`;
	else sourceChanges = `## Source changes
No diff context is available (the base ref could not be resolved, or there are no changes). Classify from the failure log, the spec, and what you can read in the repository — and be correspondingly more conservative: prefer UNKNOWN over a confident SPEC_CHANGE/PRODUCT_BUG call without diff evidence.
`;
	// One bullet per drift finding; step and detail are appended only when present.
	const describeIssue = (issue) => {
		const step = issue.stepId ? `, step ${issue.stepId}` : "";
		const detail = issue.detail ? ` — ${issue.detail}` : "";
		return `- [${issue.severity}] (${DRAFT_CATEGORY_LABEL[issue.category]}${step}) ${issue.message}${detail}`;
	};
	let driftSection = "";
	if (driftIssues && driftIssues.length > 0) driftSection = `## Spec↔code drift audit findings
A separate read-only audit compared the spec against the current source. Treat these as hints, not verdicts:
${driftIssues.map(describeIssue).join("\n")}
`;
	const logExcerpt = failureLog.slice(0, 8e3);
	return `You are analyzing a failing E2E regression test right after a source change landed. Your job is a root-cause CALL, not a fix: decide which of three categories explains the failure, using the source diff as your primary context.
${languageBlock}## The three categories
The question that separates them: **is the behavior the spec describes still what the product intends?**
1. TEST_DRIFT — what the spec verifies is unchanged; only the test code drifted from the source. Typical: a selector/aria-label/placeholder rename, a timing change, an over-tight assertion. The diff shows a change that is invisible to the user's intent but visible to the test.
2. SPEC_CHANGE — the thing being verified itself changed: the UI flow, the layout, the feature's intended behavior. The diff deliberately changes what the spec asserts. You MUST cite the diff hunk (file + what changed) as evidence for this label.
3. PRODUCT_BUG — neither of the above: the failure is not explained by the diff nor by test staleness. The product regressed.
If the evidence is too weak to choose, answer UNKNOWN — a wrong confident call is worse than an honest UNKNOWN, because humans grade these predictions to measure accuracy.
## You have read-only filesystem tools
You can call \`Grep\`, \`Glob\`, and \`Read\` against the current repository (post-change state) before producing the JSON. Use them to:
- confirm a suspected selector rename (grep for \`aria-label=\`, \`placeholder=\`, \`data-testid\`, i18n strings),
- read the changed files in full when the truncated patch is not enough,
- check whether the element/flow the spec describes still exists in the source.
You have **up to 12 tool turns**. Do NOT write, edit, run shell commands, or hit the network.
## Decision guidance
- Diff touches only attributes/identifiers the test selects on (labels, testids, class names, timing) while the user-visible flow is intact → TEST_DRIFT.
- Diff intentionally removes/reworks the UI or flow that a spec step verifies (component deleted, page restructured, copy redefined, feature flag flipped) → SPEC_CHANGE.
- Diff UNINTENTIONALLY breaks behavior the spec still intends — e.g. a refactor that drops a side effect, an inverted condition, a regression hiding inside a cleanup commit — → PRODUCT_BUG, citing the diff hunk as evidence. A product bug is often introduced BY the diff; what separates it from SPEC_CHANGE is intent: does the change read as a deliberate redesign of what the spec verifies, or as collateral damage?
- Diff is unrelated to the failing step (or there is no relevant diff) and the test was passing before → lean PRODUCT_BUG; first rule out timing/data flakiness and infrastructure errors (daemon not running, network down, missing credentials) — those read as UNKNOWN with low confidence, not PRODUCT_BUG.
- The drift audit findings (when present) flag spec↔code mismatches; an ERROR there usually supports TEST_DRIFT or SPEC_CHANGE over PRODUCT_BUG.
## Sub-diagnosis vocabulary
Alongside the label, report the closest fine-grained mechanic:
- SELECTOR_DRIFT, TIMING_ISSUE, OVER_ASSERTION — usually under TEST_DRIFT
- DATA_MISSING — missing test data/state; usually UNKNOWN or PRODUCT_BUG depending on cause
- NONE — when nothing fits (typical for SPEC_CHANGE and PRODUCT_BUG)
## Output
Your **final** assistant message must start with \`{\` and end with \`}\` — a single JSON object, nothing before or after. No prose preamble, no markdown fences, no tool calls in the same turn.
{
  "label": "TEST_DRIFT" | "SPEC_CHANGE" | "PRODUCT_BUG" | "UNKNOWN",
  "confidence": <0.0-1.0>,
  "subDiagnosis": "SELECTOR_DRIFT" | "TIMING_ISSUE" | "OVER_ASSERTION" | "DATA_MISSING" | "NONE",
  "evidence": [
    { "file": "<file:line or diff hunk reference, omit if log-only>", "detail": "<what this shows>" }
  ],
  "reasoning": "<why this label, citing the evidence>"
}
## Confidence guidance
- 0.9-1.0: the diff (or a file you read) directly shows the cause
- 0.7-0.9: strong indirect evidence
- 0.4-0.7: plausible but another category could explain it
- < 0.4: answer UNKNOWN instead of guessing
Evidence rules: TEST_DRIFT and SPEC_CHANGE require at least one concrete \`file\` reference (diff hunk or file:line you actually read). PRODUCT_BUG should explain why the diff does NOT account for the failure.
## Test Spec (spec.yaml)
${specYaml}
## Test Script (with line numbers)
${numberedScript}
${sourceChanges}
${driftSection}## Failure Log
${logExcerpt}`;
}
+//#endregion
+//#region src/diagnose/types.ts
/**
* The concrete (fixable) diagnosis tags, kept as a runtime value so other
* modules can enumerate them — the report schema extends this list with
* "NONE" to form its sub-diagnosis vocabulary.
*/
const FIXABLE_DIAGNOSIS_TYPES = ["SELECTOR_DRIFT", "TIMING_ISSUE", "OVER_ASSERTION", "DATA_MISSING"];
+//#endregion
+//#region src/report/schema.ts
/**
* Ground-truth root-cause labels for a failing spec, framed as drift
* analysis:
*  - TEST_DRIFT:  the verified behaviour is unchanged; only the test code
*                 fell out of step with the source (selector rename,
*                 timing, ...). Candidates for future auto-fixing.
*  - SPEC_CHANGE: the verified behaviour itself changed (UI redesign, spec
*                 change). Never auto-fixed — a human must re-draft.
*  - PRODUCT_BUG: neither of the above explains the failure; treat it as a
*                 product regression.
*
* The module is measurement-first: each prediction is embedded in the HTML
* report, a human records the true label there, and the report computes the
* confusion matrix client-side so accuracy stays visible even when low.
*/
const FAILURE_LABELS = ["TEST_DRIFT", "SPEC_CHANGE", "PRODUCT_BUG"];
const FailureLabelSchema = z.enum(FAILURE_LABELS);
/** Labels the model may answer with: the three above plus UNKNOWN for weak evidence. */
const PREDICTED_LABELS = FAILURE_LABELS.concat("UNKNOWN");
const PredictedLabelSchema = z.enum(PREDICTED_LABELS);
/** Sub-diagnosis vocabulary: every fixable diagnosis tag plus NONE. */
const SUB_DIAGNOSES = FIXABLE_DIAGNOSIS_TYPES.concat("NONE");
/** One piece of supporting evidence: a required `detail` text plus an optional `file` reference. */
+const FailureEvidenceSchema = z.object({
+	file: z.string().optional(),
+	detail: z.string()
+});
+/**
+* LLM output shape. Deliberately NOT .strict(): the model occasionally adds
+* keys, and rejecting the whole analysis over an extra field would collapse
+* a usable prediction into UNKNOWN. Zod's default strips unknown keys.
+*/
+const FailureAnalysisSchema = z.object({
	// Any predicted label, UNKNOWN included.
+	label: PredictedLabelSchema,
	// Constrained to the closed interval [0, 1].
+	confidence: z.number().min(0).max(1),
	// Optional fine-grained mechanic drawn from SUB_DIAGNOSES.
+	subDiagnosis: z.enum(SUB_DIAGNOSES).optional(),
+	evidence: z.array(FailureEvidenceSchema),
+	reasoning: z.string()
+});
/** One assertion row of a spec result: name, outcome, and a nullable duration in milliseconds. */
+const ReportAssertionSchema = z.object({
+	name: z.string(),
+	status: z.enum([
+		"passed",
+		"failed",
+		"skipped"
+	]),
+	durationMs: z.number().nullable()
+});
/**
 * Per-spec entry of the run report. Every field other than `feature`,
 * `spec` and `status` is nullable.
 */
+const ReportSpecResultSchema = z.object({
+	feature: z.string(),
+	spec: z.string(),
+	status: z.enum(["passed", "failed"]),
+	testCounts: z.object({
+		total: z.number(),
+		passed: z.number(),
+		failed: z.number()
+	}).nullable(),
+	durationMs: z.number().nullable(),
+	assertions: z.array(ReportAssertionSchema).nullable(),
	// The model's prediction for this spec, or null when none was produced.
+	analysis: FailureAnalysisSchema.nullable(),
	// Free-text field; renderSkipped shows it after "analysis skipped:".
+	analysisSkipped: z.string().nullable(),
+	driftIssues: z.array(DraftIssueSchema).nullable(),
+	failureLogExcerpt: z.string().nullable(),
+	diffExcerpt: z.string().nullable(),
+	specYaml: z.string().nullable()
+});
/**
 * Top-level run-report document (schemaVersion 1) wrapping the per-spec
 * results. NOTE(review): the schema is constructed here but not bound to a
 * name in this chunk — presumably the bundler dropped an unused export;
 * confirm against the original source module.
 */
+z.object({
+	schemaVersion: z.literal(1),
+	createdAt: z.string(),
+	runId: z.string().nullable(),
+	git: z.object({
+		head: z.string().nullable(),
+		base: z.string().nullable()
+	}),
+	model: z.string().nullable(),
+	promptVersion: z.string(),
+	results: z.array(ReportSpecResultSchema)
+});
+/** Shape of the "export labels" download produced by the report's client-side JS. */
+const LabelEntrySchema = z.object({
+	feature: z.string(),
+	spec: z.string(),
	// The model's call (may be UNKNOWN) ...
+	predicted: PredictedLabelSchema,
	// ... versus the human-recorded truth, which excludes UNKNOWN.
+	label: FailureLabelSchema,
+	note: z.string().optional()
+});
/**
 * Envelope of a labels export (schemaVersion 1): run identity, prompt
 * version, export timestamp and the graded entries. NOTE(review): built but
 * not bound to a name in this chunk — presumably the `LabelsExportSchema`
 * mentioned in the render module's docs; confirm against the source module.
 */
+z.object({
+	schemaVersion: z.literal(1),
+	runId: z.string().nullable(),
+	promptVersion: z.string(),
+	exportedAt: z.string(),
+	labels: z.array(LabelEntrySchema)
+});
+//#endregion
+//#region src/report/analyze.ts
/**
* Classifies one failing spec as TEST_DRIFT / SPEC_CHANGE / PRODUCT_BUG /
* UNKNOWN by prompting Claude with read-only tools (Read, Grep, Glob) and a
* 12-turn budget.
*
* The function never rejects because of unusable model output: an SDK error,
* empty output, or output with no parseable candidate all degrade to an
* UNKNOWN analysis with confidence 0, so the report can always render.
*
* @param input - Prompt inputs, forwarded to `buildFailureAnalysisPrompt`.
* @param options - Optional `model` and `cwd` overrides; each is passed on
* only when truthy.
* @returns `{ analysis, raw, sdkError }` where `raw` is the model's final
* text ("" when there was none) and `sdkError` mirrors the SDK error flag.
*/
async function analyzeFailure(input, options = {}) {
	const request = {
		prompt: buildFailureAnalysisPrompt(input),
		allowedTools: ["Read", "Grep", "Glob"],
		silenceBashLog: true,
		maxTurns: 12
	};
	if (options.model) request.model = options.model;
	if (options.cwd) request.cwd = options.cwd;
	const ignoreProgress = () => {};
	const response = await invokeClaudeStreaming(request, ignoreProgress);
	const raw = response.result;
	const isError = response.isError;
	if (isError || !raw) {
		const why = isError ? "Claude returned an error result" : "Claude returned no output";
		return {
			analysis: unknownAnalysis(why),
			raw: raw ?? "",
			sdkError: isError
		};
	}
	// The first candidate that both parses as JSON and normalises wins.
	for (const candidate of extractJsonCandidates(raw)) {
		let decoded;
		try {
			decoded = JSON.parse(candidate);
		} catch {
			continue;
		}
		const analysis = normaliseFailureAnalysis(decoded);
		if (analysis) return { analysis, raw, sdkError: false };
	}
	const fallback = unknownAnalysis(`analysis returned no parseable JSON: ${truncate$2(raw, 500)}`);
	return { analysis: fallback, raw, sdkError: false };
}
/**
* Builds the placeholder analysis used when no usable prediction exists:
* label UNKNOWN, confidence 0, sub-diagnosis NONE and no evidence.
*
* @param reasoning - Explanation stored on the result.
*/
function unknownAnalysis(reasoning) {
	const placeholder = { label: "UNKNOWN", confidence: 0, subDiagnosis: "NONE", evidence: [] };
	placeholder.reasoning = reasoning;
	return placeholder;
}
const LABELS = new Set(PREDICTED_LABELS);
const SUB_SET = new Set(SUB_DIAGNOSES);
/**
* Leniently coerces a parsed JSON candidate into an analysis object.
*
* Only an unusable `label` (missing, non-string, or outside PREDICTED_LABELS)
* rejects the candidate. Everything else degrades: a non-numeric confidence
* becomes 0 (numeric ones are clamped to [0, 1]), a non-string reasoning
* becomes "", an unrecognised sub-diagnosis becomes "NONE", and evidence
* entries without a string `detail` are dropped.
*
* @param parsed - Any value produced by JSON.parse.
* @returns The normalised analysis, or `null` when the candidate is unusable.
*/
function normaliseFailureAnalysis(parsed) {
	if (!isObject(parsed)) return null;
	const { label, confidence: rawConfidence, reasoning: rawReasoning, subDiagnosis: rawSub, evidence: rawEvidence } = parsed;
	const labelIsKnown = typeof label === "string" && LABELS.has(label);
	if (!labelIsKnown) return null;
	let confidence = 0;
	if (typeof rawConfidence === "number") confidence = clamp(rawConfidence, 0, 1);
	const reasoning = typeof rawReasoning === "string" ? rawReasoning : "";
	let subDiagnosis = "NONE";
	if (typeof rawSub === "string" && SUB_SET.has(rawSub)) subDiagnosis = rawSub;
	// Keep only object entries with a string detail; `file` is carried over only when it is a string.
	const evidence = Array.isArray(rawEvidence) ? rawEvidence.filter((entry) => isObject(entry) && typeof entry["detail"] === "string").map((entry) => typeof entry["file"] === "string" ? {
		file: entry["file"],
		detail: entry["detail"]
	} : { detail: entry["detail"] }) : [];
	return { label, confidence, subDiagnosis, evidence, reasoning };
}
/**
* Captures the PR diff used as context for failure analysis by running three
* git commands in parallel in `cwd`: the short HEAD hash, the full patch and
* the name-status listing, both for `<base>...HEAD` with rename detection
* (`-M`). `--relative` re-roots paths to `cwd` and drops changes outside it,
* matching how relatedPaths are declared in a monorepo sub-package.
*
* Failures (unknown base ref, not a git repo, ...) are returned, never
* thrown: the report is still worth generating without diff context.
*
* @param base - Base ref to diff against.
* @param cwd - Directory the git commands run in.
* @returns `{ ok: true, diff: { patch, nameStatus, head } }` on success, or
* `{ ok: false, error }` carrying the first line of the failure message.
*/
async function capturePrDiff(base, cwd) {
	const range = `${base}...HEAD`;
	try {
		const [{ stdout: head }, { stdout: patch }, { stdout: nameStatus }] = await Promise.all([
			execFileP("git", [
				"rev-parse",
				"--short",
				"HEAD"
			], { cwd }),
			execFileP("git", [
				"diff",
				"-M",
				"--relative",
				range
			], {
				cwd,
				maxBuffer: 64 * 1024 * 1024
			}),
			execFileP("git", [
				"diff",
				"--name-status",
				"-M",
				"--relative",
				range
			], {
				cwd,
				maxBuffer: 32 * 1024 * 1024
			})
		]);
		return {
			ok: true,
			diff: {
				patch,
				nameStatus: nameStatus.trim(),
				head: head.trim()
			}
		};
	} catch (e) {
		// A rejection need not be an Error; reading `.message` blindly would
		// throw from inside the handler and break the "returned, not thrown"
		// contract.
		const message = e instanceof Error ? e.message : String(e);
		// `||` rather than `??`: split() always yields a first element, so only
		// an empty first line can trigger the generic fallback.
		return {
			ok: false,
			error: message.split("\n")[0] || "git diff failed"
		};
	}
}
/**
* Split a unified diff into per-file sections on `diff --git` boundaries.
* The path is taken from the `b/` side so renames/edits key on the
* post-change layout — the same side relatedPaths are written against.
*
* Two header forms are recognised:
*  - plain:  `diff --git a/<path> b/<path>`
*  - quoted: `diff --git "a/<path>" "b/<path>"`, which git emits for paths
*            containing special or (by default) non-ASCII characters; the
*            C-style escapes are decoded back into the real path.
*/
const DIFF_HEADER = /^diff --git a\/(.+) b\/(.+)$/;
const DIFF_HEADER_PREFIX = "diff --git ";
/** Quoted `b/` side at the end of a header; group 1 is the still-escaped path. */
const QUOTED_B_SIDE = /"b\/((?:[^"\\]|\\.)*)"$/;
/** Quoted `a/` side followed by a plain `b/` side; group 1 is the `b/` path. */
const QUOTED_A_PLAIN_B = /^"a\/(?:[^"\\]|\\.)*" b\/(.+)$/;
/** One backslash escape inside a quoted path: up to three octal digits, or a single character. */
const GIT_QUOTE_ESCAPE = /\\([0-7]{1,3}|[^0-7])/gu;
/** Byte values of the named C escapes; any other escaped character stands for itself. */
const GIT_ESCAPE_BYTES = new Map([
	["a", 7],
	["b", 8],
	["t", 9],
	["n", 10],
	["v", 11],
	["f", 12],
	["r", 13]
]);
/**
* Decodes the inside of a git-quoted path. Octal escapes are raw bytes of the
* UTF-8 encoded name, so the result is assembled as bytes and decoded once.
*/
function unquoteGitPath(quoted) {
	const encoder = new TextEncoder();
	const bytes = [];
	let cursor = 0;
	for (const match of quoted.matchAll(GIT_QUOTE_ESCAPE)) {
		bytes.push(...encoder.encode(quoted.slice(cursor, match.index)));
		const escaped = match[1];
		if (/^[0-7]/.test(escaped)) bytes.push(Number.parseInt(escaped, 8) & 255);
		else if (GIT_ESCAPE_BYTES.has(escaped)) bytes.push(GIT_ESCAPE_BYTES.get(escaped));
		else bytes.push(...encoder.encode(escaped));
		cursor = match.index + match[0].length;
	}
	bytes.push(...encoder.encode(quoted.slice(cursor)));
	return new TextDecoder().decode(Uint8Array.from(bytes));
}
/**
* Returns the post-change (`b/`) path named by a `diff --git` header line, or
* `null` when the line is not a recognisable header.
*/
function diffHeaderPath(line) {
	if (!line.startsWith(DIFF_HEADER_PREFIX)) return null;
	const rest = line.slice(DIFF_HEADER_PREFIX.length);
	if (rest.endsWith("\"")) {
		const quoted = QUOTED_B_SIDE.exec(rest);
		return quoted ? unquoteGitPath(quoted[1]) : null;
	}
	const mixed = QUOTED_A_PLAIN_B.exec(rest);
	if (mixed) return mixed[1];
	const plain = DIFF_HEADER.exec(line);
	if (!plain) return null;
	// When both sides name the same path, `rest` is exactly "a/P b/P". Checking
	// for that first resolves paths that themselves contain " b/", which the
	// greedy pattern alone would split in the wrong place.
	const pathLength = (rest.length - 5) / 2;
	if (Number.isInteger(pathLength) && pathLength > 0) {
		const candidate = rest.slice(2, 2 + pathLength);
		if (rest === `a/${candidate} b/${candidate}`) return candidate;
	}
	return plain[2];
}
function splitPatchByFile(patch) {
	const sections = [];
	let current = null;
	const flush = () => {
		if (current) sections.push({
			path: current.path,
			body: current.lines.join("\n")
		});
		current = null;
	};
	for (const line of patch.split("\n")) {
		const path = diffHeaderPath(line);
		if (path !== null) {
			flush();
			current = {
				path,
				lines: [line]
			};
		} else if (current) current.lines.push(line);
	}
	flush();
	return sections;
}
/**
* Narrows a patch to the files a spec depends on and bounds its size so the
* analysis prompt stays small.
*
* @param patch - Raw unified diff text, or sections already produced by
* `splitPatchByFile` (useful when scoping one patch for many specs).
* @param relatedPaths - Paths the spec depends on; null/empty means the spec
* is unscoped. When no section matches, the whole patch is kept.
* @param caps - Optional `perFile` (default 8192) and `total` (default
* 49152) character limits.
* @returns The kept sections joined with newlines, with `[truncated: …]`
* markers wherever content was cut or files were omitted.
*/
function scopePatchForSpec(patch, relatedPaths, caps = {}) {
	const perFileLimit = caps.perFile ?? 8192;
	const totalLimit = caps.total ?? 49152;
	const allSections = typeof patch === "string" ? splitPatchByFile(patch) : patch;
	let chosen = allSections;
	if (relatedPaths && relatedPaths.length > 0) {
		const matching = allSections.filter((section) => isPathAffectedBy(section.path, relatedPaths));
		if (matching.length > 0) chosen = matching;
	}
	const chunks = [];
	let consumed = 0;
	let omitted = 0;
	for (const { path, body } of chosen) {
		// Once the budget is spent, remaining files are only counted.
		if (consumed >= totalLimit) {
			omitted += 1;
			continue;
		}
		let text = body;
		const overflow = text.length - perFileLimit;
		if (overflow > 0) text = `${text.slice(0, perFileLimit)}\n[truncated: ${overflow} more chars of ${path}]`;
		const remaining = totalLimit - consumed;
		if (text.length > remaining) text = `${text.slice(0, remaining)}\n[truncated: total patch cap reached]`;
		chunks.push(text);
		consumed += text.length;
	}
	if (omitted > 0) chunks.push(`[truncated: ${omitted} more changed file(s) omitted]`);
	return chunks.join("\n");
}
+//#endregion
+//#region src/report/render.ts
/**
* Renders the run report as ONE self-contained HTML document: the stylesheet
* and client script are inlined and the report data is embedded as a JSON
* data block, so the file can be shipped as a CI artifact and opened
* offline. The layout follows Playwright's HTML report conventions — header
* stats that double as filters, a search box, collapsible per-spec cards and
* an automatic light/dark theme.
*
* The measurement loop runs client-side: analyzed failures get ground-truth
* radio buttons and the inlined script recomputes the metrics on each change.
* The "Prediction accuracy" panel is emitted only when at least one failed
* result carries an analysis.
*
* @param data - Run report document (results, git refs, run id, ...).
* @returns The complete HTML text.
*/
function renderRunReport(data) {
	const { results } = data;
	const failedResults = results.filter((result) => result.status === "failed");
	const analyzedCount = failedResults.filter((result) => result.analysis !== null).length;
	const passedCount = results.length - failedResults.length;
	let totalDuration = 0;
	for (const result of results) totalDuration += result.durationMs ?? 0;
	// "<" is escaped so the payload cannot terminate the surrounding <script> element.
	const embeddedJson = JSON.stringify(data).replace(/</g, "\\u003c");
	const generatedAt = esc(formatDate(data.createdAt));
	const durationMeta = totalDuration > 0 ? `<span>${formatDuration$1(totalDuration)}</span>` : "";
	const runMeta = data.runId ? `<span>CI run ${esc(data.runId)}</span>` : "";
	let gitMeta = "";
	if (data.git.head) {
		const baseMeta = data.git.base ? ` vs <code>${esc(data.git.base)}</code>` : "";
		gitMeta = `<span><code>${esc(data.git.head)}</code>${baseMeta}</span>`;
	}
	const metrics = analyzedCount > 0 ? metricsPanel() : "";
	const cards = results.map((result, position) => renderResult(result, position)).join("\n");
	return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>ccqa run report</title>
<style>${CSS}</style>
</head>
<body>
<header>
  <div class="header-inner">
    <div class="header-top">
      <h1>ccqa run report</h1>
      <div class="meta">
        <span title="generated at">${generatedAt}</span>
        ${durationMeta}
        ${runMeta}
        ${gitMeta}
        <span class="dim">prompt v${esc(data.promptVersion)}</span>
      </div>
    </div>
    <div class="toolbar">
      <div class="chips" id="filter-chips">
        <button type="button" class="chip active" data-filter="all">All <span class="count">${results.length}</span></button>
        <button type="button" class="chip chip-pass" data-filter="passed">${passedCount} passed</button>
        <button type="button" class="chip chip-fail" data-filter="failed">${failedResults.length} failed</button>
      </div>
      <input type="search" id="search" placeholder="Filter by name…" autocomplete="off">
    </div>
  </div>
</header>
<div class="page">
${metrics}
<main id="spec-list">
${cards}
</main>
<p class="empty-note" id="no-match" hidden>No specs match the current filter.</p>
</div>
<script type="application/json" id="ccqa-report-data">${embeddedJson}<\/script>
<script>${CLIENT_JS}<\/script>
</body>
</html>
`;
}
/**
* Static markup for the "Prediction accuracy" panel: export/import controls,
* a grading hint, and the empty `#metrics` container.
*/
function metricsPanel() {
	const markup = [
		"<section class=\"panel\" id=\"measure-panel\">",
		"  <div class=\"panel-head\">",
		"    <h2>Prediction accuracy</h2>",
		"    <div class=\"measure-actions\">",
		"      <button type=\"button\" id=\"export-labels\">Export labels (JSON)</button>",
		"      <label class=\"import-label\">Import labels<input type=\"file\" id=\"import-labels\" accept=\"application/json\"></label>",
		"    </div>",
		"  </div>",
		"  <p class=\"hint\">Grade each failed case below with its true cause; the matrix updates live. Labels are saved in this browser (localStorage) — export them to keep or merge.</p>",
		"  <div id=\"metrics\"></div>",
		"</section>"
	];
	return markup.join("\n");
}
/**
* Renders one spec result as a collapsible card. Failed specs start expanded
* and show either the analysis block or an "analysis skipped" note.
*
* @param r - Spec result entry.
* @param index - Position of the result in the report; forwarded to
* `renderAnalysis` for its form controls.
* @returns The `<details>` markup for the card.
*/
function renderResult(r, index) {
	const caseId = `${r.feature}/${r.spec}`;
	const isFailed = r.status === "failed";
	let duration = "";
	if (r.durationMs != null && r.durationMs > 0) duration = `<span class="duration">${formatDuration$1(r.durationMs)}</span>`;
	let counts = "";
	if (r.testCounts) counts = `<span class="counts">${r.testCounts.passed}/${r.testCounts.total}</span>`;
	let predictionChip = "";
	if (isFailed && r.analysis) predictionChip = `<span class="badge ${r.analysis.label}">${r.analysis.label}</span>`;
	let verdict = "";
	if (isFailed) verdict = r.analysis ? renderAnalysis(r, index) : renderSkipped(r);
	const openAttr = isFailed ? " open" : "";
	return `<details class="spec ${r.status}" data-status="${r.status}" data-case-id="${esc(caseId)}"${openAttr}>
  <summary>
    ${statusIcon(r.status)}
    <span class="spec-name">${esc(caseId)}</span>
    ${predictionChip}
    <span class="spacer"></span>
    ${counts}
    ${duration}
  </summary>
  <div class="spec-body">
    ${renderAssertions(r)}
    ${verdict}
    ${renderDriftIssues(r)}
    ${collapsible("Failure log", r.failureLogExcerpt)}
    ${collapsible("Source diff (scoped)", r.diffExcerpt, "diff")}
    ${collapsible("spec.yaml", r.specYaml)}
  </div>
</details>`;
}
/**
* Returns the status glyph markup for "passed" (✓) or "failed" (✕); any
* other status gets the "skipped" glyph (◌).
*/
function statusIcon(status) {
	switch (status) {
		case "passed": return `<span class="status-icon pass" aria-label="passed">✓</span>`;
		case "failed": return `<span class="status-icon fail" aria-label="failed">✕</span>`;
		default: return `<span class="status-icon skip" aria-label="skipped">◌</span>`;
	}
}
/**
* Renders the per-assertion step list of a spec result, or "" when the
* result has no assertions. Each row shows the status glyph, the escaped
* name and — when a duration is recorded — the formatted duration.
*/
function renderAssertions(r) {
	const assertions = r.assertions;
	if (!assertions || assertions.length === 0) return "";
	let rows = "";
	for (const assertion of assertions) {
		const timing = assertion.durationMs != null ? `<span class="duration">${formatDuration$1(assertion.durationMs)}</span>` : "";
		rows += `<li>${statusIcon(assertion.status)}<span class="step-name">${esc(assertion.name)}</span><span class="spacer"></span>${timing}</li>`;
	}
	return `<ul class="steps">${rows}</ul>`;
}
/**
* Renders the analysis block of a failed spec: the predicted label badge,
* confidence (as a bar and a rounded percentage), the sub-diagnosis unless it
* is NONE, the reasoning, the evidence list, and the "True cause" grading
* controls (one radio per failure label plus a free-text note).
*
* @param r - Spec result whose `analysis` is set.
* @param index - Result position; used in the radio group name and the
* note's `data-case-index` so the client script can find the controls.
*/
function renderAnalysis(r, index) {
	const { label, confidence, subDiagnosis, reasoning, evidence } = r.analysis;
	const percent = Math.round(confidence * 100);
	let evidenceList = "";
	if (evidence.length > 0) {
		const rows = evidence.map((entry) => {
			const source = entry.file ? `<code>${esc(entry.file)}</code> — ` : "";
			return `<li>${source}${esc(entry.detail)}</li>`;
		});
		evidenceList = `<ul class="evidence">${rows.join("")}</ul>`;
	}
	const subChip = subDiagnosis && subDiagnosis !== "NONE" ? `<span class="sub">${esc(subDiagnosis)}</span>` : "";
	const truthOptions = FAILURE_LABELS.map((option) => `<label class="truth-option ${option}"><input type="radio" name="label--${index}" value="${option}"><span>${option}</span></label>`).join("\n    ");
	return `<div class="analysis">
  <div class="prediction">
    <span class="badge ${label}">${label}</span>
    <span class="confidence" title="confidence"><span class="confidence-bar"><span style="width:${percent}%"></span></span>${percent}%</span>
    ${subChip}
  </div>
  <p class="reasoning">${esc(reasoning)}</p>
  ${evidenceList}
  <div class="truth">
    <span class="truth-title">True cause</span>
    ${truthOptions}
    <input type="text" class="note" placeholder="note (optional)" data-case-index="${index}">
  </div>
</div>`;
}
/**
* Renders the note shown for a failed spec that has no analysis, appending
* the escaped skip reason when one is recorded.
*/
function renderSkipped(r) {
	const reason = r.analysisSkipped ? `: ${esc(r.analysisSkipped)}` : "";
	return `<div class="analysis skipped">analysis skipped${reason}</div>`;
}
/**
* Renders the collapsible "Spec↔code drift audit" list for a spec result, or
* "" when the result carries no drift issues.
*
* Every interpolated field is HTML-escaped, including `severity`, which is
* used both as a CSS class and as visible text. A category without an entry
* in DRAFT_CATEGORY_LABEL falls back to its raw key, so an unexpected
* category cannot abort report rendering.
*
* @param r - Spec result; `driftIssues` may be null or empty.
* @returns The `<details>` markup, or an empty string.
*/
function renderDriftIssues(r) {
	const issues = r.driftIssues;
	if (!issues || issues.length === 0) return "";
	const items = issues.map((issue) => {
		const severity = esc(String(issue.severity));
		const category = esc(String(DRAFT_CATEGORY_LABEL[issue.category] ?? issue.category));
		const step = issue.stepId ? `, step ${esc(issue.stepId)}` : "";
		const detail = issue.detail ? ` — ${esc(issue.detail)}` : "";
		return `<li><span class="severity ${severity}">${severity}</span> (${category}${step}) ${esc(issue.message)}${detail}</li>`;
	}).join("");
	return `<details class="drift"><summary>Spec↔code drift audit (${issues.length})</summary><ul>${items}</ul></details>`;
}
/**
* Wraps raw text in a collapsed `<details class="raw …">` block with an
* escaped title and an escaped `<pre>` body.
*
* @param title - Summary text.
* @param content - Body text; a falsy value (null, "") produces no markup.
* @param kind - Extra CSS class appended after `raw` (default "").
*/
function collapsible(title, content, kind = "") {
	if (content) {
		const summary = `<summary>${esc(title)}</summary>`;
		const body = `<pre>${esc(content)}</pre>`;
		return `<details class="raw ${kind}">${summary}${body}</details>`;
	}
	return "";
}
/** Replacement entity for each character that is unsafe in HTML text or attribute values. */
const ESC_MAP = {
	"&": "&amp;",
	"<": "&lt;",
	">": "&gt;",
	"\"": "&quot;",
	"'": "&#39;"
};
/**
* HTML-escapes a string by replacing `&`, `<`, `>`, `"` and `'` with their
* entities from ESC_MAP; all other characters pass through unchanged.
*
* @param s - Text to escape; must be a string.
*/
function esc(s) {
	const toEntity = (unsafe) => ESC_MAP[unsafe];
	const unsafeChars = /[&<>"']/g;
	return s.replace(unsafeChars, toEntity);
}
/**
* Formats a millisecond duration for display: whole milliseconds below one
* second ("850ms"), one decimal of seconds below one minute ("12.3s"), and
* minutes plus whole seconds above that ("2m 5s").
*
* Each branch boundary is placed where the rounded output would overflow its
* unit, so the result never reads "1000ms", "60.0s" or "1m 60s".
*
* @param ms - Duration in milliseconds.
* @returns The formatted duration.
*/
function formatDuration$1(ms) {
	// 999.5 and above would round to "1000ms"; let the seconds branch print "1.0s".
	if (ms < 999.5) return `${Math.round(ms)}ms`;
	// 59950 and above would print "60.0s"; let the minutes branch print "1m 0s".
	if (ms < 59950) return `${(ms / 1e3).toFixed(1)}s`;
	// Round once to whole seconds, then split, so the seconds part stays in 0-59.
	const totalSeconds = Math.round(ms / 1e3);
	return `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
}
/**
* Turns an ISO-8601 UTC timestamp into a friendlier form for the report
* header: the first "T" becomes a space and the trailing "Z" — together with
* any fractional seconds before it — becomes " UTC".
*
* @param iso - Timestamp such as "2024-05-01T12:34:56.789Z" or
* "2024-05-01T12:34:56Z". Strings without a trailing "Z" keep their ending.
* @returns The reformatted timestamp.
*/
function formatDate(iso) {
	// The fractional part is optional so second-precision timestamps are handled too.
	return iso.replace("T", " ").replace(/(?:\.\d+)?Z$/, " UTC");
}
/**
 * Inline stylesheet for the run report; renderRunReport interpolates it into
 * the document's <style> element so the HTML file needs no external assets.
 *
 * Colours are custom properties on :root, overridden inside a
 * `prefers-color-scheme: dark` media query. The `.badge.<LABEL>` and
 * `.severity.<LEVEL>` rules are keyed on the label and severity values the
 * render functions emit as class names, so those values and these selectors
 * must stay in step.
 */
+const CSS = `
+:root {
+  color-scheme: light dark;
+  --bg: #f4f5f7;
+  --surface: #ffffff;
+  --surface-2: #f8f9fa;
+  --border: #e1e4e8;
+  --text: #1f2328;
+  --text-dim: #656d76;
+  --accent: #1f6feb;
+  --pass: #1a7f37;
+  --pass-bg: #dafbe1;
+  --fail: #cf222e;
+  --fail-bg: #ffebe9;
+  --skip: #9a6700;
+  --code-bg: #0d1117;
+  --code-text: #e6edf3;
+  --shadow: 0 1px 3px rgba(31, 35, 40, 0.06);
+}
+@media (prefers-color-scheme: dark) {
+  :root {
+    --bg: #0d1117;
+    --surface: #161b22;
+    --surface-2: #1c2129;
+    --border: #30363d;
+    --text: #e6edf3;
+    --text-dim: #8b949e;
+    --accent: #58a6ff;
+    --pass: #3fb950;
+    --pass-bg: rgba(63, 185, 80, 0.15);
+    --fail: #f85149;
+    --fail-bg: rgba(248, 81, 73, 0.15);
+    --skip: #d29922;
+    --code-bg: #010409;
+    --code-text: #e6edf3;
+    --shadow: none;
+  }
+}
+* { box-sizing: border-box; }
+body {
+  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Hiragino Sans", "Noto Sans JP", sans-serif;
+  margin: 0; background: var(--bg); color: var(--text); font-size: 14px;
+}
+header {
+  position: sticky; top: 0; z-index: 10;
+  background: var(--surface); border-bottom: 1px solid var(--border);
+}
+.header-inner { max-width: 1080px; margin: 0 auto; padding: 14px 24px 10px; }
+.header-top { display: flex; align-items: baseline; gap: 18px; flex-wrap: wrap; }
+h1 { font-size: 17px; margin: 0; font-weight: 650; }
+h2 { font-size: 14px; margin: 0; font-weight: 650; }
+.meta { font-size: 12px; color: var(--text-dim); display: flex; gap: 14px; flex-wrap: wrap; }
+.meta code { background: var(--surface-2); border: 1px solid var(--border); padding: 0 5px; border-radius: 4px; font-size: 11px; }
+.dim { color: var(--text-dim); }
+.toolbar { display: flex; align-items: center; gap: 12px; margin-top: 10px; flex-wrap: wrap; }
+.chips { display: flex; gap: 6px; }
+.chip {
+  font: inherit; font-size: 12.5px; font-weight: 600; cursor: pointer;
+  padding: 3px 12px; border-radius: 999px; border: 1px solid var(--border);
+  background: var(--surface); color: var(--text-dim);
+}
+.chip .count { opacity: 0.7; }
+.chip.active { background: var(--text); color: var(--surface); border-color: var(--text); }
+.chip-pass.active { background: var(--pass); border-color: var(--pass); color: #fff; }
+.chip-fail.active { background: var(--fail); border-color: var(--fail); color: #fff; }
+#search {
+  font: inherit; font-size: 13px; flex: 1; min-width: 180px; max-width: 320px; margin-left: auto;
+  padding: 5px 12px; border: 1px solid var(--border); border-radius: 6px;
+  background: var(--surface-2); color: var(--text);
+}
+#search:focus { outline: 2px solid var(--accent); outline-offset: -1px; }
+.page { max-width: 1080px; margin: 16px auto; padding: 0 24px; }
+.panel {
+  background: var(--surface); border: 1px solid var(--border); border-radius: 8px;
+  padding: 14px 18px; margin-bottom: 16px; box-shadow: var(--shadow);
+}
+.panel-head { display: flex; align-items: center; justify-content: space-between; gap: 12px; flex-wrap: wrap; }
+.hint { font-size: 12px; color: var(--text-dim); margin: 6px 0 10px; }
+.spec {
+  background: var(--surface); border: 1px solid var(--border); border-radius: 8px;
+  margin-bottom: 8px; box-shadow: var(--shadow);
+}
+.spec > summary {
+  display: flex; align-items: center; gap: 10px; padding: 10px 16px;
+  cursor: pointer; list-style: none; user-select: none;
+}
+.spec > summary::-webkit-details-marker { display: none; }
+.spec > summary::before {
+  content: "▸"; color: var(--text-dim); font-size: 11px;
+  transition: transform 0.12s ease; flex: 0 0 auto;
+}
+.spec[open] > summary::before { transform: rotate(90deg); }
+.spec-name { font-weight: 600; font-size: 13.5px; }
+.spacer { flex: 1; }
+.counts { font-size: 12px; color: var(--text-dim); }
+.duration { font-size: 12px; color: var(--text-dim); font-variant-numeric: tabular-nums; }
+.status-icon { font-weight: 700; font-size: 13px; flex: 0 0 auto; }
+.status-icon.pass { color: var(--pass); }
+.status-icon.fail { color: var(--fail); }
+.status-icon.skip { color: var(--skip); }
+.spec-body { padding: 2px 16px 12px 36px; border-top: 1px solid var(--border); }
+.steps { list-style: none; margin: 10px 0; padding: 0; }
+.steps li {
+  display: flex; align-items: center; gap: 8px; padding: 3px 8px;
+  font-size: 13px; border-radius: 5px;
+}
+.steps li:hover { background: var(--surface-2); }
+.step-name { overflow-wrap: anywhere; }
+.analysis {
+  border: 1px solid var(--border); border-left: 3px solid var(--accent);
+  border-radius: 6px; background: var(--surface-2);
+  padding: 10px 14px; margin: 10px 0;
+}
+.analysis.skipped { color: var(--text-dim); font-size: 13px; font-style: italic; border-left-color: var(--border); }
+.prediction { display: flex; align-items: center; gap: 12px; flex-wrap: wrap; }
+.badge {
+  font-size: 11.5px; font-weight: 700; letter-spacing: 0.02em;
+  padding: 2px 10px; border-radius: 4px; color: #fff; flex: 0 0 auto;
+}
+.badge.TEST_DRIFT { background: #b45309; }
+.badge.SPEC_CHANGE { background: #1d4ed8; }
+.badge.PRODUCT_BUG { background: #b91c1c; }
+.badge.UNKNOWN { background: #6b7280; }
+.confidence { display: inline-flex; align-items: center; gap: 7px; font-size: 12.5px; font-weight: 600; color: var(--text-dim); }
+.confidence-bar {
+  display: inline-block; width: 64px; height: 6px; border-radius: 999px;
+  background: var(--border); overflow: hidden;
+}
+.confidence-bar > span { display: block; height: 100%; background: var(--accent); border-radius: 999px; }
+.sub { font-size: 11px; background: var(--surface); border: 1px solid var(--border); color: var(--text-dim); padding: 1px 8px; border-radius: 999px; }
+.reasoning { font-size: 13px; margin: 9px 0; white-space: pre-wrap; line-height: 1.55; }
+.evidence { font-size: 12.5px; color: var(--text-dim); margin: 6px 0; padding-left: 18px; line-height: 1.5; }
+.evidence code { background: var(--surface); border: 1px solid var(--border); padding: 0 5px; border-radius: 4px; font-size: 11px; }
+.truth {
+  display: flex; align-items: center; gap: 10px; flex-wrap: wrap;
+  background: var(--surface); border: 1px dashed var(--border); border-radius: 6px;
+  padding: 8px 12px; margin-top: 10px; font-size: 12.5px;
+}
+.truth-title { font-weight: 650; color: var(--text-dim); }
+.truth-option {
+  display: inline-flex; align-items: center; gap: 5px; cursor: pointer;
+  border: 1px solid var(--border); border-radius: 999px; padding: 2px 10px;
+}
+.truth-option:has(input:checked) { border-color: var(--accent); background: var(--surface-2); font-weight: 650; }
+.note { flex: 1; min-width: 150px; font: inherit; font-size: 12px; padding: 4px 9px; border: 1px solid var(--border); border-radius: 5px; background: var(--surface-2); color: var(--text); }
+details.raw, details.drift { margin: 7px 0; font-size: 13px; }
+details.raw summary, details.drift summary { cursor: pointer; color: var(--text-dim); }
+details.raw pre {
+  background: var(--code-bg); color: var(--code-text);
+  font-size: 11.5px; line-height: 1.5; padding: 12px 14px; border-radius: 6px;
+  overflow-x: auto; white-space: pre-wrap; word-break: break-word; margin: 6px 0;
+}
+.severity { font-size: 10.5px; font-weight: 700; padding: 0 6px; border-radius: 4px; margin-right: 4px; }
+.severity.ERROR { background: var(--fail-bg); color: var(--fail); }
+.severity.WARN { background: rgba(212, 167, 44, 0.18); color: var(--skip); }
+.severity.OK { background: var(--pass-bg); color: var(--pass); }
+.drift ul { padding-left: 18px; font-size: 12.5px; line-height: 1.55; }
+table.matrix { border-collapse: collapse; font-size: 12.5px; margin: 10px 16px 10px 0; display: inline-table; vertical-align: top; }
+table.matrix th, table.matrix td { border: 1px solid var(--border); padding: 4px 12px; text-align: center; }
+table.matrix th { background: var(--surface-2); font-weight: 600; }
+table.matrix td { font-variant-numeric: tabular-nums; }
+table.matrix td.hit { background: var(--pass-bg); font-weight: 700; }
+table.matrix td.miss-nonzero { background: var(--fail-bg); }
+.stats { font-size: 13px; }
+.stats .big { font-size: 17px; font-weight: 700; }
+.measure-actions { display: flex; gap: 14px; align-items: center; font-size: 12.5px; }
+.measure-actions button {
+  font: inherit; font-size: 12.5px; padding: 4px 13px; cursor: pointer;
+  border: 1px solid var(--border); border-radius: 6px; background: var(--surface); color: var(--text);
+}
+.measure-actions button:hover { background: var(--surface-2); }
+.import-label { cursor: pointer; color: var(--text-dim); }
+.import-label input { display: none; }
+.empty-note { color: var(--text-dim); text-align: center; font-size: 13px; }
+`;
+/**
+* Browser-side script inlined into the HTML run report.
+*
+* It (1) filters the `.spec` sections by status chip and search text and
+* (2) lets a reviewer grade each analyzed failure: grades and notes are kept
+* in `localStorage`, rendered as accuracy / confusion matrix / per-class
+* precision, recall and F1, and can be exported to or imported from JSON.
+*
+* This is a template literal: `FAILURE_LABELS` is interpolated when the
+* module loads, and every backslash is consumed once by the template before
+* the browser parses the script. Persisted and imported data is treated as
+* untrusted and validated before it is used.
+*/
+const CLIENT_JS = `
+(function () {
+  var dataEl = document.getElementById('ccqa-report-data');
+  if (!dataEl) return;
+  var data = JSON.parse(dataEl.textContent);
+  var LABELS = ${JSON.stringify(FAILURE_LABELS)};
+  var PRED_LABELS = LABELS.concat(['UNKNOWN']);
+  var storageKey = 'ccqa-report:' + (data.runId || data.createdAt);
+  // ---- filtering ------------------------------------------------------
+  var activeFilter = 'all';
+  var searchQuery = '';
+  function applyFilters() {
+    var sections = document.querySelectorAll('.spec');
+    var visible = 0;
+    sections.forEach(function (el) {
+      var statusOk = activeFilter === 'all' || el.getAttribute('data-status') === activeFilter;
+      var name = (el.getAttribute('data-case-id') || '').toLowerCase();
+      var searchOk = !searchQuery || name.indexOf(searchQuery) >= 0;
+      var show = statusOk && searchOk;
+      el.style.display = show ? '' : 'none';
+      if (show) visible++;
+    });
+    var note = document.getElementById('no-match');
+    if (note) note.hidden = visible > 0;
+  }
+  var chips = document.querySelectorAll('#filter-chips .chip');
+  chips.forEach(function (chip) {
+    chip.addEventListener('click', function () {
+      activeFilter = chip.getAttribute('data-filter') || 'all';
+      chips.forEach(function (c) { c.classList.toggle('active', c === chip); });
+      applyFilters();
+    });
+  });
+  var search = document.getElementById('search');
+  if (search) {
+    search.addEventListener('input', function () {
+      searchQuery = search.value.trim().toLowerCase();
+      applyFilters();
+    });
+  }
+  // ---- measurement ----------------------------------------------------
+  // cases: analyzed failures only — they carry a prediction we can grade.
+  var cases = [];
+  for (var i = 0; i < data.results.length; i++) {
+    var r = data.results[i];
+    if (r.status === 'failed' && r.analysis) {
+      cases.push({ index: i, feature: r.feature, spec: r.spec, predicted: r.analysis.label });
+    }
+  }
+  var state = {};
+  try { state = JSON.parse(localStorage.getItem(storageKey) || '{}'); } catch (e) { state = {}; }
+  // localStorage is user-controlled: valid JSON that is not a plain object
+  // (null, an array, a number, ...) would break every state lookup below.
+  if (!state || typeof state !== 'object' || Array.isArray(state)) state = {};
+  function save() {
+    try { localStorage.setItem(storageKey, JSON.stringify(state)); } catch (e) {}
+  }
+  function caseKey(c) { return c.feature + '/' + c.spec; }
+  function isKnownLabel(label) { return LABELS.indexOf(label) >= 0; }
+  // Returns the mutable entry for key, replacing a missing or non-object value.
+  function ensureEntry(key) {
+    var entry = state[key];
+    if (!entry || typeof entry !== 'object') {
+      entry = {};
+      state[key] = entry;
+    }
+    return entry;
+  }
+  function applyStateToInputs() {
+    cases.forEach(function (c) {
+      var entry = state[caseKey(c)];
+      if (!entry) return;
+      // Guard: only known labels may flow into the attribute selector below
+      // (localStorage is user-controlled; anything else is dropped).
+      if (isKnownLabel(entry.label)) {
+        var radio = document.querySelector('input[name="label--' + c.index + '"][value="' + entry.label + '"]');
+        if (radio) radio.checked = true;
+      }
+      var note = document.querySelector('.note[data-case-index="' + c.index + '"]');
+      if (note && entry.note && typeof entry.note === 'string') note.value = entry.note;
+    });
+  }
+  function renderMetrics() {
+    var target = document.getElementById('metrics');
+    if (!target) return;
+    var m = {};
+    PRED_LABELS.forEach(function (p) {
+      m[p] = {};
+      LABELS.forEach(function (a) { m[p][a] = 0; });
+    });
+    var labeled = 0;
+    var correct = 0;
+    cases.forEach(function (c) {
+      var entry = state[caseKey(c)];
+      if (!entry || !isKnownLabel(entry.label)) return;
+      labeled++;
+      // A prediction outside the known set is counted in the UNKNOWN row
+      // instead of throwing on a missing matrix row.
+      var row = Object.prototype.hasOwnProperty.call(m, c.predicted) ? m[c.predicted] : m.UNKNOWN;
+      row[entry.label]++;
+      if (c.predicted === entry.label) correct++;
+    });
+    var html = '';
+    html += '<div class="stats"><span class="big">' +
+      (labeled === 0 ? '–' : Math.round((correct / labeled) * 100) + '%') +
+      '</span> accuracy · ' + labeled + ' labeled / ' + cases.length + ' analyzed failures' +
+      (cases.length - labeled > 0 ? ' · <strong>' + (cases.length - labeled) + ' unlabeled</strong>' : '') +
+      '</div>';
+    html += '<table class="matrix"><thead><tr><th>predicted \\\\ actual</th>';
+    LABELS.forEach(function (a) { html += '<th>' + a + '</th>'; });
+    html += '</tr></thead><tbody>';
+    PRED_LABELS.forEach(function (p) {
+      html += '<tr><th>' + p + '</th>';
+      LABELS.forEach(function (a) {
+        var v = m[p][a];
+        var cls = p === a ? 'hit' : (v > 0 ? 'miss-nonzero' : '');
+        html += '<td class="' + cls + '">' + v + '</td>';
+      });
+      html += '</tr>';
+    });
+    html += '</tbody></table>';
+    html += '<table class="matrix"><thead><tr><th>class</th><th>precision</th><th>recall</th><th>F1</th><th>support</th></tr></thead><tbody>';
+    LABELS.forEach(function (cls) {
+      var tp = m[cls][cls];
+      var predictedAs = 0;
+      LABELS.forEach(function (a) { predictedAs += m[cls][a]; });
+      var actualAs = 0;
+      PRED_LABELS.forEach(function (p) { actualAs += m[p][cls]; });
+      var precision = predictedAs > 0 ? tp / predictedAs : null;
+      var recall = actualAs > 0 ? tp / actualAs : null;
+      var f1 = precision !== null && recall !== null && precision + recall > 0
+        ? (2 * precision * recall) / (precision + recall) : null;
+      html += '<tr><th>' + cls + '</th><td>' + fmt(precision) + '</td><td>' + fmt(recall) +
+        '</td><td>' + fmt(f1) + '</td><td>' + actualAs + '</td></tr>';
+    });
+    html += '</tbody></table>';
+    target.innerHTML = html;
+  }
+  function fmt(v) { return v === null ? '–' : (Math.round(v * 100) / 100).toFixed(2); }
+  function findCaseByIndex(index) {
+    for (var i = 0; i < cases.length; i++) {
+      if (cases[i].index === index) return cases[i];
+    }
+    return null;
+  }
+  document.addEventListener('change', function (e) {
+    var t = e.target;
+    if (t && t.name && t.name.indexOf('label--') === 0) {
+      var index = parseInt(t.name.slice('label--'.length), 10);
+      var c = findCaseByIndex(index);
+      if (!c) return;
+      ensureEntry(caseKey(c)).label = t.value;
+      save();
+      renderMetrics();
+    }
+  });
+  document.addEventListener('input', function (e) {
+    var t = e.target;
+    if (t && t.classList && t.classList.contains('note')) {
+      var index = parseInt(t.getAttribute('data-case-index'), 10);
+      var c = findCaseByIndex(index);
+      if (!c) return;
+      ensureEntry(caseKey(c)).note = t.value;
+      save();
+    }
+  });
+  var exportBtn = document.getElementById('export-labels');
+  if (exportBtn) {
+    exportBtn.addEventListener('click', function () {
+      var labels = [];
+      cases.forEach(function (c) {
+        var entry = state[caseKey(c)];
+        // Same rule as the metrics: only known labels count as graded.
+        if (!entry || !isKnownLabel(entry.label)) return;
+        var item = { feature: c.feature, spec: c.spec, predicted: c.predicted, label: entry.label };
+        if (entry.note) item.note = entry.note;
+        labels.push(item);
+      });
+      var payload = {
+        schemaVersion: 1,
+        runId: data.runId,
+        promptVersion: data.promptVersion,
+        exportedAt: new Date().toISOString(),
+        labels: labels
+      };
+      var blob = new Blob([JSON.stringify(payload, null, 2)], { type: 'application/json' });
+      var url = URL.createObjectURL(blob);
+      var a = document.createElement('a');
+      a.href = url;
+      a.download = 'ccqa-labels-' + String(data.runId || data.createdAt).replace(/[^A-Za-z0-9_-]/g, '_') + '.json';
+      a.click();
+      // Revoke on a later tick: revoking synchronously can cancel the download
+      // in browsers that start it asynchronously.
+      setTimeout(function () { URL.revokeObjectURL(url); }, 0);
+    });
+  }
+  var importInput = document.getElementById('import-labels');
+  if (importInput) {
+    importInput.addEventListener('change', function () {
+      var file = importInput.files && importInput.files[0];
+      if (!file) return;
+      var reader = new FileReader();
+      reader.onload = function () {
+        try {
+          var payload = JSON.parse(String(reader.result));
+          if (!payload || typeof payload !== 'object') throw new Error('expected a JSON object');
+          var items = payload.labels || [];
+          if (!Array.isArray(items)) throw new Error('"labels" must be an array');
+          items.forEach(function (item) {
+            // Skip malformed rows instead of aborting a half-applied import.
+            if (!item || typeof item.feature !== 'string' || typeof item.spec !== 'string') return;
+            var entry = ensureEntry(item.feature + '/' + item.spec);
+            if (isKnownLabel(item.label)) entry.label = item.label;
+            if (item.note && typeof item.note === 'string') entry.note = item.note;
+          });
+          save();
+          applyStateToInputs();
+          renderMetrics();
+        } catch (e) {
+          alert('Could not parse labels JSON: ' + e.message);
+        }
+        // Clear the picker so choosing the same file again fires another change event.
+        importInput.value = '';
+      };
+      reader.onerror = function () {
+        alert('Could not read labels file');
+        importInput.value = '';
+      };
+      reader.readAsText(file);
+    });
+  }
+  applyStateToInputs();
+  renderMetrics();
+})();
+`;
 //#endregion
 //#region src/cli/run.ts
+/** Project-local vitest config path; `resolveVitestConfig` probes it with `access()` before falling back. */
 const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
+/** Report directory used when `--drift-report` is passed without an explicit dir. */
+const DEFAULT_REPORT_DIR = "ccqa-report";
 async function resolveVitestConfig() {
 	try {
 		await access(USER_VITEST_CONFIG);
@@ -4634,7 +5840,7 @@ async function resolveVitestConfig() {
 		return bundledVitestConfigPath();
 	}
 }
-const runCommand = new Command("run").argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all").description("Run generated agent-browser test scripts. Pass --drift to invoke a Claude-driven drift analysis on each failing spec (skipped silently when no test fails). Requires ANTHROPIC_API_KEY or a local Claude login.").option("--drift", "On vitest failure, run drift analysis on the failing specs").option("--drift-strict", "Treat drift ERROR findings as a run failure (exit 1 even if vitest passed). Implies --drift.").option("--format <fmt>", "Output format for the drift block: text | json | github", "text").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Used by --drift only. Overrides CCQA_MODEL.").action(async (target, opts) => {
+/**
+* `ccqa run [target]` command definition. Registers `--drift-report [dir]`,
+* `--drift-base <ref>` and `-m, --model <name>`, passes the command through
+* `addLanguageOption` (defined elsewhere; presumably adds the flag behind
+* `opts.language`, which the report hook reads), and delegates to `runTests`.
+*/
+const runCommand = addLanguageOption(new Command("run").argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all").description("Run generated agent-browser test scripts. Pass --drift-report to also write a self-contained HTML run report: each failing spec gets a drift audit plus a root-cause call (TEST_DRIFT / SPEC_CHANGE / PRODUCT_BUG), and the report lets a human grade the calls to measure their accuracy. Requires ANTHROPIC_API_KEY or a local Claude login for the analysis part.").option("--drift-report [dir]", `Write an HTML run report with drift analysis of failures (default dir: ${DEFAULT_REPORT_DIR}/)`).option("--drift-base <ref>", "Base ref the source diff is taken against for failure analysis (default: GITHUB_BASE_REF, then origin/main)").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Used by --drift-report only. Overrides CCQA_MODEL.")).action(async (target, opts) => {
 	await runTests(target, opts);
 });
 async function runTests(target, opts) {
@@ -4649,6 +5855,7 @@ async function runTests(target, opts) {
 	const summaries = [];
 	let overallExitCode = 0;
 	const vitestConfig = await resolveVitestConfig();
+	const captureOutput = Boolean(opts.driftReport);
 	try {
 		for (let i = 0; i < specs.length; i++) {
 			const { featureName, specName } = specs[i];
@@ -4669,7 +5876,8 @@ async function runTests(target, opts) {
 				"--reporter=json",
 				`--outputFile.json=${reportFile}`
 			]);
-			await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]);
+			const tail = captureOutput ? new TailBuffer(OUTPUT_TAIL_CAP) : null;
+			await Promise.all([streamFiltered(proc.stdout, process.stdout, tail), streamFiltered(proc.stderr, process.stderr, tail)]);
 			const exitCode = await proc.exited;
 			if (exitCode !== 0) overallExitCode = exitCode;
 			const report = await readReport(reportFile);
@@ -4678,12 +5886,13 @@ async function runTests(target, opts) {
 				specName,
 				scriptFile,
 				report,
-				exitCode
+				exitCode,
+				outputTail: tail ? tail.toString() : null
 			});
 			blank();
 		}
 		printSummary(summaries);
-		overallExitCode = await maybeRunDrift(summaries, opts, overallExitCode);
+		await maybeWriteDriftReport(summaries, opts);
 	} finally {
 		await rm(tmpDir, {
 			recursive: true,
@@ -4696,73 +5905,208 @@ function failedSpec(s) {
 	if (s.exitCode !== 0) return true;
 	return (s.report?.numFailedTests ?? 0) > 0;
 }
-function parseDriftFormat(raw) {
-	const v = raw ?? "text";
-	if (v === "text" || v === "json" || v === "github") return v;
-	error(`invalid --format: ${v} (expected text|json|github)`);
-	process.exit(2);
-}
 /**
-* Choose which specs to drift-check. `--drift` is a fail-supplement: only the
-* specs that failed get a drift analysis (the goal is to *explain* a vitest
-* failure). `--drift-strict` is an audit: even passing specs are checked,
-* because the CI need is "fail loud if the spec lags behind the source",
-* which can absolutely happen while vitest is still green against a stale
-* staging environment.
+* Opt-in post-vitest report hook. With `--drift-report`, a self-contained
+* HTML report is ALWAYS written (a green run is still a useful run summary);
+* failing specs additionally get a spec↔code drift audit and a three-way
+* root-cause call with the PR diff as context. The hook never changes the
+* exit code — the run's outcome is determined by vitest alone — and when
+* Claude auth is unavailable only the analysis is skipped, not the report.
 */
-function selectDriftTargets(summaries, opts) {
-	if (opts.driftStrict) return summaries;
-	if (opts.drift) return summaries.filter(failedSpec);
-	return [];
-}
-/**
-* Opt-in post-vitest drift hook. With `--drift`, fires only when at least
-* one spec failed (supplemental signal). With `--drift-strict`, fires
-* unconditionally so a spec/source divergence is caught even when vitest
-* passed. Skips silently when auth is unavailable so the run's exit code
-* is determined by vitest alone.
-*/
-async function maybeRunDrift(summaries, opts, currentExitCode) {
-	const candidates = selectDriftTargets(summaries, opts);
-	if (candidates.length === 0) return currentExitCode;
+async function maybeWriteDriftReport(summaries, opts) {
+	// The report is opt-in: nothing happens without --drift-report.
+	if (!opts.driftReport) return;
+	// A bare --drift-report (non-string value) falls back to DEFAULT_REPORT_DIR.
+	const outDir = typeof opts.driftReport === "string" ? opts.driftReport : DEFAULT_REPORT_DIR;
+	const cwd = process.cwd();
 	const auth = driftAuthAvailable();
-	if (!auth.ok) {
-		info(`drift analysis skipped (${auth.reason})`);
-		return currentExitCode;
+	const failed = summaries.filter(failedSpec);
+	// Missing auth is only mentioned when there is a failure that would have been analyzed.
+	if (!auth.ok && failed.length > 0) info(`failure analysis skipped (${auth.reason})`);
+	const baseRef = resolveBaseRef(opts.driftBase);
+	// Placeholder "not ok" diff; it is only replaced when at least one spec failed.
+	let diff = {
+		ok: false,
+		error: "diff not captured (no failures)"
+	};
+	if (failed.length > 0) {
+		diff = await capturePrDiff(baseRef, cwd);
+		if (!diff.ok) info(`drift-report: source diff unavailable (${diff.error}) — analyzing without diff context`);
+	}
+	// Spec metadata keyed by "<feature>/<spec>" for the lookups below (empty when nothing failed).
+	const tree = failed.length > 0 ? await listFeatureTree(cwd) : [];
+	const specInfoByKey = new Map(tree.flatMap((f) => f.specs.map((sp) => [`${f.featureName}/${sp.specName}`, sp])));
+	const findSpecInfo = (s) => specInfoByKey.get(`${s.featureName}/${s.specName}`) ?? null;
+	// The drift audit needs both auth and failures; failed specs absent from the feature tree are dropped.
+	let driftResults = [];
+	if (auth.ok && failed.length > 0) {
+		const targets = failed.map((s) => {
+			const spec = findSpecInfo(s);
+			if (!spec) return null;
+			const t = {
+				featureName: s.featureName,
+				specName: s.specName
+			};
+			if (spec.relatedPaths) t.relatedPaths = spec.relatedPaths;
+			if (spec.includedBlocks) t.includedBlocks = spec.includedBlocks;
+			return t;
+		}).filter((t) => t !== null);
+		if (targets.length > 0) driftResults = await analyzeDrift({
+			targets,
+			cwd,
+			blocks: await loadAvailableBlocks(cwd),
+			concurrency: Math.min(3, targets.length),
+			...opts.model ? { model: opts.model } : {},
+			...opts.language ? { language: opts.language } : {},
+			onSpecStart: (t) => info(`drift audit: ${t.featureName}/${t.specName}`)
+		});
 	}
-	const format = parseDriftFormat(opts.format);
-	const cwd = process.cwd();
-	const tree = await listFeatureTree(cwd);
-	const targets = candidates.map((s) => {
-		const spec = tree.find((f) => f.featureName === s.featureName)?.specs.find((sp) => sp.specName === s.specName);
-		if (!spec) return null;
-		const t = {
-			featureName: s.featureName,
-			specName: s.specName
+	// Per-file patch sections, or null when the diff is unavailable or empty.
+	const patchSections = diff.ok && diff.diff.patch.length > 0 ? splitPatchByFile(diff.diff.patch) : null;
+	let printedHeader = false;
+	// One result row per entry of `summaries`, in the same order.
+	const results = [];
+	for (const s of summaries) {
+		const assertions = collectAssertions(s);
+		const base = {
+			feature: s.featureName,
+			spec: s.specName,
+			testCounts: s.report ? {
+				total: s.report.numTotalTests,
+				passed: s.report.numPassedTests,
+				failed: s.report.numFailedTests
+			} : null,
+			durationMs: assertions ? assertions.reduce((sum, a) => sum + (a.durationMs ?? 0), 0) : null,
+			assertions
 		};
-		if (spec.relatedPaths) t.relatedPaths = spec.relatedPaths;
-		if (spec.includedBlocks) t.includedBlocks = spec.includedBlocks;
-		return t;
-	}).filter((t) => t !== null);
-	if (targets.length === 0) {
-		info("drift analysis skipped (no spec.yaml found for failing specs)");
-		return currentExitCode;
-	}
-	const results = await analyzeDrift({
-		targets,
-		cwd,
-		blocks: await loadAvailableBlocks(cwd),
-		concurrency: Math.min(3, targets.length),
-		...opts.model ? { model: opts.model } : {},
-		onSpecStart: (t) => {
-			if (format === "text") info(`drift: checking ${t.featureName}/${t.specName}`);
+		// Passing specs carry counts and assertions only; every analysis field is null.
+		if (!failedSpec(s)) {
+			results.push({
+				...base,
+				status: "passed",
+				analysis: null,
+				analysisSkipped: null,
+				driftIssues: null,
+				failureLogExcerpt: null,
+				diffExcerpt: null,
+				specYaml: null
+			});
+			continue;
+		}
+		const specYaml = await tryReadSpecFile(s.featureName, s.specName, cwd);
+		const relatedPaths = findSpecInfo(s)?.relatedPaths ?? null;
+		const diffExcerpt = patchSections ? scopePatchForSpec(patchSections, relatedPaths) : null;
+		const driftResult = driftResults.find((r) => r.target.featureName === s.featureName && r.target.specName === s.specName);
+		const driftIssues = driftResult?.ok ? driftResult.issues : null;
+		const failureLog = buildFailureLog(s);
+		let analysis = null;
+		let analysisSkipped = null;
+		// Root-cause analysis needs auth and a spec.yaml; otherwise the reason is recorded in analysisSkipped.
+		if (!auth.ok) analysisSkipped = auth.reason;
+		else if (specYaml === null) analysisSkipped = "no spec.yaml found for this spec";
+		else {
+			const script = await readScriptSafe(s.scriptFile);
+			info(`failure analysis: ${s.featureName}/${s.specName}`);
+			const outcome = await analyzeFailure({
+				script,
+				specYaml,
+				failureLog,
+				diffPatch: diffExcerpt,
+				changedFiles: diff.ok ? diff.diff.nameStatus : null,
+				baseRef: diff.ok ? baseRef : null,
+				driftIssues,
+				...opts.language ? { outputLanguage: opts.language } : {}
+			}, {
+				...opts.model ? { model: opts.model } : {},
+				cwd
+			});
+			analysis = outcome.analysis;
+			// The banner is printed once, just before the first analyzed spec's line.
+			if (!printedHeader) {
+				process.stdout.write(`\n${C.cyan}${C.bold}──────── failure analysis ────────${C.reset}\n`);
+				printedHeader = true;
+			}
+			const pct = Math.round(outcome.analysis.confidence * 100);
+			const firstLine = outcome.analysis.reasoning.split("\n")[0] ?? "";
+			process.stdout.write(`${C.red}✖${C.reset} ${C.bold}${s.featureName}/${s.specName}${C.reset} → ${C.bold}${outcome.analysis.label}${C.reset} (${pct}%)${firstLine ? ` ${C.dim}${firstLine}${C.reset}` : ""}\n`);
 		}
+		results.push({
+			...base,
+			status: "failed",
+			analysis,
+			analysisSkipped,
+			driftIssues,
+			failureLogExcerpt: failureLog.length > 0 ? failureLog : null,
+			diffExcerpt,
+			specYaml
+		});
+	}
+	// runId comes from GITHUB_RUN_ID (null when unset); git info is null when the diff was not captured.
+	const data = {
+		schemaVersion: 1,
+		createdAt: (/* @__PURE__ */ new Date()).toISOString(),
+		runId: process.env["GITHUB_RUN_ID"] ?? null,
+		git: {
+			head: diff.ok ? diff.diff.head : null,
+			base: diff.ok ? baseRef : null
+		},
+		model: opts.model ?? null,
+		promptVersion: "2",
+		results
+	};
+	// The report is always written as <outDir>/index.html, creating the directory if needed.
+	const reportPath = join(outDir, "index.html");
+	await mkdir(outDir, { recursive: true });
+	await writeFile(reportPath, renderRunReport(data), "utf8");
+	info(`run report written to ${reportPath}`);
+}
+/**
+* Flatten a spec's vitest JSON report into one entry per assertion.
+*
+* @param s Run summary; only `s.report` is read.
+* @returns `null` when there is no report, otherwise `{ name, status, durationMs }`
+*   entries. Any status other than "passed" / "failed" is reported as
+*   "skipped", and a missing duration becomes `null`.
+*/
+function collectAssertions(s) {
+	if (!s.report) return null;
+	const out = [];
+	for (const file of s.report.testResults) for (const a of file.assertionResults) out.push({
+		name: a.fullName,
+		status: a.status === "passed" || a.status === "failed" ? a.status : "skipped",
+		durationMs: a.duration ?? null
 	});
-	if (format === "text") process.stdout.write(`\n${C.cyan}${C.bold}──────── drift analysis ────────${C.reset}\n`);
-	process.stdout.write(renderDrift(results, format, cwd));
-	if (opts.driftStrict && determineExitCode(results, "error") !== 0) return currentExitCode || 1;
-	return currentExitCode;
+	return out;
+}
+/**
+* Build the failure log that is handed to the analysis prompt and embedded
+* in the report.
+*
+* Vitest runs with `--reporter=json`, so stdout/stderr carry (almost) nothing
+* about the assertions themselves; the structured `failureMessages` from the
+* JSON report therefore lead the log, and the captured raw output tail
+* (console logs, agent-browser noise) follows as secondary context.
+*
+* @param s Run summary; reads `s.report` and `s.outputTail`.
+* @returns Newline-joined log, or "" when there is nothing to report.
+*/
+function buildFailureLog(s) {
+	const files = s.report ? s.report.testResults : [];
+	const lines = [];
+	for (const file of files) {
+		for (const assertion of file.assertionResults) {
+			if (assertion.status === "failed") lines.push(`✖ ${assertion.fullName}`, ...(assertion.failureMessages ?? []));
+		}
+	}
+	const rawTail = s.outputTail?.trim();
+	if (rawTail) lines.push("--- vitest output (tail) ---", rawTail);
+	return lines.join("\n");
+}
+/**
+* Best-effort read of a script file as UTF-8.
+*
+* @param path File to read.
+* @returns The file contents, or "" when the read fails for any reason
+*   (the error is deliberately swallowed so a missing script does not abort the report).
+*/
+async function readScriptSafe(path) {
+	try {
+		return await readFile(path, "utf8");
+	} catch {
+		return "";
+	}
 }
+/** Cap on the per-spec output tail kept for the report / analysis prompt. */
+const OUTPUT_TAIL_CAP = 64 * 1024;
+/**
+* Keeps the LAST `cap` characters appended. Vitest puts the failure summary
+* at the end of its output, so the tail is the part worth keeping when a
+* noisy spec overflows the cap.
+*
+* `append` lets the buffer grow to `2 * cap` before trimming it back to
+* `cap`, which avoids re-slicing on every chunk. Because that trim can leave
+* exactly `cap` characters behind, the buffer length alone cannot tell
+* whether output was lost; `truncated` records it so `toString` always adds
+* the "[...output truncated...]" marker once anything has been dropped.
+*/
+var TailBuffer = class {
+	buf = "";
+	cap;
+	/** True once `append` has discarded older output. */
+	truncated = false;
+	constructor(cap) {
+		this.cap = cap;
+	}
+	/** Append `s`, discarding the oldest characters once the buffer exceeds twice the cap. */
+	append(s) {
+		this.buf += s;
+		if (this.buf.length > this.cap * 2) {
+			this.buf = this.tail();
+			this.truncated = true;
+		}
+	}
+	/** Return the retained output, prefixed with a marker line when earlier output was dropped. */
+	toString() {
+		if (!this.truncated && this.buf.length <= this.cap) return this.buf;
+		return `[...output truncated...]\n${this.tail()}`;
+	}
+	/** Last `cap` characters of the buffer (explicit start index: `slice(-0)` would return everything). */
+	tail() {
+		return this.buf.slice(Math.max(0, this.buf.length - this.cap));
+	}
+};
 async function readReport(path) {
 	try {
 		const raw = await readFile(path, "utf8");
@@ -4834,7 +6178,7 @@ function formatDuration(ms) {
 	return `${(ms / 1e3).toFixed(2)}s`;
 }
 const NOISE_LINE_PATTERNS = [/^JSON report written to /];
-async function streamFiltered(source, sink) {
+async function streamFiltered(source, sink, capture) {
 	source.setEncoding("utf8");
 	let buffer = "";
 	for await (const chunk of source) {
@@ -4843,11 +6187,17 @@ async function streamFiltered(source, sink) {
 		while (nl !== -1) {
 			const line = buffer.slice(0, nl);
 			buffer = buffer.slice(nl + 1);
-			if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) sink.write(line + "\n");
+			if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) {
+				sink.write(line + "\n");
+				capture?.append(line + "\n");
+			}
 			nl = buffer.indexOf("\n");
 		}
 	}
-	if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) sink.write(buffer);
+	if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) {
+		sink.write(buffer);
+		capture?.append(buffer);
+	}
 }
 async function resolveSpecs(target) {
 	if (!target) return listAllSpecs();
@@ -4866,7 +6216,7 @@ async function resolveSpecs(target) {
 //#endregion
 //#region src/cli/draft.ts
 const CATEGORY_LABEL = DRAFT_CATEGORY_LABEL;
-const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a spec.yaml with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
+const draftCommand = addLanguageOption(new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a spec.yaml with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false)).action(async (specPath, opts) => {
 	await ensureCcqaDir();
 	let featureName;
 	let specName;
@@ -4882,6 +6232,7 @@ const draftCommand = new Command("draft").argument("[feature/spec]", "Optional s
 });
 async function runDraft(featureName, specName, opts, prefilledIntent) {
 	header("draft", `${featureName}/${specName}`);
+	const ja = useJapanesePrompts(opts.language);
 	const oneShot = opts.instruction !== void 0;
 	let useIntentOnce = prefilledIntent !== null && !oneShot;
 	while (true) {
@@ -4892,7 +6243,7 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
 		else if (useIntentOnce && isFirstRun) {
 			userInput = prefilledIntent ?? "";
 			useIntentOnce = false;
-		} else userInput = await prompt(isFirstRun ? "What do you want to test? > " : "How would you like to refine? (empty = re-validate) > ");
+		} else userInput = await prompt(isFirstRun ? ja ? "何をテストしたいですか? > " : "What do you want to test? > " : ja ? "どのように修正しますか? (空欄で再検証) > " : "How would you like to refine? (empty = re-validate) > ");
 		if (isFirstRun && !userInput.trim()) {
 			error("intent required for the first draft (no spec exists yet)");
 			process.exit(1);
@@ -4902,11 +6253,12 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
 			specName,
 			existing,
 			userInput: userInput.trim(),
-			autoApply: opts.apply === true
+			autoApply: opts.apply === true,
+			language: opts.language
 		});
 		if (oneShot) process.exit(turnResult.hasError && !turnResult.applied ? 1 : 0);
 		blank();
-		if (/^y/i.test(await prompt("Are you done with this draft? [y/N] "))) {
+		if (/^y/i.test(await prompt(ja ? "このドラフトは完了ですか? [y/N] " : "Are you done with this draft? [y/N] "))) {
 			info("draft session complete.");
 			hint(`run 'ccqa trace ${featureName}/${specName}' to record actions`);
 			process.exit(0);
@@ -4914,9 +6266,9 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
 	}
 }
 async function runOneTurn(input) {
-	const { featureName, specName, existing, userInput, autoApply } = input;
+	const { featureName, specName, existing, userInput, autoApply, language } = input;
 	const isFirstRun = existing === null;
-	const systemPrompt = buildDraftSystemPrompt(await loadAvailableBlocks());
+	const systemPrompt = buildDraftSystemPrompt(await loadAvailableBlocks()) + languageDirective(language);
 	const userPrompt = buildDraftPrompt({
 		mode: isFirstRun ? "create" : "refine",
 		existing: existing ?? "",
@@ -4979,7 +6331,7 @@ async function runOneTurn(input) {
 	info("--- proposed changes ---");
 	printUnifiedDiff(original, report.patch);
 	blank();
-	if (!(autoApply ? true : /^y/i.test(await prompt("Apply this patch? [y/N] ")))) {
+	if (!(autoApply ? true : /^y/i.test(await prompt(useJapanesePrompts(language) ? "このパッチを適用しますか? [y/N] " : "Apply this patch? [y/N] ")))) {
 		info("aborted — no changes applied.");
 		return {
 			hasError,
@@ -5071,8 +6423,9 @@ function writeFinding(issue) {
 	if (issue.detail) process.stdout.write(`      └ ${issue.detail.replace(/\n/g, "\n        ")}\n`);
 }
 async function proposeNaming(opts) {
+	const ja = useJapanesePrompts(opts.language);
 	const oneShot = opts.instruction !== void 0;
-	const intent = oneShot ? opts.instruction ?? "" : await prompt("What do you want to test? > ");
+	const intent = oneShot ? opts.instruction ?? "" : await prompt(ja ? "何をテストしたいですか? > " : "What do you want to test? > ");
 	if (!intent.trim()) {
 		error("intent required to propose a feature/spec name");
 		process.exit(1);
@@ -5124,13 +6477,13 @@ async function proposeNaming(opts) {
 		naming: final,
 		intent: intent.trim()
 	};
-	const answer = await prompt(`Use this name? [y/N/edit] > `);
+	const answer = await prompt(ja ? "この名前を使いますか? [y/N/edit] > " : "Use this name? [y/N/edit] > ");
 	if (/^y/i.test(answer)) return {
 		naming: final,
 		intent: intent.trim()
 	};
 	if (/^e/i.test(answer)) {
-		const manual = await prompt("Enter feature/spec (e.g. tasks/create-and-complete) > ");
+		const manual = await prompt(ja ? "feature/spec を入力 (例 tasks/create-and-complete) > " : "Enter feature/spec (e.g. tasks/create-and-complete) > ");
 		const parts = manual.split("/");
 		if (parts.length !== 2 || !parts[0] || !parts[1]) {
 			error(`invalid spec path: "${manual}". Expected "<feature>/<spec>"`);
@@ -5230,163 +6583,141 @@ function truncate(s, n) {
 	return s.slice(s.length - n);
 }
 //#endregion
-//#region src/drift/affected.ts
-const execFileP = promisify(execFile);
-/**
-* Resolve the base ref to diff against for `ccqa drift --changed`.
-* Precedence: explicit override > GITHUB_BASE_REF > origin/main.
-*/
-function resolveBaseRef(explicit) {
-	if (explicit && explicit.length > 0) return explicit;
-	const ghBase = process.env["GITHUB_BASE_REF"];
-	if (ghBase && ghBase.length > 0) return ghBase.startsWith("origin/") ? ghBase : `origin/${ghBase}`;
-	return "origin/main";
-}
-/**
-* Run `git diff --name-status base...HEAD` from `cwd` and return one entry per
-* changed file. Renames are reported under their NEW path with status
-* "renamed" — the OLD path is dropped because the spec mapping is against the
-* post-rename layout.
-*
-* Paths are re-rooted to be relative to `cwd`, not the git repo root. In a
-* monorepo where `cwd` is a sub-package (e.g. `apps/foo`), git emits paths
-* relative to the repo root, but specs declare relatedPaths relative to
-* their own package. Changes outside `cwd` are dropped so an unrelated PR
-* can never accidentally scope a sub-package's specs in.
-*/
-async function getChangedFiles(base, cwd) {
-	const [{ stdout: rootOut }, { stdout: diffOut }] = await Promise.all([execFileP("git", ["rev-parse", "--show-toplevel"], { cwd }), execFileP("git", [
-		"diff",
-		"--name-status",
-		"-M",
-		`${base}...HEAD`
-	], {
-		cwd,
-		maxBuffer: 32 * 1024 * 1024
-	})]);
-	return rerootChangedFiles(parseGitDiffOutput(diffOut), rootOut.trim(), cwd);
-}
+//#region src/drift/format.ts
 /**
-* Convert paths in `entries` from git-repo-root relative to `cwd` relative,
-* dropping anything outside `cwd`. Exported for unit tests.
+* Render drift results as a string. The CLI commands and the `run` failure
+* hook are the only callers; both want the formatted output returned so
+* they can prefix / interleave / pipe it as needed.
 */
-function rerootChangedFiles(entries, repoRoot, cwd) {
-	const prefix = relative(repoRoot, cwd);
-	if (!prefix) return entries;
-	const out = [];
-	for (const e of entries) {
-		const rel = relative(prefix, e.path);
-		if (rel.startsWith("..") || rel === "") continue;
-		out.push({
-			...e,
-			path: rel
-		});
-	}
-	return out;
+function renderDrift(results, format, cwd) {
+	// Anything other than "json" / "github" falls through to the text renderer.
+	switch (format) {
+		case "json": return renderJson(results);
+		case "github": return renderGithub(results, cwd);
+		default: return renderText(results);
+	}
+}
-function parseGitDiffOutput(stdout) {
+const HEAVY_RULE = "═".repeat(72);
+/**
+* Human-readable drift report: one "══ feature/spec ═══" section per result,
+* followed by a heavy rule and a totals footer taken from `summarize`.
+*
+* @param results Drift results (`target`, `issues`, optional `error`).
+* @returns The report as a single newline-joined string.
+*/
+function renderText(results) {
 	const out = [];
-	for (const line of stdout.split("\n")) {
-		if (!line.trim()) continue;
-		const parts = line.split("	");
-		const code = parts[0];
-		if (!code) continue;
-		if (code.startsWith("R")) {
-			const newPath = parts[2];
-			if (newPath) out.push({
-				path: newPath,
-				status: "renamed"
-			});
+	for (const r of results) {
+		out.push("");
+		// Pad the heading with "═" towards 72 columns, always adding at least 3 fill characters.
+		const heading = `══ ${r.target.featureName}/${r.target.specName} `;
+		const tail = "═".repeat(Math.max(3, 72 - heading.length));
+		out.push(`${heading}${tail}`);
+		// A result-level error replaces the findings list for that spec.
+		if (r.error) {
+			out.push(`  ERROR  ${r.error}`);
 			continue;
 		}
-		if (code.startsWith("C")) {
-			const newPath = parts[2];
-			if (newPath) out.push({
-				path: newPath,
-				status: "added"
-			});
+		const errors = r.issues.filter((i) => i.severity === "ERROR");
+		const warnings = r.issues.filter((i) => i.severity === "WARN");
+		const passed = r.issues.filter((i) => i.severity === "OK");
+		// Nothing to flag: collapse the section to a single ✓ line.
+		if (errors.length === 0 && warnings.length === 0) {
+			const label = passed.length === 1 ? "check" : "checks";
+			const detail = passed.length > 0 ? `all ${passed.length} ${label} passed` : "no issues";
+			out.push(`  ✓  ${detail}`);
 			continue;
 		}
-		const path = parts[1];
-		if (!path) continue;
-		switch (code[0]) {
-			case "A":
-				out.push({
-					path,
-					status: "added"
-				});
-				break;
-			case "M":
-			case "T":
-				out.push({
-					path,
-					status: "modified"
-				});
-				break;
-			case "D":
-				out.push({
-					path,
-					status: "deleted"
-				});
-				break;
-			default: out.push({
-				path,
-				status: "modified"
-			});
+		// Errors first, then warnings; passed checks are summarised by category label.
+		for (const issue of errors) appendFinding(out, "ERROR", issue);
+		for (const issue of warnings) appendFinding(out, "WARN", issue);
+		if (passed.length > 0) {
+			const names = passed.map((i) => DRAFT_CATEGORY_LABEL[i.category]).join(", ");
+			out.push("");
+			out.push(`  ✓  passed (${passed.length}): ${names}`);
+		}
+	}
+	out.push("");
+	out.push(HEAVY_RULE);
+	const totals = summarize(results);
+	out.push(`  specs    ${results.length} (${totals.errored} errored)`);
+	out.push(`  findings ${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`);
+	out.push("");
+	return out.join("\n");
+}
+/**
+* Append one finding to the text report: a blank separator line, a heading
+* line (`level`, category label, optional step id), the message, and — when
+* present — the detail with its continuation lines indented.
+*
+* @param out Array of report lines, mutated in place.
+* @param level Severity word printed in the heading.
+* @param issue Finding with `category`, `message`, optional `stepId` / `detail`.
+*/
+function appendFinding(out, level, issue) {
+	const label = DRAFT_CATEGORY_LABEL[issue.category];
+	const suffix = issue.stepId ? ` ${issue.stepId}` : "";
+	out.push("", `  ${level}  ${label}${suffix}`, `    ${issue.message}`);
+	if (!issue.detail) return;
+	const continuation = issue.detail.replace(/\n/g, "\n      ");
+	out.push(`    └ ${continuation}`);
+}
+/**
+* Serialise drift results as pretty-printed JSON (2-space indent, trailing
+* newline) under a top-level `specs` array. `error` and `detail` keys are
+* only emitted when they are truthy.
+*
+* @param results Per-spec drift results (`target`, `ok`, `error?`, `issues`).
+* @returns The JSON document as a string.
+*/
+function renderJson(results) {
+	const toIssue = ({ severity, category, stepId, message, detail }) => {
+		const entry = {
+			severity,
+			category,
+			stepId,
+			message
+		};
+		if (detail) entry.detail = detail;
+		return entry;
+	};
+	const toSpec = ({ target, ok, error, issues }) => {
+		const entry = {
+			feature: target.featureName,
+			spec: target.specName,
+			ok
+		};
+		if (error) entry.error = error;
+		entry.issues = issues.map(toIssue);
+		return entry;
+	};
+	return `${JSON.stringify({ specs: results.map(toSpec) }, null, 2)}\n`;
+}
+/**
+* Render drift results as GitHub Actions workflow commands, one annotation
+* per line: a spec-level `error` becomes a single `::error`, every ERROR /
+* WARN issue becomes `::error` / `::warning` with a title; OK issues are
+* skipped. Returns an empty string when there is nothing to annotate,
+* otherwise the lines joined with `\n` plus a trailing newline.
+*
+* @param results Per-spec drift results.
+* @param cwd Directory that hosts `.ccqa/`, used to locate each spec.yaml.
+*/
+function renderGithub(results, cwd) {
+	const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
+	const lines = [];
+	for (const r of results) {
+		// `file` is a command property just like `title`: a `,` or `:` in the path
+		// (e.g. a drive letter in the absolute-path fallback) would otherwise be
+		// read as a property separator, so it gets the same escaping.
+		const file = escapeGhProp(githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName));
+		if (r.error) {
+			lines.push(`::error file=${file}::${escapeGhMessage(r.error)}`);
+			continue;
+		}
+		for (const issue of r.issues) {
+			if (issue.severity === "OK") continue;
+			const level = issue.severity === "ERROR" ? "error" : "warning";
+			const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
+			const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
+			lines.push(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}`);
 		}
 	}
-	return out;
+	return lines.length === 0 ? "" : `${lines.join("\n")}\n`;
 }
-function stripLeadingDotSlash(s) {
-	return s.startsWith("./") ? s.slice(2) : s;
+/**
+* Path of a spec.yaml for use in a GitHub annotation: relative to `repoRoot`
+* when the file lives inside it, otherwise the absolute path.
+*
+* "Outside" is detected by a leading `..` path *segment*. A plain
+* `startsWith("..")` would also misclassify an in-repo directory whose name
+* merely begins with two dots (e.g. `..pkg/`).
+*/
+function githubRelPath(cwd, repoRoot, featureName, specName) {
+	const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "spec.yaml");
+	const rel = relative(repoRoot, abs);
+	const escapesRoot = /^\.\.(?:[\\/]|$)/.test(rel);
+	return escapesRoot ? abs : rel;
 }
-const REGEX_CACHE = /* @__PURE__ */ new Map();
-/** Compiles `pattern` to a RegExp, memoized so repeated `--changed` matches don't re-build. */
-function compileGlob(pattern) {
-	const cached = REGEX_CACHE.get(pattern);
-	if (cached) return cached;
-	const compiled = globToRegExp(stripLeadingDotSlash(pattern));
-	REGEX_CACHE.set(pattern, compiled);
-	return compiled;
+/**
+* Percent-encode the characters that are special in the message part of a
+* GitHub workflow command: `%`, carriage return and line feed.
+*/
+function escapeGhMessage(s) {
+	const encoded = {
+		"%": "%25",
+		"\r": "%0D",
+		"\n": "%0A"
+	};
+	return s.replace(/[%\r\n]/g, (ch) => encoded[ch]);
 }
-function globToRegExp(pattern) {
-	let re = "^";
-	let i = 0;
-	while (i < pattern.length) {
-		const ch = pattern[i];
-		if (ch === "?") {
-			re += "[^/]";
-			i++;
-			continue;
-		}
-		if (ch !== "*") {
-			re += /[.+^${}()|[\]\\]/.test(ch) ? "\\" + ch : ch;
-			i++;
-			continue;
-		}
-		if (pattern[i + 1] !== "*") {
-			re += "[^/]*";
-			i++;
-			continue;
-		}
-		const hasLeadingSlash = re.endsWith("/");
-		const hasTrailingSlash = pattern[i + 2] === "/";
-		if (hasLeadingSlash) re = re.slice(0, -1);
-		if (hasLeadingSlash || hasTrailingSlash) re += "(?:/?.*)?";
-		else re += ".*";
-		i += hasTrailingSlash ? 3 : 2;
+/**
+* Percent-encode a GitHub workflow command property value: the message
+* escapes (`%`, CR, LF) plus the property delimiters `,` and `:`.
+*/
+function escapeGhProp(s) {
+	const encoded = {
+		"%": "%25",
+		"\r": "%0D",
+		"\n": "%0A",
+		",": "%2C",
+		":": "%3A"
+	};
+	return s.replace(/[%\r\n,:]/g, (ch) => encoded[ch]);
+}
+/**
+* Tally drift results for the report footer: number of specs carrying a
+* spec-level `error` (`errored`) and issue counts per severity. Any severity
+* other than "ERROR" / "WARN" is counted as `ok`.
+*/
+function summarize(results) {
+	const totals = {
+		error: 0,
+		warn: 0,
+		ok: 0,
+		errored: 0
+	};
+	for (const { error: specError, issues } of results) {
+		if (specError) totals.errored += 1;
+		for (const { severity } of issues) {
+			if (severity === "ERROR") totals.error += 1;
+			else if (severity === "WARN") totals.warn += 1;
+			else totals.ok += 1;
+		}
 	}
-	return new RegExp(re + "$");
+	return totals;
 }
+//#endregion
+//#region src/drift/exit-code.ts
 /**
-* Returns true if `changedPath` is covered by any of `relatedPaths`. An empty
-* `relatedPaths` returns false — callers handle the "unscoped spec" case
-* separately (treat the spec as always-affected) before calling this.
+* Translate drift results into a process exit code. Returns 1 as soon as a
+* spec carries a spec-level `error` or an ERROR issue, and also for a WARN
+* issue when `threshold` is `"warn"`; returns 0 otherwise.
 */
-function isPathAffectedBy(changedPath, relatedPaths) {
-	const stripped = stripLeadingDotSlash(changedPath);
-	for (const pattern of relatedPaths) if (compileGlob(pattern).test(stripped)) return true;
-	return false;
+function determineExitCode(results, threshold) {
+	const warnFails = threshold === "warn";
+	const issueFails = ({ severity }) => severity === "ERROR" || warnFails && severity === "WARN";
+	const specFails = (r) => Boolean(r.error) || r.issues.some(issueFails);
+	return results.some(specFails) ? 1 : 0;
 }
 //#endregion
 //#region src/drift/route-new-files.ts
@@ -5503,7 +6834,7 @@ Return the spec keys that might be affected by any of the new files. Conservativ
 //#endregion
 //#region src/cli/drift.ts
 const DEFAULT_CONCURRENCY = 3;
-const driftCommand = new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each spec.yaml is still in sync with the current codebase (CI-friendly, no patches applied).").option("--format <fmt>", "Output format: text | json | github", "text").option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error").option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`).option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().").option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.").option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.").action(async (specPath, opts) => {
+const driftCommand = addLanguageOption(new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each spec.yaml is still in sync with the current codebase (CI-friendly, no patches applied).").option("--format <fmt>", "Output format: text | json | github", "text").option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error").option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`).option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().").option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.").option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.")).action(async (specPath, opts) => {
 	const format = parseFormat(opts.format);
 	const threshold = parseSeverity(opts.severity);
 	const concurrency = parseConcurrency(opts.concurrency);
@@ -5538,6 +6869,7 @@ const driftCommand = new Command("drift").argument("[feature/spec]", "Optional s
 		blocks,
 		concurrency,
 		...opts.model ? { model: opts.model } : {},
+		...opts.language ? { language: opts.language } : {},
 		onSpecStart: (t) => {
 			if (format === "text") info(`checking ${t.featureName}/${t.specName}`);
 		}
@@ -5650,6 +6982,446 @@ function parseConcurrency(raw) {
 	return n;
 }
 //#endregion
+//#region src/prompts/perspectives.ts
+/**
+* Build the system prompt. By default the descriptive fields follow the
+* spec's own language (Japanese specs → Japanese fields). An explicit
+* `--language` is applied by the CLI via `languageDirective`, appended to
+* this prompt, so the language handling lives in one shared place.
+*
+* The prompt is a fixed string: it takes no arguments and interpolates
+* nothing. It states the hard boundaries (no severity, no gap analysis, no
+* new cases), the per-spec fields to write, and the strict single-JSON-block
+* output contract.
+*
+* @returns The system prompt text.
+*/
+function buildPerspectivesSystemPrompt() {
+	return `You produce a factual inventory of the E2E test coverage that already exists in a ccqa project.
+Think of it as a QA coverage stock-take: for each existing test case, fill in a few short, neutral descriptive fields derived from its steps. Nothing more.
+## Hard boundaries (do NOT cross)
+- Do NOT assign severity, importance, priority, or risk. Whether a failure hurts the customer is a human + PdM decision; you are not authoring that here.
+- Do NOT do gap analysis. Do NOT list untested areas, missing coverage, or things the code has but the tests lack.
+- Do NOT evaluate whether the feature is good, complete, or correct.
+- Do NOT propose new test cases.
+- Do NOT restate the full step-by-step procedure or the per-step expected results — the spec.yaml is the source of truth for those and the inventory links to it.
+- Do NOT touch status, relatedPaths, feature names, or spec names — the CLI already fixed those.
+## Fields to write (per spec)
+- \`summary\`: 1–2 sentences, factual and neutral. What the test exercises and what it ultimately asserts, derived from the spec's \`steps\` (\`instruction\` / \`expected\`).
+- \`startScreen\`: the screen/URL the test first lands on after setup (e.g. "Dashboard (/dashboard)"). Derive from the first non-login \`instruction\`. Omit if genuinely unclear.
+- \`testCondition\`: the state/precondition the scenario assumes, phrased as a condition (e.g. "Logged in as an admin", "Unauthenticated user"). Omit if none.
+- \`preconditions\`: array of short setup prerequisites (e.g. which role logs in, required prior state). Derive from \`include: login\` params and the opening steps. Empty/omit if none.
+## How to write
+- Same language as the spec's title (if titles are Japanese, write these fields in Japanese).
+- Keep each field short. These are index entries, not the test itself.
+- You may use Read/Grep/Glob sparingly to clarify domain vocabulary, but the steps are the primary source. Do not over-explore.
+## Output contract (STRICT)
+Output exactly ONE fenced \`\`\`json code block, and nothing else outside it. No prose before or after.
+Schema:
+\`\`\`json
+{
+  "summaries": [
+    {
+      "featureName": "<verbatim from input>",
+      "specName": "<verbatim from input>",
+      "summary": "<1–2 sentence factual description of what this test verifies>",
+      "startScreen": "<opening screen/URL, or omit>",
+      "testCondition": "<assumed state phrased as a condition, or omit>",
+      "preconditions": ["<setup prerequisite>", "..."]
+    }
+  ]
+}
+\`\`\`
+Return one entry per spec given in the input. Echo featureName and specName verbatim so the CLI can match them. \`startScreen\`, \`testCondition\`, and \`preconditions\` are optional — omit a field (or use an empty array for preconditions) when the spec does not express it.
+`;
+}
+/**
+* Build the user prompt: one section per spec (heading, title, the spec YAML
+* in a fenced block with trailing whitespace trimmed), an optional "Extra
+* guidance" section when `instruction` is non-blank, and the task statement.
+*
+* @param specs Entries with `featureName`, `specName`, `title`, `specYaml`.
+* @param instruction Optional free-text hint from the user.
+*/
+function buildPerspectivesPrompt(specs, instruction) {
+	const caseSections = specs.map(({ featureName, specName, title, specYaml }) => [
+		`### ${featureName}/${specName}`,
+		`title: ${title}`,
+		"```yaml",
+		specYaml.trimEnd(),
+		"```",
+		""
+	].join("\n")).join("\n");
+	const hint = instruction?.trim();
+	const guidance = hint ? `## Extra guidance from the user\n\n${hint}\n\n` : "";
+	return `## Existing test cases to summarise
+${caseSections}
+${guidance}## Task
+For each test case above, write a 1–2 sentence factual \`summary\` of what it verifies, derived from its steps. Return one entry per spec in the JSON contract. Do not assign severity, do gap analysis, or invent new cases.
+`;
+}
+//#endregion
+//#region src/cli/perspectives.ts
+/**
+* `ccqa perspectives` sub-command: declares `--instruction`, `--apply` and
+* `-m/--model`, gets the shared language option from `addLanguageOption`,
+* and hands the parsed options to `runPerspectives`.
+*/
+const perspectivesCommand = addLanguageOption(
+	new Command("perspectives")
+		.description("Generate/update .ccqa/perspectives.yaml — a factual inventory of existing test coverage (no severity, no gap analysis)")
+		.option("--instruction <text>", "Hint to steer how summaries are written")
+		.option("--apply", "Auto-apply without [y/N] confirmation", false)
+		.option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID")
+).action(async (opts) => {
+	await runPerspectives(opts);
+});
+/**
+* Entry point of `ccqa perspectives`: rebuild the coverage inventory from
+* the feature tree, ask Claude for the descriptive fields, show a diff
+* against the existing perspectives.yaml, and write the YAML plus the
+* Markdown renderings once confirmed (or immediately with `opts.apply`).
+*
+* Exits the process with code 1 when no summaries could be obtained or when
+* the merged result fails schema validation; returns without writing when
+* there are no test cases, no substantive change, or the user declines.
+*
+* @param opts Parsed CLI options; this function reads `language` and
+* `apply` and passes the whole object on to `requestSummaries`.
+*/
+async function runPerspectives(opts) {
+	header("perspectives", ".ccqa/perspectives.yaml");
+	await ensureCcqaDir();
+	const skeleton = await buildSkeleton(await listFeatureTree());
+	const allSpecs = skeleton.flatMap((f) => f.specs);
+	if (allSpecs.length === 0) {
+		info("no test cases found under .ccqa/features — nothing to inventory.");
+		return;
+	}
+	// Human notes are pulled from the existing file before regeneration so
+	// they can be merged back into the freshly built inventory.
+	const existingRaw = await tryReadPerspectives() ?? "";
+	const noteMap = extractNotes(existingRaw);
+	const specBodies = await loadSpecBodies(skeleton);
+	meta("language", opts.language ?? "auto");
+	info(`Summarising ${allSpecs.length} test case(s) across ${skeleton.length} feature(s)...`);
+	const summaries = await requestSummaries(specBodies, opts);
+	if (summaries === null) process.exit(1);
+	const merged = mergePerspectives(skeleton, summaries, noteMap);
+	// Validate the assembled object before anything is written.
+	let validated;
+	try {
+		validated = PerspectivesSchema.parse(merged);
+	} catch (e) {
+		error(`refused to write: assembled perspectives failed validation (${e.message})`);
+		process.exit(1);
+	}
+	const next = stringify(validated, { lineWidth: 0 });
+	// `generatedAt` is fresh on every run, so it is ignored when deciding
+	// whether anything actually changed.
+	if (withoutGeneratedAt(existingRaw) === withoutGeneratedAt(next)) {
+		blank();
+		info("perspectives already up to date — no changes.");
+		return;
+	}
+	blank();
+	info("--- proposed changes (perspectives.yaml) ---");
+	printUnifiedDiff(existingRaw, next);
+	blank();
+	// `--apply` skips the confirmation; otherwise any answer starting with
+	// "y"/"Y" is taken as consent.
+	if (!(opts.apply === true || /^y/i.test(await prompt(useJapanesePrompts(opts.language) ? "perspectives.yaml + .md を書き込みますか? [y/N] " : "Write perspectives.yaml + .md? [y/N] ")))) {
+		info("aborted — no changes written.");
+		return;
+	}
+	meta("saved", await savePerspectives(next));
+	const labels = labelsFor(opts.language);
+	meta("saved", await savePerspectivesMarkdown(renderIndexMarkdown(validated, labels)));
+	// One detail Markdown file per feature, written sequentially.
+	for (const feature of validated.features) meta("saved", await saveFeaturePerspectivesMarkdown(feature.featureName, renderFeatureMarkdown(feature, labels)));
+}
+/**
+* Turn the feature tree into skeleton perspectives features. For every tree
+* entry whose `hasSpecFile` flag is set this records the spec name, the
+* title from `readSpecMeta`, an empty `summary` (Claude fills it later), the
+* status from `deriveStatus`, and `relatedPaths` when the entry has them.
+* Entries without the flag are skipped, features left with no specs are
+* dropped, and both features and specs are sorted by name.
+*/
+async function buildSkeleton(tree) {
+	const byName = (key) => (a, b) => a[key].localeCompare(b[key]);
+	const toEntry = async (featureName, s) => {
+		const spec = await readSpecMeta(featureName, s.specName);
+		const status = await deriveStatus(featureName, s.specName);
+		const entry = {
+			specName: s.specName,
+			title: spec.title,
+			summary: "",
+			status
+		};
+		if (s.relatedPaths) entry.relatedPaths = s.relatedPaths;
+		return entry;
+	};
+	const features = await Promise.all(tree.map(async ({ featureName, specs }) => {
+		const withFile = specs.filter((s) => s.hasSpecFile);
+		return {
+			featureName,
+			specs: await Promise.all(withFile.map((s) => toEntry(featureName, s)))
+		};
+	}));
+	const nonEmpty = features.filter((f) => f.specs.length > 0);
+	const ordered = nonEmpty.map((f) => ({
+		featureName: f.featureName,
+		specs: [...f.specs].sort(byName("specName"))
+	}));
+	return ordered.sort(byName("featureName"));
+}
+/**
+* Collect the human-written notes from an existing perspectives.yaml as a
+* map keyed by `noteKey(featureName, specName)`. Only non-empty notes are
+* kept. Best-effort by design: blank input, YAML that fails to parse, or a
+* document that fails `PerspectivesSchema` all yield an empty map rather
+* than an error, so note preservation never blocks regeneration.
+*/
+function extractNotes(existingRaw) {
+	const notes = new Map();
+	if (existingRaw.trim() === "") return notes;
+	let doc;
+	try {
+		doc = parse(existingRaw);
+	} catch {
+		// Unparsable existing file: regenerate without notes.
+		return notes;
+	}
+	const checked = PerspectivesSchema.safeParse(doc);
+	if (!checked.success) return notes;
+	for (const { featureName, specs } of checked.data.features) {
+		for (const { specName, note } of specs) {
+			if (note === void 0 || note === "") continue;
+			notes.set(noteKey(featureName, specName), note);
+		}
+	}
+	return notes;
+}
+/**
+* Combine the mechanical skeleton, Claude's summaries and the preserved
+* notes into the final perspectives object, stamped with a fresh
+* `generatedAt`. Summaries and notes are looked up by
+* `noteKey(featureName, specName)`; a spec with no matching summary keeps
+* the skeleton's `summary`. Optional fields (`startScreen`, `testCondition`,
+* `preconditions`) are copied only when non-empty.
+*/
+function mergePerspectives(skeleton, summaries, noteMap) {
+	const byKey = new Map(summaries.map((s) => [noteKey(s.featureName, s.specName), s]));
+	const mergeSpec = (featureName, spec) => {
+		const key = noteKey(featureName, spec.specName);
+		const fromClaude = byKey.get(key);
+		const merged = {
+			...spec,
+			summary: fromClaude?.summary ?? spec.summary
+		};
+		if (fromClaude?.startScreen) merged.startScreen = fromClaude.startScreen;
+		if (fromClaude?.testCondition) merged.testCondition = fromClaude.testCondition;
+		if (fromClaude?.preconditions?.length > 0) merged.preconditions = fromClaude.preconditions;
+		const note = noteMap.get(key);
+		if (note !== void 0) merged.note = note;
+		return merged;
+	};
+	const features = skeleton.map(({ featureName, specs }) => ({
+		featureName,
+		specs: specs.map((spec) => mergeSpec(featureName, spec))
+	}));
+	return {
+		generatedAt: new Date().toISOString(),
+		features
+	};
+}
+/**
+* Drop every line that starts with `generatedAt:` at column 0 and trim the
+* result, so two serialised perspectives documents can be compared without
+* the always-fresh timestamp making them look different.
+*/
+function withoutGeneratedAt(yamlText) {
+	const kept = [];
+	for (const line of yamlText.split("\n")) {
+		if (!line.startsWith("generatedAt:")) kept.push(line);
+	}
+	return kept.join("\n").trim();
+}
+/** Composite lookup key `<featureName>/<specName>` for the summary and note maps. */
+function noteKey(featureName, specName) {
+	return "".concat(featureName, "/", specName);
+}
+/**
+* Read the display title of a spec. Falls back to the spec name when the
+* file cannot be read (`tryReadSpecFile` returns null), the YAML fails to
+* parse, or it has no non-empty string `title`.
+*/
+async function readSpecMeta(featureName, specName) {
+	const fallback = { title: specName };
+	const raw = await tryReadSpecFile(featureName, specName);
+	if (raw === null) return fallback;
+	let title;
+	try {
+		title = parse(raw).title;
+	} catch {
+		// Deliberately best-effort: an unparsable (or empty) spec still gets an entry.
+		return fallback;
+	}
+	return typeof title === "string" && title.length > 0 ? { title } : fallback;
+}
+/**
+* Derive a spec's status from what exists on disk: `traced` is true when
+* `actions.json` can be stat'ed in the spec directory, `generated` is true
+* when `getTestScript` returns something other than null.
+*/
+async function deriveStatus(featureName, specName) {
+	const actionsPath = join(getSpecDir(featureName, specName), "actions.json");
+	let traced = true;
+	try {
+		await stat(actionsPath);
+	} catch {
+		// A failing stat simply means the spec has not been traced.
+		traced = false;
+	}
+	const script = await getTestScript(featureName, specName);
+	return {
+		traced,
+		generated: script !== null
+	};
+}
+/**
+* Load the raw spec.yaml text for every spec in the skeleton, flattened in
+* skeleton order. A spec whose file cannot be read (`tryReadSpecFile`
+* returns null/undefined) gets an empty `specYaml`.
+*/
+async function loadSpecBodies(skeleton) {
+	const readBody = async (featureName, { specName, title }) => ({
+		featureName,
+		specName,
+		title,
+		specYaml: await tryReadSpecFile(featureName, specName) ?? ""
+	});
+	const pending = skeleton.flatMap(({ featureName, specs }) => specs.map((spec) => readBody(featureName, spec)));
+	return await Promise.all(pending);
+}
+/**
+* Ask Claude for the per-spec summaries. Sends the perspectives prompts with
+* the Read/Grep/Glob tools allowed, counts tool uses from the streamed
+* assistant messages, prints the tool summary line, and parses the JSON
+* block from the result.
+*
+* @returns The parsed summaries, or null (after logging) when Claude
+* reports an error, no JSON block is found, or the payload is malformed.
+*/
+async function requestSummaries(specs, opts) {
+	const toolCounts = {};
+	const countToolUses = (msg) => {
+		if (msg.type !== "assistant") return;
+		for (const block of msg.message.content ?? []) {
+			if (block.type !== "tool_use") continue;
+			toolCounts[block.name] = (toolCounts[block.name] ?? 0) + 1;
+		}
+	};
+	const startedAt = Date.now();
+	const request = {
+		prompt: buildPerspectivesPrompt(specs, opts.instruction),
+		systemPrompt: buildPerspectivesSystemPrompt() + languageDirective(opts.language),
+		allowedTools: [
+			"Read",
+			"Grep",
+			"Glob"
+		],
+		silenceBashLog: true
+	};
+	if (opts.model) request.model = opts.model;
+	const { result, isError } = await invokeClaudeStreaming(request, countToolUses);
+	const elapsedMs = Date.now() - startedAt;
+	process.stdout.write(`${formatToolSummary(toolCounts, elapsedMs)}\n`);
+	if (isError) {
+		error("Claude returned an error result");
+		return null;
+	}
+	const json = extractJsonBlock(result);
+	if (json) return parseSummaries(json);
+	error("Claude did not return a json block");
+	return null;
+}
+/**
+* Parse the `{ summaries: [...] }` JSON contract into clean entries. Items
+* lacking a string `featureName`, `specName` or `summary` are dropped; the
+* optional `startScreen` / `testCondition` are kept only as non-empty
+* strings, and `preconditions` only as a non-empty list of non-empty
+* strings. Returns null (after logging) when the payload is not valid JSON,
+* not an object, or has no `summaries` array.
+*/
+function parseSummaries(json) {
+	let payload;
+	try {
+		payload = JSON.parse(json);
+	} catch (e) {
+		error(`failed to parse summaries JSON: ${e.message}`);
+		return null;
+	}
+	if (payload === null || typeof payload !== "object") {
+		error("summaries payload is not an object");
+		return null;
+	}
+	if (!Array.isArray(payload.summaries)) {
+		error("summaries payload missing a `summaries` array");
+		return null;
+	}
+	const isString = (v) => typeof v === "string";
+	const isNonEmptyString = (v) => isString(v) && v.length > 0;
+	return payload.summaries.flatMap((item) => {
+		const rec = item ?? {};
+		const { featureName, specName, summary } = rec;
+		if (!(isString(featureName) && isString(specName) && isString(summary))) return [];
+		const entry = {
+			featureName,
+			specName,
+			summary
+		};
+		if (isNonEmptyString(rec.startScreen)) entry.startScreen = rec.startScreen;
+		if (isNonEmptyString(rec.testCondition)) entry.testCondition = rec.testCondition;
+		if (Array.isArray(rec.preconditions)) {
+			const pre = rec.preconditions.filter((p) => isNonEmptyString(p));
+			if (pre.length > 0) entry.preconditions = pre;
+		}
+		return [entry];
+	});
+}
+/**
+* Japanese label set for the rendered perspectives Markdown: the index
+* title, the table column headers, and the row names. This is the default
+* `labels` argument of the Markdown render functions.
+*/
+const LABELS_JA = {
+	indexTitle: "テスト観点インデックス (perspectives)",
+	caseCol: "ケース",
+	itemCol: "項目",
+	valueCol: "内容",
+	summary: "検証内容",
+	preconditions: "前提条件",
+	startScreen: "開始画面",
+	relatedCode: "関連コード"
+};
+/**
+* English label set for the rendered perspectives Markdown, with the same
+* keys as the Japanese set. Selected by `labelsFor` for an English
+* `--language` tag.
+*/
+const LABELS_EN = {
+	indexTitle: "Test Perspectives (perspectives)",
+	caseCol: "Case",
+	itemCol: "Item",
+	valueCol: "Value",
+	summary: "Verifies",
+	preconditions: "Preconditions",
+	startScreen: "Start screen",
+	relatedCode: "Related code"
+};
+/**
+* Choose the Markdown label set for a `--language` value. After trimming,
+* a value that starts with `en` followed by a word boundary (`en`, `en-US`,
+* any letter case) selects the English labels. Everything else — undefined,
+* `auto`, `ja`, and also forms such as `english` or `en_US`, where no word
+* boundary follows `en` — keeps the Japanese labels.
+*/
+function labelsFor(language) {
+	const tag = language?.trim() ?? "";
+	const isEnglish = /^en\b/i.test(tag);
+	return isEnglish ? LABELS_EN : LABELS_JA;
+}
+/**
+* Link target for a spec.yaml as seen from the root `.ccqa/perspectives.md`,
+* i.e. relative to the `.ccqa/` directory. Used by the index rows.
+*/
+function specRelPathFromRoot(featureName, specName) {
+	return "features/".concat(featureName, "/test-cases/", specName, "/spec.yaml");
+}
+/**
+* Link target for a feature's detail file as seen from the root
+* `.ccqa/perspectives.md`. The `features/` segment is part of the path, so
+* the link points at `.ccqa/features/<feature>/perspectives.md`.
+*/
+function featureDetailRelPathFromRoot(featureName) {
+	return "features/".concat(featureName, "/perspectives.md");
+}
+/**
+* Link target for a spec.yaml as seen from a feature's own detail file
+* (`.ccqa/features/<feature>/perspectives.md`): the spec sits next to it
+* under `test-cases/<spec>/`, so no feature segment is needed.
+*/
+function specRelPathFromCategory(specName) {
+	return "test-cases/".concat(specName, "/spec.yaml");
+}
+/**
+* Render the root `.ccqa/perspectives.md`: a title followed by one section
+* per feature. Each section is a heading linking to the feature's detail
+* file and a two-column table with one row per case — the case title and a
+* link to its spec.yaml. Per-case detail is not rendered here.
+*
+* Pure: the output depends only on the arguments.
+*
+* @param perspectives Object with a `features` array.
+* @param labels Label set; `indexTitle` and `caseCol` are used.
+*/
+function renderIndexMarkdown(perspectives, labels = LABELS_JA) {
+	const sections = perspectives.features.flatMap(({ featureName, specs }) => [
+		`## [${featureName}](${featureDetailRelPathFromRoot(featureName)})`,
+		"",
+		`| ${labels.caseCol} | spec |`,
+		"| --- | --- |",
+		...specs.map((spec) => `| ${mdCell(spec.title)} | [spec](${specRelPathFromRoot(featureName, spec.specName)}) |`),
+		""
+	]);
+	return [
+		`# ${labels.indexTitle}`,
+		"",
+		...sections
+	].join("\n");
+}
+/**
+* Render one feature's `.ccqa/features/<feature>/perspectives.md`: a
+* top-level heading with the feature name, then the lines produced by
+* `renderSpecMarkdown` for each case in order.
+*
+* Pure: the output depends only on the arguments.
+*/
+function renderFeatureMarkdown(feature, labels = LABELS_JA) {
+	const caseLines = feature.specs.flatMap((spec) => renderSpecMarkdown(spec, labels));
+	return [
+		`# ${feature.featureName}`,
+		"",
+		...caseLines
+	].join("\n");
+}
+/**
+* Render one spec as a vertical (item | value) Markdown table for a feature
+* detail file. Rows, in order: summary, preconditions, start screen (each
+* only when present), the spec link, related code paths (when present) and
+* the human note (when present). No prose is emitted outside the table.
+*
+* The spec link is relative to the feature detail file. Related-code paths
+* are rendered as inline code rather than links. They are still passed
+* through `mdCell`: a `|` ends a table cell even inside a code span, so an
+* unescaped extglob such as `src/+(a|b)/**` would split the row.
+*
+* @param spec Perspectives spec entry.
+* @param labels Label set for the header and row names.
+* @returns The Markdown lines, ending with an empty separator line.
+*/
+function renderSpecMarkdown(spec, labels = LABELS_JA) {
+	const lines = [];
+	lines.push(`## ${spec.title}`);
+	lines.push("");
+	lines.push(`| ${labels.itemCol} | ${labels.valueCol} |`);
+	lines.push("| --- | --- |");
+	if (spec.summary) lines.push(`| ${labels.summary} | ${mdCell(spec.summary)} |`);
+	if (spec.preconditions && spec.preconditions.length > 0) lines.push(`| ${labels.preconditions} | ${spec.preconditions.map((p) => mdCell(p)).join("<br>")} |`);
+	if (spec.startScreen) lines.push(`| ${labels.startScreen} | ${mdCell(spec.startScreen)} |`);
+	const specPath = specRelPathFromCategory(spec.specName);
+	lines.push(`| spec | [${specPath}](${specPath}) |`);
+	if (spec.relatedPaths && spec.relatedPaths.length > 0) lines.push(`| ${labels.relatedCode} | ${spec.relatedPaths.map((p) => `\`${mdCell(p)}\``).join("<br>")} |`);
+	if (spec.note) lines.push(`| 📝 note | ${mdCell(spec.note)} |`);
+	lines.push("");
+	return lines;
+}
+/**
+* Keep a value inside a single Markdown table cell: escape `|` as `\|` and
+* collapse every line break to one space. CRLF and lone CR count as line
+* breaks too, so text with Windows line endings does not leave a stray
+* carriage return in the cell.
+*/
+function mdCell(value) {
+	return value.replace(/\|/g, "\\|").replace(/\r\n|\r|\n/g, " ");
+}
+//#endregion
 //#region src/cli/index.ts
 const packageJsonPath = resolvePackageJson();
 const { version } = JSON.parse(readFileSync(packageJsonPath, "utf8"));
@@ -5667,6 +7439,7 @@ const program = new Command();
 program.name("ccqa").description("E2E test CLI using Claude Code + agent-browser").version(version);
 program.addCommand(draftCommand);
 program.addCommand(driftCommand);
+program.addCommand(perspectivesCommand);
 program.addCommand(traceCommand);
 program.addCommand(generateCommand);
 program.addCommand(runCommand);