npm - ccqa - Versions diffs - 0.3.10 → 0.4.0 - Mend

ccqa 0.3.10 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md +27 -26
package/dist/bin/ccqa.mjs +1595 -1291
package/dist/package.json +2 -2
package/dist/runtime/test-helpers.mjs +1 -53
package/dist/runtime/vitest.config.d.mts +10 -10
package/dist/spawn-ab-BxjEhA5e.mjs +65 -0
package/package.json +2 -2

package/dist/bin/ccqa.mjs CHANGED Viewed

@@ -1,33 +1,41 @@
 #!/usr/bin/env node
+import { n as spawnAB } from "../spawn-ab-BxjEhA5e.mjs";
 import { createRequire } from "node:module";
 import { Command } from "commander";
-import { accessSync, readFileSync, statSync } from "node:fs";
+import { accessSync, existsSync, readFileSync, statSync } from "node:fs";
 import { fileURLToPath } from "node:url";
-import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, unlink, writeFile } from "node:fs/promises";
-import { delimiter, dirname, join, relative, resolve } from "node:path";
+import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
 import { query } from "@anthropic-ai/claude-agent-sdk";
-import matter from "gray-matter";
+import { delimiter, dirname, join, relative, resolve } from "node:path";
+import { parse, stringify } from "yaml";
+import { ZodError, z } from "zod";
 import { execFile, spawn } from "node:child_process";
 import { createInterface } from "node:readline";
-import { tmpdir } from "node:os";
+import { homedir, tmpdir } from "node:os";
 import { createInterface as createInterface$1 } from "node:readline/promises";
-import { z } from "zod";
 import { promisify } from "node:util";
 //#region src/prompts/trace.ts
 /** Build a timestamp-based session label: `ccqa-trace-` followed by the current ISO-8601 time with every `:` and `.` replaced by `-`. */
 function generateSessionName() {
 	const isoNow = new Date().toISOString();
 	const dashedStamp = isoNow.replace(/[:.]/g, "-");
 	return `ccqa-trace-${dashedStamp}`;
 }
-function buildTraceSystemPrompt(spec, options) {
-	return buildTraceSystemPromptInner(spec, options, true);
-}
-function buildTraceSystemPromptInner(spec, options, emitRelatedPaths) {
-	const sessionName = options?.sessionName ?? generateSessionName();
-	const skipCookiesClear = options?.skipCookiesClear ?? false;
-	const stepsText = spec.steps.map((step) => `### ${step.id}: ${step.title}
+/**
+* Build the trace system prompt. `input.steps` is a flat list with includes
+* already expanded (each step carries id / source / instruction / expected).
+* The spec opens URLs via explicit step instructions (e.g.
+* `instruction: "${APP_URL}/articles を開く"`).
+*
+* In v0.4 every spec is traced from scratch — block contents are inlined
+* into the spec's own step list at expand time, so the prompt has no
+* special "this is a block" mode. The `source` tag on each step still
+* distinguishes spec-native steps from inlined block steps for the
+* `// step:` comments in the eventual codegen output.
+*/
+function buildTraceSystemPrompt(input) {
+	const sessionName = input.sessionName ?? generateSessionName();
+	const stepsText = input.steps.map((step) => `### ${step.id} [${step.source}]
 - **Instruction**: ${step.instruction}
 - **Expected**: ${step.expected}`).join("\n\n");
-	const prereqText = spec.prerequisites ? `## Prerequisites\n${spec.prerequisites}\n\n` : "";
-	const relatedPathsBlock = emitRelatedPaths ? buildRelatedPathsInstruction() : "";
+	const relatedPathsBlock = buildRelatedPathsInstruction();
 	return `You are an expert QA engineer executing a browser E2E test. Execute each step precisely and record every browser action as a structured log line.
 ## Session
@@ -48,7 +56,8 @@ agent-browser --session SESSION uncheck "<selector>"
 agent-browser --session SESSION press <Key>
 agent-browser --session SESSION select "<selector>" "<value>"
 agent-browser --session SESSION hover "<selector>"
-agent-browser --session SESSION wait --text "<text>"
+agent-browser --session SESSION wait --text "<text>" [--timeout <ms>]
+agent-browser --session SESSION wait "<selector>" [--timeout <ms>] [--state visible|hidden]
 agent-browser --session SESSION cookies clear
 \`\`\`
@@ -83,17 +92,18 @@ agent-browser --session SESSION cookies clear
 ## Test Specification
-Title: ${spec.title}
-Base URL: ${spec.baseUrl}
+Title: ${input.title}
-${prereqText}## Steps
+Each step's instruction names the URL to open directly (or via \`\${ENV_VAR}\`). Open exactly the URL the step says to open.
+## Steps
 ${stepsText}
 ## Execution Workflow
 For each step:
-1. Emit \`STEP_START|<step-id>|<step-title>\`
+1. Emit \`STEP_START|<step-id>|<short description of what this step does>\`
 2. Run \`snapshot\` and identify selectors from the ARIA tree
 3. Execute the action using an ALLOWED selector
 4. Emit \`AB_ACTION|...\` for every browser action (see below)
@@ -180,6 +190,15 @@ AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
 The selector in AB_ACTION must be one of the ALLOWED formats above.
+**CRITICAL — record only successful actions.** The AB_ACTION stream is the
+canonical replay sequence: every line in it must be reproducible on a fresh
+browser session. Therefore:
+- If you tried a selector and \`agent-browser\` returned a non-zero exit (selector not found, element not interactable, timeout): **do NOT emit \`AB_ACTION|...\`** for that attempt. Take a fresh snapshot, switch selector, and only emit the AB_ACTION for the call that finally succeeded.
+- If you explored multiple selectors for the same logical action (e.g. tried \`[aria-label='Email']\`, it failed, then \`[placeholder='Email']\` worked): emit AB_ACTION for the **working selector only**. The failed attempt must not appear in the trace.
+- The same rule applies to \`AB_ACTION|assert|...\` lines: only emit them for assertions you actually verified on the current page in the current snapshot. Never declare an assertion against a selector you have not just confirmed visible — even if you intended to use it earlier.
+- If a step ultimately fails after retries: emit \`ASSERTION_FAILED\` and STOP. Do NOT leave half-recorded actions for the failed step in the AB_ACTION stream.
 ## Assertion Protocol
 After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you confirmed.
@@ -211,9 +230,36 @@ After verifying each step, emit \`AB_ACTION|assert\` lines for each signal you c
 **Selector rules for assert actions — CRITICAL:**
 - Use the **same ALLOWED formats** as browser actions — never invent aria-label values
 - Only use \`[aria-label='...']\` if that **exact** aria-label string appears in the current ARIA snapshot output
-- When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector
+- When unsure, prefer \`text_visible\`/\`text_not_visible\` (no selector needed) over guessing a selector — but still pre-verify with \`wait --text\` per the MUST-VERIFY rule below; \`alt\`-attribute "text" will not match.
 - For \`element_disabled\`/\`element_enabled\`: use a CSS class selector if no aria-label is confirmed in the snapshot
+**MUST-VERIFY rule — STRICT (applies to every assert except \`url_contains\`):**
+The \`snapshot\` output is the **accessibility tree**: a semantic view. \`agent-browser\` queries the **real DOM**. They DO NOT always match. Two known traps:
+1. *Selector trap*: a snapshot row like \`textbox "Email address"\` is reachable via \`[placeholder='...']\` but **NOT** via \`[aria-label='...']\` if no \`aria-label\` attribute is actually set — the browser inferred the label from \`<label for=>\` / surrounding text / \`placeholder\`.
+2. *Text trap*: a snapshot row like \`link "Dashboard"\` may come from \`<a><img alt="Dashboard"></a>\` — the visible "text" is an \`alt\` attribute, not a text node. \`text_visible\` (which scans visible text nodes via \`wait --text\`) will NOT find it.
+Before emitting an \`AB_ACTION|assert|...\` line, **verify the assertion form actually resolves on the live page**:
+\`\`\`bash
+# element_visible / element_enabled / element_disabled / element_checked / element_unchecked
+agent-browser --session SESSION wait "<selector>" --timeout 3000
+# element_not_visible
+agent-browser --session SESSION wait "<selector>" --state hidden --timeout 3000
+# text_visible
+agent-browser --session SESSION wait --text "<text>" --timeout 3000
+# text_not_visible
+agent-browser --session SESSION wait --text "<text>" --state hidden --timeout 3000
+\`\`\`
+Apply the "record only successful actions" rule from the AB_ACTION section above. **Additionally**, when *no* form verifies — e.g. you tried \`[aria-label='X']\`, \`[placeholder='X']\`, and \`text=X\` and they all timed out, or the "text" turned out to be an \`alt\` / aria-label — **DROP the assertion entirely**. Fewer, real assertions beat invented ones that fail at replay. Prefer swapping a failed \`text_visible\` for an \`element_visible\` against the link/button selector when the visible label came from \`alt\` / aria-label.
+\`url_contains\` is exempt — it checks the current URL string, not the DOM/accessibility tree.
 **Examples:**
 \`\`\`
 AB_ACTION|assert|url_contains|||/dashboard|Navigated to dashboard
@@ -229,7 +275,7 @@ AB_ACTION|assert|text_visible|||Success|Confirmation message appeared
 Emit exactly one status line per step (outside any code block):
 \`\`\`
-STEP_START|<step-id>|<step-title>
+STEP_START|<step-id>|<short description of what this step does>
 STEP_DONE|<step-id>|<what was verified>
 ASSERTION_FAILED|<step-id>|<category: app-bug|env-issue|auth-blocked|missing-test-data|selector-drift|agent-misread>: <reason>
 STEP_SKIPPED|<step-id>|<reason>
@@ -242,37 +288,29 @@ RUN_COMPLETED|failed|<summary>
 After each step (outside any code block):
 \`\`\`
-ROUTE_STEP|<step-id>|<step-title>|ACTION:<what you did>|OBSERVATION:<what you verified>|STATUS:<PASSED|FAILED|SKIPPED>
+ROUTE_STEP|<step-id>|<short description>|ACTION:<what you did>|OBSERVATION:<what you verified>|STATUS:<PASSED|FAILED|SKIPPED>
 \`\`\`
 ${relatedPathsBlock}## Start
-${skipCookiesClear ? `A setup procedure has already been executed in this session. Do NOT clear cookies — keep the existing session state.
+Begin by clearing cookies, then proceed straight to the first step's instruction.
 \`\`\`bash
-agent-browser --session ${sessionName} open ${spec.baseUrl}
-\`\`\`
-Emit:
-\`\`\`
-AB_ACTION|open|${spec.baseUrl}
-\`\`\`` : `\`\`\`bash
 agent-browser --session ${sessionName} cookies clear
-agent-browser --session ${sessionName} open ${spec.baseUrl}
 \`\`\`
 Emit:
 \`\`\`
 AB_ACTION|cookies_clear
-AB_ACTION|open|${spec.baseUrl}
-\`\`\``}
+\`\`\`
-Then emit \`STEP_START|step-01|...\` and begin.`;
+Then emit \`STEP_START|step-01|...\` and execute the first step. The first step is responsible for opening the initial URL.
+`;
 }
 function buildRelatedPathsInstruction() {
 	return `## Post-run: emit \`relatedPaths\` block
-After all steps are complete (regardless of pass/fail) and **before** \`RUN_COMPLETED\`, you MUST emit a single \`RELATED_PATHS\` block. The host (not you) writes these paths into the spec's frontmatter — your only job is to emit the block.
+After all steps are complete (regardless of pass/fail) and **before** \`RUN_COMPLETED\`, you MUST emit a single \`RELATED_PATHS\` block. The host (not you) writes these paths into the spec — your only job is to emit the block.
 \`relatedPaths\` is a list of glob patterns identifying the source files this spec depends on. CI uses them to decide whether a code change should trigger a drift check for this spec.
@@ -303,18 +341,8 @@ Emit the block outside any other code block, on its own lines. If the test could
 `;
 }
-function buildTracePrompt(spec) {
-	return `Execute the test for "${spec.title}" at ${spec.baseUrl}.`;
-}
-function buildSetupTraceSystemPrompt(spec) {
-	return buildTraceSystemPromptInner({
-		title: spec.title,
-		baseUrl: "about:blank",
-		steps: spec.steps
-	}, void 0, false);
-}
-function buildSetupTracePrompt(spec) {
-	return `Execute the setup procedure "${spec.title}". Follow each step precisely.`;
+function buildTracePrompt(title) {
+	return `Execute the test for "${title}". Each step's instruction includes the URL or selector context it needs.`;
 }
 //#endregion
 //#region src/cli/logger.ts
@@ -390,6 +418,11 @@ async function invokeClaudeStreaming(options, onEvent) {
 	const { prompt, systemPrompt, allowedTools, disableBuiltinTools = false, maxTurns, env, model, cwd, onAbAction, onAbActionFailed, silenceBashLog = false } = options;
 	const resolvedModel = resolveModel(model);
 	let lastAbToolUseId = null;
+	const claimAbToolUse = (toolUseId) => {
+		if (toolUseId !== lastAbToolUseId) return false;
+		lastAbToolUseId = null;
+		return true;
+	};
 	const sdkOptions = {
 		systemPrompt,
 		maxTurns,
@@ -424,13 +457,17 @@ async function invokeClaudeStreaming(options, onEvent) {
 				} else lastAbToolUseId = null;
 				return {};
 			}] }],
+			PostToolUse: [{ hooks: [async (input) => {
+				if (input.hook_event_name !== "PostToolUse") return {};
+				if (input.tool_name !== "Bash") return {};
+				if (!isBashToolResponseError(input.tool_response)) return {};
+				if (claimAbToolUse(input.tool_use_id) && onAbActionFailed) onAbActionFailed();
+				return {};
+			}] }],
 			PostToolUseFailure: [{ hooks: [async (input) => {
 				if (input.hook_event_name !== "PostToolUseFailure") return {};
 				if (input.tool_name !== "Bash") return {};
-				if (input.tool_use_id === lastAbToolUseId && onAbActionFailed) {
-					onAbActionFailed();
-					lastAbToolUseId = null;
-				}
+				if (claimAbToolUse(input.tool_use_id) && onAbActionFailed) onAbActionFailed();
 				return {};
 			}] }]
 		} : void 0
@@ -500,6 +537,26 @@ function isBlockedAbSubcommand(cmd) {
 	const sub = extractAbSubcommand(cmd);
 	return sub !== null && BLOCKED_AB_SUBCOMMANDS.has(sub);
 }
+/**
+* Detects "the Bash tool returned an error" from a SDK PostToolUse hook's
+* `tool_response`. The SDK can shape this two ways depending on how Claude
+* Code reports Bash failures:
+*
+*   - `{ is_error: true, ... }`              — the canonical Bash failure shape
+*   - `{ output, exitCode, killed?, ... }`   — the BashOutput shape; treat
+*                                              non-zero exit / kill as error
+*
+* We accept either. Anything else (including missing fields) is treated as a
+* successful response so we never roll back over an unrelated tool call.
+*/
+function isBashToolResponseError(tool_response) {
+	if (tool_response === null || typeof tool_response !== "object") return false;
+	const r = tool_response;
+	if (r["is_error"] === true) return true;
+	if (typeof r["exitCode"] === "number" && r["exitCode"] !== 0) return true;
+	if (r["killed"] === true) return true;
+	return false;
+}
 /** Returns true if any argument to an agent-browser command uses a @ref selector (e.g. @e14). */
 function hasRefSelector(cmd) {
 	const abIdx = cmd.indexOf("agent-browser");
@@ -561,87 +618,255 @@ async function* replayMockMessages(path) {
 	}
 }
 //#endregion
+//#region src/runtime/env-vars.ts
+const ENV_VAR_RE = /\$\{([A-Z_][A-Z0-9_]*)\}|\$([A-Z_][A-Z0-9_]*)/g; // `${NAME}` or `$NAME`, upper-case names only; the name is capture 1 (braced) or capture 2 (plain)
+const ANY_VAR_RE = /\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g; // same two forms, but names may also contain lower-case letters; `g` flag means test()/exec() advance lastIndex
+/**
+* Replace every `$NAME` / `${NAME}` reference in `value` using `lookup`. When
+* `lookup` returns `undefined`, the original reference text is preserved
+* (callers that want empty-string substitution should wrap with `?? ""`).
+*/
+function substituteVars(value, lookup) {
+	ANY_VAR_RE.lastIndex = 0;
+	return value.replace(ANY_VAR_RE, (match, braced, plain) => {
+		const replacement = lookup(braced ?? plain ?? "");
+		return replacement === void 0 ? match : replacement;
+	});
+}
+/**
+* Resolve every `$VAR` / `${VAR}` reference against the current process env.
+*
+* Missing variables expand to the empty string, mirroring `sh` behaviour.
+* Throwing would force ccqa to be invoked with every var set even for
+* unused blocks, which is more user-hostile than letting the test fail
+* downstream with a clearer message ("login form rejected: empty password").
+*/
+function resolveEnvRefs(value) {
+	return value.replace(ENV_VAR_RE, (_, braced, plain) => {
+		const name = braced ?? plain ?? "";
+		return process.env[name] ?? "";
+	});
+}
+/**
+* Embed `$VAR` / `${VAR}` as a JS template-literal expression that reads
+* `process.env.VAR ?? ""` at runtime. Used by `ccqa generate` so the test
+* script never bakes in the secret value.
+*
+* Returns a JavaScript string-literal expression (template literal when env
+* refs are present, plain string literal otherwise).
+*
+* Examples:
+*   "${PASSWORD}"             -> '`${process.env.PASSWORD ?? ""}`'
+*   "user-${SUFFIX}@x.com"    -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
+*   "literal value"           -> '"literal value"'
+*/
+function envRefsToJsExpression(value) {
+	return refsToJsExpression(value, () => null);
+}
+/**
+* Generalised version of `envRefsToJsExpression`. Each `$NAME` / `${NAME}`
+* reference in `value` is passed to `nameToExpr(name)` first:
+*
+* - If it returns a string, that string is interpolated as a JS expression
+*   (no quoting / no `?? ""` wrap — the caller decides the shape).
+* - If it returns `null`, the reference is treated as a missing env var
+*   and expands to `process.env.<NAME> ?? ""` (the legacy behaviour).
+*
+* Used by the block codegen path: param names map to `params.<name>`,
+* everything else falls through to `process.env.X ?? ""`.
+*/
+function refsToJsExpression(value, nameToExpr) {
+	ANY_VAR_RE.lastIndex = 0;
+	if (!ANY_VAR_RE.test(value)) return JSON.stringify(value);
+	const escaped = value.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, (_match, offset, source) => {
+		const probe = new RegExp(ANY_VAR_RE.source, "g");
+		let m;
+		while ((m = probe.exec(source)) !== null) if (m.index === offset) return "${";
+		return "\\${";
+	});
+	ANY_VAR_RE.lastIndex = 0;
+	return `\`${escaped.replace(ANY_VAR_RE, (_match, braced, plain) => {
+		const name = braced ?? plain ?? "";
+		const expr = nameToExpr(name);
+		return expr !== null ? `\${${expr}}` : `\${process.env.${name} ?? ""}`;
+	})}\``;
+}
+//#endregion
+//#region src/spec/yaml-schema.ts
+/**
+* An action step: one user-facing browser interaction. `instruction` and
+* `expected` are the natural-language description handed to Claude during
+* `ccqa trace`. URLs live inside `instruction`, either verbatim or via
+* `${ENV_VAR}` references (resolved at runtime).
+*
+* Both fields are required and must be non-empty strings (`min(1)`);
+* `.strict()` rejects any additional key on the step.
+*/
+const ActionStepSchema = z.object({
+	instruction: z.string().min(1),
+	expected: z.string().min(1)
+}).strict();
+/**
+* An include step: invokes a reusable block (`.ccqa/blocks/<name>/spec.yaml`).
+* `include` is the non-empty block name; `params` is an optional map of
+* string keys to plain string values. `.strict()` rejects any other key.
+*
+* Env refs (`${VAR}`) inside param values are meant to be resolved at expand
+* time the same way step instructions are.
+* NOTE(review): the `expandSpec` in this file inserts param values into the
+* block's steps as-is; where env refs inside those values are resolved is
+* not visible here — confirm.
+*/
+const IncludeStepSchema = z.object({
+	include: z.string().min(1),
+	params: z.record(z.string(), z.string()).optional()
+}).strict();
+/**
+* A spec step is either an action step or an include step. The two are
+* discriminated by the presence of the `include` key — see `isIncludeStep`.
+* Because both member schemas are `.strict()`, an object that mixes
+* `include` with `instruction` / `expected` matches neither of them.
+*/
+const StepSchema = z.union([ActionStepSchema, IncludeStepSchema]);
+/**
+* Top-level spec schema. `.strict()` rejects any unknown key.
+*
+* - `title`: required, non-empty string.
+* - `relatedPaths`: optional array of non-empty strings.
+* - `steps`: required array holding at least one action or include step.
+*/
+const TestSpecSchema = z.object({
+	title: z.string().min(1),
+	relatedPaths: z.array(z.string().min(1)).optional(),
+	steps: z.array(StepSchema).min(1)
+}).strict();
+/**
+* A block param declaration. Only `name` (a non-empty string) is mandatory;
+* every other field is optional and unknown keys are rejected (`.strict()`).
+*
+* `required` defaults to true; only explicit `required: false` makes it
+* optional (see `isParamRequired`). `secret: true` flags the value as
+* sensitive — codegen renders such values as `process.env.<NAME> ?? ""`
+* template literals so the secret never ends up baked into test.spec.ts.
+* `dummy` is a placeholder value surfaced by the draft / drift prompts
+* (which see the block in isolation, before any include site exists);
+* `description` is the param's semantic role, also consumed by those
+* prompts and by spec authors browsing the block.
+*/
+const BlockParamSchema = z.object({
+	name: z.string().min(1),
+	required: z.boolean().optional(),
+	secret: z.boolean().optional(),
+	dummy: z.string().optional(),
+	description: z.string().optional()
+}).strict();
+/**
+* Block schema: a non-empty `title`, an optional list of `params`
+* declarations, and at least one step. Block steps are restricted to
+* ActionStep — nested blocks are forbidden. Including a block from inside
+* another block fails parsing here, because the strict ActionStepSchema
+* does not accept an `include` key (the parser layer maps the cryptic
+* "Unrecognized key: 'include'" error into a targeted nested-block message).
+*/
+const BlockSpecSchema = z.object({
+	title: z.string().min(1),
+	params: z.array(BlockParamSchema).optional(),
+	steps: z.array(ActionStepSchema).min(1)
+}).strict();
+/** Runtime predicate for the StepSchema union. */
+function isIncludeStep(step) {
+	return "include" in step;
+}
+/** Returns true if a block param is required (default: true). */
+function isParamRequired(param) {
+	return param.required !== false;
+}
+//#endregion
 //#region src/spec/parser.ts
-function parseTestSpec(content) {
-	const { data, content: body } = matter(content);
-	const steps = parseSteps(body);
-	const prerequisites = parsePrerequisites(body);
-	return {
-		title: String(data["title"] ?? "Untitled"),
-		baseUrl: String(data["baseUrl"] ?? "http://localhost:3000"),
-		prerequisites: prerequisites || void 0,
-		setups: parseSetupRefs(data["setups"]),
-		relatedPaths: parseRelatedPaths(data["relatedPaths"]),
-		steps
-	};
+/** Parse a spec.yaml. Schema rejections are rewritten with actionable messages. */
+function parseTestSpec(content, source = "spec.yaml") {
+	const raw = parseYamlOrThrow(content, source);
+	try {
+		return TestSpecSchema.parse(raw);
+	} catch (e) {
+		throw enrichZodError(e, source, false);
+	}
 }
-function parseRelatedPaths(raw) {
-	if (!Array.isArray(raw)) return void 0;
-	const paths = [];
-	for (const item of raw) if (typeof item === "string" && item.trim().length > 0) paths.push(item.trim());
-	return paths.length > 0 ? paths : void 0;
+/**
+* Parse a block's spec.yaml. Block-specific errors include the targeted
+* nested-block message (the underlying zod failure on an `include` key
+* inside a block step is hard to read).
+*/
+function parseBlockSpec(content, source = "block spec.yaml") {
+	const raw = parseYamlOrThrow(content, source);
+	try {
+		return BlockSpecSchema.parse(raw);
+	} catch (e) {
+		throw enrichZodError(e, source, true);
+	}
 }
-function parseSetupSpec(content) {
-	const { data, content: body } = matter(content);
-	const steps = parseSteps(body);
-	const placeholders = parsePlaceholders(data["placeholders"]);
-	return {
-		title: String(data["title"] ?? "Untitled"),
-		placeholders: Object.keys(placeholders).length > 0 ? placeholders : void 0,
-		steps
-	};
+function parseYamlOrThrow(content, source) {
+	try {
+		return parse(content);
+	} catch (e) {
+		throw new Error(`Failed to parse YAML (${source}): ${e.message}`);
+	}
 }
-function parsePlaceholders(raw) {
-	if (!raw || typeof raw !== "object") return {};
-	const result = {};
-	for (const [key, val] of Object.entries(raw)) if (val && typeof val === "object" && "dummy" in val) {
-		const v = val;
-		result[key] = {
-			dummy: String(v["dummy"]),
-			description: v["description"] ? String(v["description"]) : void 0
-		};
+function enrichZodError(error, source, isBlock) {
+	if (!(error instanceof ZodError)) return error;
+	const lines = [`Invalid ${source}:`];
+	for (const issue of error.issues) {
+		const path = issue.path.join(".") || "(root)";
+		const message = humanizeIssue(issue, isBlock);
+		lines.push(`  - ${path}: ${message}`);
 	}
-	return result;
-}
-function parseSetupRefs(raw) {
-	if (!Array.isArray(raw)) return void 0;
-	const refs = [];
-	for (const item of raw) if (typeof item === "object" && item !== null && "name" in item) {
-		const i = item;
-		refs.push({
-			name: String(i["name"]),
-			params: i["params"] && typeof i["params"] === "object" ? Object.fromEntries(Object.entries(i["params"]).map(([k, v]) => [k, String(v)])) : void 0
-		});
+	return new Error(lines.join("\n"));
+}
+function humanizeIssue(issue, isBlock) {
+	if (issue.code === "unrecognized_keys") {
+		const keys = Array.isArray(issue.keys) ? issue.keys : [];
+		if (isBlock && keys.includes("include")) return `Nested blocks are not supported — flatten by inlining the included block's steps into this block.`;
+		return `Unknown keys: ${keys.join(", ")}`;
 	}
-	return refs.length > 0 ? refs : void 0;
-}
-function parsePrerequisites(body) {
-	const match = body.match(/##\s+Prerequisites\s+([\s\S]*?)(?=##|$)/);
-	if (!match || !match[1]) return null;
-	return match[1].trim();
-}
-function parseSteps(body) {
-	const stepBlocks = body.split(/###\s+Step\s+\d+:/);
-	const steps = [];
-	for (let i = 1; i < stepBlocks.length; i++) {
-		const block = stepBlocks[i];
-		if (!block) continue;
-		const titleMatch = block.match(/^(.+)/);
-		const instructionMatch = block.match(/\*\*Instruction\*\*:\s*(.+)/);
-		const expectedMatch = block.match(/\*\*Expected\*\*:\s*(.+)/);
-		if (!titleMatch || !instructionMatch || !expectedMatch) continue;
-		steps.push({
-			id: `step-${String(i).padStart(2, "0")}`,
-			title: titleMatch[1]?.trim() ?? "",
-			instruction: instructionMatch[1]?.trim() ?? "",
-			expected: expectedMatch[1]?.trim() ?? ""
+	return issue.message;
+}
+//#endregion
+//#region src/spec/expand.ts
+/**
+* Walk the spec's top-level steps, inlining any `- include: <block>` reference
+* as the block's own steps in order. The result is a flat `step-NN`-numbered
+* sequence — block boundaries survive only as the `source` tag, so trace and
+* codegen never need a separate block code path.
+*
+* `options.blocks` must expose `get(name)` (it is handed to `resolveBlock`).
+* Each returned item has the shape `{ id, source, instruction, expected }`:
+* - spec-native steps get `source: "spec"` and their text is copied unchanged;
+* - inlined block steps get `source: <block name>` and have `$name` /
+*   `${name}` references replaced with the include site's param values
+*   (references that are not supplied params are left as written).
+*
+* Errors thrown by `resolveBlock` (unknown block, undeclared or missing
+* param) propagate to the caller.
+*/
+function expandSpec(spec, options) {
+	const out = [];
+	let counter = 0;
+	const allocId = () => {
+		counter += 1;
+		return `step-${String(counter).padStart(2, "0")}`; // ids are 1-based and zero-padded to two digits
+	};
+	for (const step of spec.steps) if (isIncludeStep(step)) {
+		const block = resolveBlock(step.include, step.params ?? {}, options.blocks); // an include without `params` is treated as passing none
+		for (const blockStep of block.steps) out.push({
+			id: allocId(),
+			source: step.include,
+			instruction: substituteVars(blockStep.instruction, block.lookup),
+			expected: substituteVars(blockStep.expected, block.lookup)
 		});
-	}
-	return steps;
+	} else out.push({
+		id: allocId(),
+		source: "spec",
+		instruction: step.instruction,
+		expected: step.expected
+	});
+	return out;
+}
+function resolveBlock(blockName, rawParams, blocks) {
+	const block = blocks.get(blockName);
+	if (!block) throw new Error(`Unknown block: "${blockName}". Define it under .ccqa/blocks/${blockName}/spec.yaml.`);
+	const declaredParams = new Map((block.params ?? []).map((p) => [p.name, p]));
+	for (const key of Object.keys(rawParams)) if (!declaredParams.has(key)) throw new Error(`Block "${blockName}" received unknown param "${key}". Declared params: ${[...declaredParams.keys()].join(", ") || "(none)"}.`);
+	for (const [pname, def] of declaredParams) if (isParamRequired(def) && !(pname in rawParams)) throw new Error(`Block "${blockName}" is missing required param "${pname}".`);
+	const lookup = (name) => {
+		if (Object.prototype.hasOwnProperty.call(rawParams, name)) return rawParams[name];
+	};
+	return {
+		steps: block.steps,
+		lookup
+	};
+}
+/**
+* Collect every block name referenced by a spec (top-level only — blocks
+* cannot nest). Used by the store / drift layers to know which blocks to
+* load or invalidate.
+*/
+function collectIncludedBlockNames(spec) {
+	const names = /* @__PURE__ */ new Set();
+	for (const step of spec.steps) if (isIncludeStep(step)) names.add(step.include);
+	return [...names];
 }
 //#endregion
 //#region src/store/index.ts
 const CCQA_DIR = ".ccqa";
+const SPEC_FILE = "spec.yaml";
 /** Path of the ccqa directory (`CCQA_DIR`) directly under `cwd`, which defaults to the process's current working directory. */
 function getCcqaDir(cwd = process.cwd()) {
 	const baseDir = cwd;
 	return join(baseDir, CCQA_DIR);
 }
@@ -669,39 +894,44 @@ function getSpecDir(featureName, specName, cwd) {
 }
 async function ensureCcqaDir(cwd) {
 	await mkdir(join(getCcqaDir(cwd), "features"), { recursive: true });
+	await mkdir(join(getCcqaDir(cwd), "blocks"), { recursive: true });
 }
 async function readSpecFile(featureName, specName, cwd) {
-	const specPath = join(getSpecDir(featureName, specName, cwd), "test-spec.md");
+	const specPath = join(getSpecDir(featureName, specName, cwd), SPEC_FILE);
 	return readFile(specPath, "utf-8").catch(() => {
 		throw new Error(`Spec file not found: ${specPath}`);
 	});
 }
 async function tryReadSpecFile(featureName, specName, cwd) {
-	return readFile(join(getSpecDir(featureName, specName, cwd), "test-spec.md"), "utf-8").catch(() => null);
+	return readFile(join(getSpecDir(featureName, specName, cwd), SPEC_FILE), "utf-8").catch(() => null);
 }
 async function saveSpecFile(featureName, specName, content, cwd) {
 	const specDir = getSpecDir(featureName, specName, cwd);
 	await mkdir(specDir, { recursive: true });
-	const specPath = join(specDir, "test-spec.md");
+	const specPath = join(specDir, SPEC_FILE);
 	await writeFile(specPath, content.endsWith("\n") ? content : content + "\n", "utf-8");
 	return specPath;
 }
 /**
-* Replace (or insert) the `relatedPaths` key in the spec's YAML frontmatter.
-* Preserves every other frontmatter key and the entire body. Returns the
-* absolute path that was written, or null if the spec file does not exist.
+* Replace (or insert) the `relatedPaths` key in the spec. Preserves every
+* other top-level field and the entire steps array. Returns the absolute
+* path that was written, or null if the spec file does not exist.
 */
 async function updateSpecRelatedPaths(featureName, specName, relatedPaths, cwd) {
-	const specPath = join(getSpecDir(featureName, specName, cwd), "test-spec.md");
+	const specPath = join(getSpecDir(featureName, specName, cwd), SPEC_FILE);
 	const existing = await readFile(specPath, "utf-8").catch(() => null);
 	if (existing === null) return null;
-	const parsed = matter(existing);
-	const data = { ...parsed.data };
-	if (relatedPaths.length > 0) data["relatedPaths"] = relatedPaths;
-	else delete data["relatedPaths"];
-	await writeFile(specPath, matter.stringify(parsed.content, data), "utf-8");
+	await writeFile(specPath, stringify(stripUndefined({
+		...parseTestSpec(existing, specPath),
+		relatedPaths: relatedPaths.length > 0 ? relatedPaths : void 0
+	}), { lineWidth: 0 }), "utf-8");
 	return specPath;
 }
+function stripUndefined(obj) {
+	const out = {};
+	for (const [k, v] of Object.entries(obj)) if (v !== void 0) out[k] = v;
+	return out;
+}
 async function saveRoute(featureName, specName, route, cwd) {
 	const specDir = getSpecDir(featureName, specName, cwd);
 	await mkdir(specDir, { recursive: true });
@@ -716,38 +946,72 @@ async function saveTraceActions(featureName, specName, actions, cwd) {
 	await writeFile(actionsPath, JSON.stringify(actions, null, 2), "utf-8");
 	return actionsPath;
 }
-function getSetupDir(name, cwd) {
-	return join(getCcqaDir(cwd), "setups", name);
+function getBlocksDir(cwd) {
+	return join(getCcqaDir(cwd), "blocks");
 }
-async function readSetupSpecFile(name, cwd) {
-	const specPath = join(getSetupDir(name, cwd), "setup-spec.md");
-	return readFile(specPath, "utf-8").catch(() => {
-		throw new Error(`Setup spec not found: ${specPath}`);
-	});
+/**
+* Inverse of `getBlockDir`. Given a file path that appears in a git diff,
+* return the block name if the path points at the block's spec.yaml, else
+* null. Used by `drift --changed` to invalidate specs whose included blocks
+* were edited. (v0.4 inlines blocks into every spec's own trace, so the
+* block directory holds only spec.yaml — no per-block actions.json / route
+* lives here anymore.)
+*/
+function parseBlockPath(path) {
+	return path.match(/(?:^|\/)\.ccqa\/blocks\/([^/]+)\/spec\.yaml$/)?.[1] ?? null;
 }
-async function saveSetupActions(name, actions, cwd) {
-	const dir = getSetupDir(name, cwd);
-	await mkdir(dir, { recursive: true });
-	const path = join(dir, "actions.json");
-	await writeFile(path, JSON.stringify(actions, null, 2), "utf-8");
-	return path;
+/**
+* Load every block under `.ccqa/blocks/<name>/spec.yaml`. Used by the trace /
+* generate / drift entry points to validate include references at parse time.
+*
+* A malformed block is fatal — surfaces as a thrown Error with the path that
+* failed. Missing block directories (no `spec.yaml`) are silently skipped so
+* stray files don't break the loader.
+*/
+async function loadAllBlocks(cwd) {
+	const dir = getBlocksDir(cwd);
+	const names = await readdir(dir).catch(() => []);
+	const entries = await Promise.all(names.map(async (name) => {
+		const path = join(dir, name, SPEC_FILE);
+		const content = await readFile(path, "utf-8").catch(() => null);
+		return content === null ? null : [name, parseBlockSpec(content, path)];
+	}));
+	return new Map(entries.filter((e) => e !== null));
 }
-async function getSetupActions(name, cwd) {
-	const path = join(getSetupDir(name, cwd), "actions.json");
-	const content = await readFile(path, "utf-8").catch(() => {
-		throw new Error(`No setup actions found for: ${name}. Run \`ccqa trace-setup ${name}\` first.`);
-	});
-	return {
-		path,
-		actions: JSON.parse(content)
-	};
+/**
+* Project the parsed blocks into the shape the draft / drift prompts consume.
+* Co-located with `loadAllBlocks` so callers don't have to remember the
+* isParamRequired / secret-default mapping.
+*/
+async function loadAvailableBlocks(cwd) {
+	return [...(await loadAllBlocks(cwd)).entries()].map(([name, block]) => ({
+		name,
+		title: block.title,
+		params: (block.params ?? []).map((p) => ({
+			name: p.name,
+			required: isParamRequired(p),
+			secret: p.secret === true
+		}))
+	}));
 }
-async function saveSetupRoute(name, route, cwd) {
-	const dir = getSetupDir(name, cwd);
-	await mkdir(dir, { recursive: true });
-	const routePath = join(dir, "route.md");
-	await writeFile(routePath, routeToMarkdown(route), "utf-8");
-	return routePath;
+/**
+* Probe for orphaned files left over from earlier ccqa versions inside
+* `.ccqa/blocks/<name>/`. Both pre-v0.4 `test.spec.ts` (function-export
+* blocks) and the short-lived `actions.json` / `route.md` (recorded-block
+* variant) are dead in the new "blocks are pure spec templates" model and
+* should be deleted manually. Returns the absolute paths.
+*/
+async function findStaleBlockArtifacts(cwd) {
+	const dir = getBlocksDir(cwd);
+	const names = await readdir(dir).catch(() => []);
+	return (await Promise.all(names.flatMap((name) => [
+		"test.spec.ts",
+		"actions.json",
+		"route.md"
+	].map(async (f) => {
+		const path = join(dir, name, f);
+		return await stat(path).then(() => true).catch(() => false) ? path : null;
+	})))).filter((p) => p !== null);
 }
 async function getTraceActions(featureName, specName, cwd) {
 	const path = join(getSpecDir(featureName, specName, cwd), "actions.json");
@@ -789,8 +1053,7 @@ async function listSpecsForFeature(featureName, cwd) {
 }
 /**
 * Lists every feature/spec dir under .ccqa/features/, regardless of whether
-* the spec is fully drafted yet. Each spec file is read at most once: title
-* and relatedPaths are both extracted from the same parse.
+* the spec is fully drafted yet. Each spec file is read at most once.
 */
 async function listFeatureTree(cwd) {
 	const featuresDir = join(getCcqaDir(cwd), "features");
@@ -801,18 +1064,19 @@ async function listFeatureTree(cwd) {
 		return {
 			featureName,
 			specs: await Promise.all(specDirs.map(async (specName) => {
-				const content = await readFile(join(testCasesDir, specName, "test-spec.md"), "utf-8").catch(() => null);
+				const specFile = join(testCasesDir, specName, SPEC_FILE);
+				const content = await readFile(specFile, "utf-8").catch(() => null);
 				if (content === null) return {
 					specName,
 					hasSpecFile: false
 				};
 				try {
-					const spec = parseTestSpec(content);
+					const spec = parseTestSpec(content, specFile);
 					const entry = {
 						specName,
-						hasSpecFile: true
+						hasSpecFile: true,
+						includedBlocks: collectIncludedBlockNames(spec)
 					};
-					if (spec.title && spec.title !== "Untitled") entry.title = spec.title;
 					if (spec.relatedPaths) entry.relatedPaths = spec.relatedPaths;
 					return entry;
 				} catch {
@@ -845,6 +1109,20 @@ function routeToMarkdown(route) {
 	return lines.join("\n");
 }
 //#endregion
+//#region src/cli/stale-blocks.ts
+/**
+* Hint when stale per-block artifacts (`test.spec.ts`, `actions.json`,
+* `route.md`) from earlier ccqa versions are still present. v0.4 treats
+* blocks as pure spec templates — they no longer have their own executable
+* or recorded artifacts, so these files are dead code and should be deleted
+* manually. Shared by `trace` and `generate`.
+*/
+async function warnStaleBlockArtifacts() {
+	const stale = await findStaleBlockArtifacts();
+	if (stale.length === 0) return;
+	for (const p of stale) hint(`stale block artifact detected: ${p} — v0.4 no longer uses these; delete it manually.`);
+}
+//#endregion
 //#region src/drift/parse-related-paths.ts
 /**
 * Pull a `RELATED_PATHS_BEGIN ... RELATED_PATHS_END` block out of the trace
@@ -869,103 +1147,8 @@ function parseRelatedPathsBlock(text) {
 	return out;
 }
 //#endregion
-//#region src/runtime/bundled-config.ts
-const CANDIDATES = [
-	"../runtime/vitest.config.mjs",
-	"./vitest.config.mjs",
-	"./vitest.config.ts"
-];
-function bundledVitestConfigPath() {
-	for (const rel of CANDIDATES) {
-		const candidate = fileURLToPath(new URL(rel, import.meta.url));
-		try {
-			accessSync(candidate);
-			return candidate;
-		} catch {}
-	}
-	return fileURLToPath(new URL("./vitest.config.ts", import.meta.url));
-}
-//#endregion
-//#region src/runtime/spawn-vitest.ts
-const require$2 = createRequire(import.meta.url);
-function resolveVitestBin() {
-	const pkgPath = require$2.resolve("vitest/package.json");
-	const pkg = require$2(pkgPath);
-	const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.vitest;
-	if (!binRel) throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
-	return resolve(dirname(pkgPath), binRel);
-}
-async function spawnVitestCaptured(args, opts = {}) {
-	const child = spawnVitestChild(args, opts, "pipe");
-	const [stdout, stderr, exitCode] = await Promise.all([
-		drain(child.stdout),
-		drain(child.stderr),
-		waitExit(child)
-	]);
-	return {
-		exitCode,
-		stdout,
-		stderr
-	};
-}
-async function spawnVitestTeed(args, opts = {}) {
-	const child = spawnVitestChild(args, opts, "pipe");
-	const [stdout, stderr, exitCode] = await Promise.all([
-		teeDrain(child.stdout, process.stdout),
-		teeDrain(child.stderr, process.stderr),
-		waitExit(child)
-	]);
-	return {
-		exitCode,
-		stdout,
-		stderr
-	};
-}
-function spawnVitestStreaming(args, opts = {}) {
-	const child = spawnVitestChild(args, opts, "pipe");
-	return {
-		child,
-		stdout: child.stdout,
-		stderr: child.stderr,
-		exited: waitExit(child)
-	};
-}
-function spawnVitestChild(args, opts, stdio) {
-	const vitestBin = resolveVitestBin();
-	return spawn(process.execPath, [vitestBin, ...args], {
-		cwd: opts.cwd,
-		env: opts.env ?? process.env,
-		stdio: [
-			"ignore",
-			stdio,
-			stdio
-		]
-	});
-}
-async function drain(stream) {
-	stream.setEncoding("utf8");
-	let buf = "";
-	for await (const chunk of stream) buf += chunk;
-	return buf;
-}
-async function teeDrain(stream, sink) {
-	stream.setEncoding("utf8");
-	let buf = "";
-	for await (const chunk of stream) {
-		buf += chunk;
-		sink.write(chunk);
-	}
-	return buf;
-}
-function waitExit(child) {
-	return new Promise((resolvePromise, rejectPromise) => {
-		child.once("exit", (code) => resolvePromise(code ?? 0));
-		child.once("error", rejectPromise);
-	});
-}
-//#endregion
 //#region src/runtime/agent-browser-bin.ts
-const require$1 = createRequire(import.meta.url);
+const require$2 = createRequire(import.meta.url);
 function hasAgentBrowserShim(dir) {
 	try {
 		statSync(join(dir, "agent-browser"));
@@ -999,10 +1182,10 @@ function findNodeModulesBin(start) {
 function resolveAgentBrowserBinDir() {
 	const fromCwd = findNodeModulesBin(process.cwd());
 	if (fromCwd) return fromCwd;
-	const fromSelf = findNodeModulesBin(dirname(require$1.resolve("agent-browser/package.json")));
+	const fromSelf = findNodeModulesBin(dirname(require$2.resolve("agent-browser/package.json")));
 	if (fromSelf) return fromSelf;
 	try {
-		const candidate = join(dirname(require$1.resolve("agent-browser/package.json")), "node_modules", ".bin");
+		const candidate = join(dirname(require$2.resolve("agent-browser/package.json")), "node_modules", ".bin");
 		if (hasAgentBrowserShim(candidate)) return candidate;
 	} catch {}
 	return null;
@@ -1062,54 +1245,197 @@ function formatAgentBrowserUnavailableMessage() {
 	].join("\n");
 }
 //#endregion
-//#region src/runtime/env-vars.ts
-const ENV_VAR_RE = /\$\{([A-Z_][A-Z0-9_]*)\}|\$([A-Z_][A-Z0-9_]*)/g;
+//#region src/runtime/replay-validate.ts
+const SHORT_TIMEOUT_MS = 5e3;
+const ASSERT_TIMEOUT_MS = 1e4;
 /**
-* Returns true if the value contains at least one `$VAR` or `${VAR}` reference.
+* Convert one recorded action into the `agent-browser` arg list that would
+* exercise it. Returns `null` for actions that should not be validated
+* (snapshot has no side effect; assert types whose codegen forms aren't
+* directly verifiable here fall through to the caller's `unverifiable`
+* fallback).
 */
-function hasEnvRef(value) {
-	ENV_VAR_RE.lastIndex = 0;
-	return ENV_VAR_RE.test(value);
+function actionToAbArgs(action, sessionName) {
+	const base = ["--session", sessionName];
+	const sub = (s) => s === void 0 ? "" : resolveEnvRefs(s);
+	switch (action.command) {
+		case "cookies_clear": return [
+			...base,
+			"cookies",
+			"clear"
+		];
+		case "open": return [
+			...base,
+			"open",
+			sub(action.value).replace(/^["']|["']$/g, "")
+		];
+		case "click": return [
+			...base,
+			"click",
+			sub(action.selector)
+		];
+		case "dblclick": return [
+			...base,
+			"dblclick",
+			sub(action.selector)
+		];
+		case "fill":
+		case "type": return [
+			...base,
+			"fill",
+			sub(action.selector),
+			sub(action.value)
+		];
+		case "check": return [
+			...base,
+			"check",
+			sub(action.selector)
+		];
+		case "uncheck": return [
+			...base,
+			"uncheck",
+			sub(action.selector)
+		];
+		case "press": return [
+			...base,
+			"press",
+			sub(action.value)
+		];
+		case "select": return [
+			...base,
+			"select",
+			sub(action.selector),
+			sub(action.value)
+		];
+		case "hover": return [
+			...base,
+			"hover",
+			sub(action.selector)
+		];
+		case "scroll": {
+			const args = [action.direction ?? "down", ...action.pixels ? [action.pixels] : []];
+			return [
+				...base,
+				"scroll",
+				...args
+			];
+		}
+		case "drag": return [
+			...base,
+			"drag",
+			sub(action.selector),
+			sub(action.target)
+		];
+		case "wait": {
+			const raw = sub(action.selector);
+			if (!raw) return null;
+			if (/^\d+$/.test(raw)) return null;
+			if (raw.startsWith("text=")) return [
+				...base,
+				"wait",
+				"--text",
+				raw.slice(5),
+				"--timeout",
+				String(SHORT_TIMEOUT_MS)
+			];
+			return [
+				...base,
+				"wait",
+				raw,
+				"--timeout",
+				String(SHORT_TIMEOUT_MS)
+			];
+		}
+		case "snapshot": return null;
+		case "assert": return assertToAbArgs(action, sub, sessionName);
+	}
 }
-/**
-* Resolve every `$VAR` / `${VAR}` reference against the current process env.
-*
-* Missing variables expand to the empty string, mirroring `sh` behaviour.
-* Throwing would force ccqa to be invoked with every var set even for
-* unused setups, which is more user-hostile than letting the test fail
-* downstream with a clearer message ("login form rejected: empty password").
-*/
-function resolveEnvRefs(value) {
-	return value.replace(ENV_VAR_RE, (_, braced, plain) => {
-		const name = braced ?? plain ?? "";
-		return process.env[name] ?? "";
-	});
+function assertToAbArgs(action, sub, sessionName) {
+	const base = ["--session", sessionName];
+	const val = sub(action.value ?? action.observation);
+	const sel = sub(action.selector ?? action.observation);
+	switch (action.assertType) {
+		case "text_visible":
+			if (!val) return null;
+			return [
+				...base,
+				"wait",
+				"--text",
+				val,
+				"--timeout",
+				String(ASSERT_TIMEOUT_MS)
+			];
+		case "text_not_visible": return null;
+		case "element_visible":
+			if (!sel) return null;
+			return [
+				...base,
+				"wait",
+				sel,
+				"--timeout",
+				String(ASSERT_TIMEOUT_MS)
+			];
+		case "element_not_visible": return null;
+		case "url_contains": return null;
+		case "element_enabled":
+		case "element_disabled":
+		case "element_checked":
+		case "element_unchecked":
+			if (!sel || sel.startsWith("text=") || sel.startsWith("[aria-label=")) return null;
+			return [
+				...base,
+				"wait",
+				sel,
+				"--timeout",
+				String(ASSERT_TIMEOUT_MS)
+			];
+		default: return null;
+	}
+}
+function validateActions(actions, opts) {
+	const kept = [];
+	const dropped = [];
+	let skipUntilSideEffect = false;
+	for (let i = 0; i < actions.length; i++) {
+		const action = actions[i];
+		if (skipUntilSideEffect && isPassiveCommand(action.command)) {
+			dropped.push({
+				index: i,
+				action,
+				reason: "skipped after a preceding action failed"
+			});
+			continue;
+		}
+		skipUntilSideEffect = false;
+		const args = actionToAbArgs(action, opts.sessionName);
+		if (args === null) {
+			kept.push(action);
+			continue;
+		}
+		const result = spawnAB(args);
+		if (result.status === 0) {
+			kept.push(action);
+			continue;
+		}
+		dropped.push({
+			index: i,
+			action,
+			reason: (result.stderr.trim() || result.stdout.trim() || `agent-browser exit ${result.status ?? "?"}`).slice(0, 200)
+		});
+		skipUntilSideEffect = true;
+	}
+	return {
+		kept,
+		dropped
+	};
 }
 /**
-* Embed `$VAR` / `${VAR}` as a JS template-literal expression that reads
-* `process.env.VAR ?? ""` at runtime. Used by `ccqa generate` so the test
-* script never bakes in the secret value.
-*
-* Returns a JavaScript string-literal expression (template literal when env
-* refs are present, plain string literal otherwise).
-*
-* Examples:
-*   "${PASSWORD}"             -> '`${process.env.PASSWORD ?? ""}`'
-*   "user-${SUFFIX}@x.com"    -> '`user-${process.env.SUFFIX ?? ""}@x.com`'
-*   "literal value"           -> '"literal value"'
+* Passive (read-only) commands whose only effect is observation. When a
+* preceding action fails, dropping these too is the right move because
+* they were trying to observe state the failed action would have set up.
 */
-function envRefsToJsExpression(value) {
-	if (!hasEnvRef(value)) return JSON.stringify(value);
-	const escaped = value.replace(/\\/g, "\\\\").replace(/`/g, "\\`").replace(/\$\{/g, (match, offset, source) => {
-		ENV_VAR_RE.lastIndex = 0;
-		let m;
-		while ((m = ENV_VAR_RE.exec(source)) !== null) if (m.index === offset) return "${";
-		return "\\${";
-	});
-	ENV_VAR_RE.lastIndex = 0;
-	return `\`${escaped.replace(ENV_VAR_RE, (_, braced, plain) => {
-		return `\${process.env.${braced ?? plain ?? ""} ?? ""}`;
-	})}\``;
+function isPassiveCommand(cmd) {
+	return cmd === "snapshot" || cmd === "wait" || cmd === "assert";
 }
 //#endregion
 //#region src/cli/trace.ts
@@ -1129,30 +1455,35 @@ async function runTrace(featureName, specName, model) {
 		throw e;
 	}
 	await ensureCcqaDir();
+	await warnStaleBlockArtifacts();
 	const spec = parseTestSpec(await readSpecFile(featureName, specName));
-	const hasSetups = (spec.setups?.length ?? 0) > 0;
+	const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
 	meta("spec", spec.title);
-	meta("url", spec.baseUrl);
-	if (hasSetups) meta("setups", spec.setups.map((s) => s.name).join(", "));
-	meta("steps", spec.steps.length);
+	meta("steps", expanded.length);
+	const includes = collectIncludedBlockNames(spec);
+	if (includes.length > 0) meta("blocks", includes.join(", "));
 	blank();
 	const sessionName = generateSessionName();
-	if (hasSetups) {
-		info("Running setup procedures...");
-		await runSetups(spec.setups, sessionName);
-		blank();
-	}
-	const systemPrompt = buildTraceSystemPrompt(spec, {
-		sessionName,
-		skipCookiesClear: hasSetups
+	const systemPrompt = buildTraceSystemPrompt({
+		title: spec.title,
+		steps: expanded,
+		sessionName
 	});
-	const prompt = buildTracePrompt(spec);
+	const prompt = buildTracePrompt(spec.title);
 	info("Running agent-browser session...");
 	blank();
 	const routeSteps = [];
 	let overallStatus = "passed";
 	const traceActions = [];
+	let currentStepId;
 	let relatedPathsBuffer = null;
+	const withStepId = (action) => {
+		if (!action) return null;
+		return currentStepId ? {
+			...action,
+			stepId: currentStepId
+		} : action;
+	};
 	const { isError } = await invokeClaudeStreaming({
 		prompt,
 		systemPrompt,
@@ -1168,7 +1499,7 @@ async function runTrace(featureName, specName, model) {
 		},
 		model,
 		onAbAction: (abAction) => {
-			const action = parseAbAction(abAction);
+			const action = withStepId(parseAbAction(abAction));
 			if (action) traceActions.push(action);
 		},
 		onAbActionFailed: () => {
@@ -1184,10 +1515,14 @@ async function runTrace(featureName, specName, model) {
 				const idx = text.indexOf("RELATED_PATHS_BEGIN");
 				if (idx !== -1) relatedPathsBuffer = text.slice(idx) + "\n";
 			}
-			const statusLine = parseStatusLine(text);
-			if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
 			for (const line of text.split("\n")) {
 				const trimmed = line.trim();
+				const status = parseStatusLine(line);
+				if (status) {
+					if (status.type === "STEP_START" && status.stepId) currentStepId = status.stepId;
+					step(status.type, status.stepId, status.detail);
+					continue;
+				}
 				if (trimmed.startsWith("ROUTE_STEP|")) {
 					const routeStep = parseRouteStep(trimmed);
 					if (routeStep) {
@@ -1195,24 +1530,25 @@ async function runTrace(featureName, specName, model) {
 						if (routeStep.status === "FAILED") overallStatus = "failed";
 					}
 				} else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
-					const action = parseAbAction(trimmed);
+					const action = withStepId(parseAbAction(trimmed));
 					if (action) traceActions.push(action);
 				}
 			}
 		}
 	});
 	if (isError) overallStatus = "failed";
+	const validatedActions = validateAndReport(traceActions);
 	const route = {
 		specName,
 		timestamp: (/* @__PURE__ */ new Date()).toISOString(),
 		status: overallStatus,
 		steps: routeSteps
 	};
-	const [routePath, actionsPath] = await Promise.all([saveRoute(featureName, specName, route), saveTraceActions(featureName, specName, traceActions)]);
+	const [routePath, actionsPath] = await Promise.all([saveRoute(featureName, specName, route), saveTraceActions(featureName, specName, validatedActions)]);
 	blank();
 	meta("route", routePath);
 	meta("saved", actionsPath);
-	meta("actions", traceActions.length);
+	meta("actions", validatedActions.length);
 	meta("status", overallStatus.toUpperCase());
 	const relatedPaths = relatedPathsBuffer !== null ? parseRelatedPathsBlock(relatedPathsBuffer) : null;
 	if (relatedPaths !== null) {
@@ -1222,34 +1558,23 @@ async function runTrace(featureName, specName, model) {
 	hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
 }
 /**
-* Execute setup procedures by running their test.spec.ts via vitest with a fixed session name.
-* Creates a temporary runner script that sets the session and imports each setup's test body.
+* Run the post-trace replay validation and emit user-visible drop reports.
+* Splitting this out keeps `runTrace` readable; the function is pure aside
+* from `log.*` and the agent-browser invocations inside `validateActions`.
 */
-async function runSetups(setups, sessionName) {
-	for (const ref of setups) {
-		info(`  setup: ${ref.name}`);
-		const scriptPath = join(getSetupDir(ref.name), "test.spec.ts");
-		let script = await readFile(scriptPath, "utf-8").catch(() => {
-			throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
-		});
-		for (const [key, value] of Object.entries(ref.params ?? {})) script = script.replaceAll(`{{${key}}}`, resolveEnvRefs(value));
-		script = script.replace(/process\.env\.AGENT_BROWSER_SESSION\s*\|?\|?=\s*`.+`;/, `process.env.AGENT_BROWSER_SESSION = ${JSON.stringify(sessionName)};`);
-		const tmpPath = join(getSetupDir(ref.name), `_run.spec.ts`);
-		await writeFile(tmpPath, script, "utf-8");
-		try {
-			const { exitCode, stdout, stderr } = await spawnVitestCaptured([
-				"run",
-				"--config",
-				bundledVitestConfigPath(),
-				tmpPath
-			]);
-			process.stdout.write(stdout);
-			if (stderr) process.stderr.write(stderr);
-			if (exitCode !== 0) throw new Error(`Setup '${ref.name}' failed (exit ${exitCode})`);
-		} finally {
-			await unlink(tmpPath).catch(() => {});
-		}
+function validateAndReport(actions) {
+	if (actions.length === 0) return actions;
+	const sessionName = `${generateSessionName()}-validate`;
+	blank();
+	info("post-trace validation (replaying recorded actions)...");
+	const { kept, dropped } = validateActions(actions, { sessionName });
+	if (dropped.length === 0) {
+		meta("validated", `${kept.length}/${actions.length} kept`);
+		return kept;
 	}
+	for (const d of dropped) warn(`dropped action #${d.index + 1} (${d.action.command}${d.action.selector ? " " + d.action.selector : ""}): ${d.reason}`);
+	meta("validated", `${kept.length}/${actions.length} kept (${dropped.length} dropped)`);
+	return kept;
 }
 function parseStatusLine(text) {
 	for (const line of text.split("\n")) {
@@ -1346,21 +1671,32 @@ function parseAbAction(line) {
 }
 //#endregion
 //#region src/codegen/actions-to-script.ts
-function actionsToScript(actions, title, setupScripts) {
+function actionsToScript(input) {
+	const { actions, testName, stepMarkers = [] } = input;
 	const parts = [...[
 		`import { test } from "vitest";`,
 		`import { spawnSync } from "node:child_process";`,
-		`import { ab, abWait, abAssertTextVisible, abAssertVisible, abAssertNotVisible, abAssertUrl, abAssertEnabled, abAssertDisabled, abAssertChecked, abAssertUnchecked } from "ccqa/test-helpers";`,
+		`import { ${[
+			"ab",
+			"abWait",
+			"abAssertTextVisible",
+			"abAssertVisible",
+			"abAssertNotVisible",
+			"abAssertUrl",
+			"abAssertEnabled",
+			"abAssertDisabled",
+			"abAssertChecked",
+			"abAssertUnchecked"
+		].join(", ")} } from "ccqa/test-helpers";`,
 		"",
-		`// Single session shared across all tests — reset per run via cookies clear in first test.`,
-		`// Use ||= so an outer harness (e.g. ccqa generate's auto-fix loop) can pre-set the session`,
-		`// name and inspect the same session after the run finishes.`,
+		`// Single session shared across the run. Use ||= so an outer harness`,
+		`// (e.g. ccqa generate's auto-fix loop) can pre-set the session name`,
+		`// and inspect the same session after the run finishes.`,
 		`process.env.AGENT_BROWSER_SESSION ||= \`ccqa-run-\${Date.now()}\`;`,
 		""
 	]];
-	if (setupScripts?.length) for (const setup of setupScripts) parts.push(`test("setup: ${setup.name}", () => {`, setup.body, "}, 3 * 60 * 1000);", "");
-	const body = actionsToLines(actions).map((l) => `  ${l}`).join("\n");
-	parts.push(`test(${JSON.stringify(title)}, () => {`, body, "}, 5 * 60 * 1000);", "");
+	const body = actionsToLines(actions, stepMarkers).map((l) => `  ${l}`).join("\n");
+	parts.push(`test(${JSON.stringify(testName)}, () => {`, body, "}, 5 * 60 * 1000);", "");
 	return parts.join("\n");
 }
 /** Commands that interact with page elements and need the page to be loaded */
@@ -1375,11 +1711,18 @@ const ELEMENT_COMMANDS = new Set([
 	"hover",
 	"drag"
 ]);
-function actionsToLines(actions) {
+function actionsToLines(actions, stepMarkers) {
 	const lines = [];
 	let prevLine = null;
 	let prevCommand = null;
-	for (const action of actions) {
+	const markerByIndex = new Map(stepMarkers.map((m) => [m.actionIndex, m]));
+	for (let i = 0; i < actions.length; i++) {
+		const marker = markerByIndex.get(i);
+		if (marker) {
+			if (lines.length > 0) lines.push("");
+			lines.push(`// step: ${marker.stepId} [${marker.source}]`);
+		}
+		const action = actions[i];
 		const line = actionToLine(action);
 		if (line === null) continue;
 		if (line === prevLine) continue;
@@ -1398,16 +1741,16 @@ function actionToLine(action) {
 	if ("selector" in action && isRefSelector(action.selector)) return null;
 	switch (action.command) {
 		case "cookies_clear": return `ab("cookies", "clear");`;
-		case "open": return `ab("open", ${j((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
+		case "open": return `ab("open", ${jExpr((action.value ?? "").replace(/^["']|["']$/g, ""))});`;
 		case "snapshot": return action.observation ? `// ${action.observation}` : null;
 		case "click": return `ab("click", ${j(action.selector)});`;
 		case "dblclick": return `ab("dblclick", ${j(action.selector)});`;
-		case "fill": return `ab("fill", ${j(action.selector)}, ${j(action.value)});`;
-		case "type": return `ab("fill", ${j(action.selector)}, ${j(action.value)});`;
+		case "fill": return `ab("fill", ${j(action.selector)}, ${jExpr(action.value)});`;
+		case "type": return `ab("fill", ${j(action.selector)}, ${jExpr(action.value)});`;
 		case "check": return `ab("check", ${j(action.selector)});`;
 		case "uncheck": return `ab("uncheck", ${j(action.selector)});`;
-		case "press": return `ab("press", ${j(action.value)});`;
-		case "select": return `ab("select", ${j(action.selector)}, ${j(action.value)});`;
+		case "press": return `ab("press", ${jExpr(action.value)});`;
+		case "select": return `ab("select", ${j(action.selector)}, ${jExpr(action.value)});`;
 		case "hover": return `ab("hover", ${j(action.selector)});`;
 		case "scroll": return `ab("scroll", ${[action.direction ?? "down", ...action.pixels ? [action.pixels] : []].map(j).join(", ")});`;
 		case "drag": return `ab("drag", ${j(action.selector)}, ${j(action.target)});`;
@@ -1423,10 +1766,10 @@ function actionToLine(action) {
 			let assertLine = null;
 			switch (action.assertType) {
 				case "text_visible":
-					if (val) assertLine = `abAssertTextVisible(${j(val)});`;
+					if (val) assertLine = `abAssertTextVisible(${jExpr(val)});`;
 					break;
 				case "text_not_visible":
-					if (val) assertLine = `abAssertNotVisible(${j("text=" + val)}, 180_000);`;
+					if (val) assertLine = `abAssertNotVisible(${jExpr("text=" + val)}, 180_000);`;
 					break;
 				case "element_visible":
 					if (sel) assertLine = `abAssertVisible(${j(sel)});`;
@@ -1435,7 +1778,7 @@ function actionToLine(action) {
 					if (sel) assertLine = `abAssertNotVisible(${j(sel)});`;
 					break;
 				case "url_contains":
-					if (val) assertLine = `abAssertUrl(${j(val)});`;
+					if (val) assertLine = `abAssertUrl(${jExpr(val)});`;
 					break;
 				case "element_enabled":
 					if (sel && !sel.startsWith("text=") && !sel.startsWith("[aria-label=")) assertLine = `abAssertEnabled(${j(sel)});`;
@@ -1458,6 +1801,14 @@ function actionToLine(action) {
 }
 /** JSON.stringify — produces a quoted string literal safe for embedding in TS source. */
 const j = (s) => JSON.stringify(s);
+/**
+* Like `j`, but recognises `$VAR` / `${VAR}` env-ref forms in the value and
+* emits them as `${process.env.VAR ?? ""}` template-literal substitutions
+* instead of baking the literal `$VAR` string into the script. Used for
+* values that came from a spec or block param: form fills, opened URLs,
+* assertion texts/URLs.
+*/
+const jExpr = (s) => envRefsToJsExpression(s);
 //#endregion
 //#region src/prompts/codegen.ts
 function buildCleanupPrompt(actions) {
@@ -1490,6 +1841,109 @@ ${actions.map((a, i) => {
 	}).join("\n")}`;
 }
 //#endregion
+//#region src/codegen/cleanup.ts
+/**
+* Best-effort cleanup of a recorded action list. Hands the actions to
+* Claude with the cleanup prompt and parses the returned JSON array; on
+* any failure (Claude error, malformed JSON, empty array) falls back to
+* the original input so the caller can always proceed.
+*
+* Note: the prompt deliberately does not surface the `stepId` field.
+* Callers that need to preserve stepIds across cleanup (only `ccqa generate`
+* today) must re-attach them after this returns.
+*/
+async function cleanupActions$1(actions, model) {
+	try {
+		const { result, isError } = await invokeClaudeStreaming({
+			prompt: buildCleanupPrompt(actions),
+			disableBuiltinTools: true,
+			maxTurns: 1,
+			model
+		}, () => {});
+		if (isError || !result) return actions;
+		const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
+		const parsed = JSON.parse(json);
+		if (Array.isArray(parsed) && parsed.length > 0) return parsed;
+	} catch {}
+	return actions;
+}
+//#endregion
+//#region src/runtime/bundled-config.ts
+const CANDIDATES = [
+	"../runtime/vitest.config.mjs",
+	"./vitest.config.mjs",
+	"./vitest.config.ts"
+];
+function bundledVitestConfigPath() {
+	for (const rel of CANDIDATES) {
+		const candidate = fileURLToPath(new URL(rel, import.meta.url));
+		try {
+			accessSync(candidate);
+			return candidate;
+		} catch {}
+	}
+	return fileURLToPath(new URL("./vitest.config.ts", import.meta.url));
+}
+//#endregion
+//#region src/runtime/spawn-vitest.ts
+const require$1 = createRequire(import.meta.url);
+function resolveVitestBin() {
+	const pkgPath = require$1.resolve("vitest/package.json");
+	const pkg = require$1(pkgPath);
+	const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.vitest;
+	if (!binRel) throw new Error(`vitest package.json has no bin entry (resolved at ${pkgPath})`);
+	return resolve(dirname(pkgPath), binRel);
+}
+async function spawnVitestTeed(args, opts = {}) {
+	const child = spawnVitestChild(args, opts, "pipe");
+	const [stdout, stderr, exitCode] = await Promise.all([
+		teeDrain(child.stdout, process.stdout),
+		teeDrain(child.stderr, process.stderr),
+		waitExit(child)
+	]);
+	return {
+		exitCode,
+		stdout,
+		stderr
+	};
+}
+function spawnVitestStreaming(args, opts = {}) {
+	const child = spawnVitestChild(args, opts, "pipe");
+	return {
+		child,
+		stdout: child.stdout,
+		stderr: child.stderr,
+		exited: waitExit(child)
+	};
+}
+function spawnVitestChild(args, opts, stdio) {
+	const vitestBin = resolveVitestBin();
+	return spawn(process.execPath, [vitestBin, ...args], {
+		cwd: opts.cwd,
+		env: opts.env ?? process.env,
+		stdio: [
+			"ignore",
+			stdio,
+			stdio
+		]
+	});
+}
+async function teeDrain(stream, sink) {
+	stream.setEncoding("utf8");
+	let buf = "";
+	for await (const chunk of stream) {
+		buf += chunk;
+		sink.write(chunk);
+	}
+	return buf;
+}
+function waitExit(child) {
+	return new Promise((resolvePromise, rejectPromise) => {
+		child.once("exit", (code) => resolvePromise(code ?? 0));
+		child.once("error", rejectPromise);
+	});
+}
+//#endregion
 //#region src/diagnose/apply.ts
 function applyDiagnosis(script, diagnosis) {
 	switch (diagnosis.type) {
@@ -1540,6 +1994,7 @@ function applyTiming(script, fixes) {
 		summary: summary.join("; ")
 	};
 }
+const REMOVABLE_ASSERT_RE = /\b(?:abAssert\w*|abWait)\b/;
 function applyOverAssertion(script, lineNumbers) {
 	if (lineNumbers.length === 0) return {
 		applied: false,
@@ -1552,13 +2007,13 @@ function applyOverAssertion(script, lineNumbers) {
 		const idx = line - 1;
 		if (idx < 0 || idx >= lines.length) continue;
 		const content = lines[idx];
-		if (!/abAssert/.test(content)) continue;
+		if (!REMOVABLE_ASSERT_RE.test(content)) continue;
 		removed.push(`line ${line}: ${content.trim()}`);
 		lines.splice(idx, 1);
 	}
 	if (removed.length === 0) return {
 		applied: false,
-		reason: "no abAssert lines matched the proposed line numbers"
+		reason: "no abAssert/abWait lines matched the proposed line numbers"
 	};
 	return {
 		applied: true,
@@ -1604,7 +2059,7 @@ function previewDiff(before, after) {
 //#endregion
 //#region src/diagnose/prompt.ts
 function buildDiagnosePrompt(input) {
-	const { script, specMarkdown, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
+	const { script, specYaml, actions, failureLog, pageSnapshot, outputLanguage = "en" } = input;
 	const numbered = script.split("\n").map((l, i) => `${i + 1}: ${l}`).join("\n");
 	return `You are diagnosing a failing E2E test. The test was generated from a recorded trace of the original interaction. Compare the failing run against the original spec and recorded actions to determine WHY the test failed and what the right fix is.
@@ -1695,11 +2150,11 @@ Pick exactly ONE category. The output JSON must follow the shape for that catego
 - Your **final** assistant message must start with \`{\` and end with \`}\` — a single JSON object, nothing before or after. No prose preamble like "Confirmed: ...", no markdown fences, no commentary, no tool calls in the same turn. If you have an analysis sentence, put it in the \`reasoning\` field.
 - Line numbers refer to the numbered test script below (1-based).
 - For SELECTOR_DRIFT, \`oldSelector\` must match a substring of the script at that line; \`newSelector\` must be backed by a concrete file:line you read with Grep/Read (do not invent). Cite the evidence in \`reasoning\`.
-- For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`).
-- Cross-check assertions against the spec markdown. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.
+- For OVER_ASSERTION, only include lines that contain assert calls (\`abAssert*\`) or existence-checking waits (\`abWait\`); a recorded \`abWait("[selector]")\` is an implicit existence assertion and a valid removal candidate when the spec never required that element to be present.
+- Cross-check assertions against the spec YAML. If the spec doesn't require the assertion, OVER_ASSERTION is the better diagnosis than SELECTOR_DRIFT.
-## Test Spec (test-spec.md)
-${specMarkdown}
+## Test Spec (spec.yaml)
+${specYaml}
 ## Recorded Actions (actions.json summary)
 ${actions.map((a, i) => {
@@ -1910,8 +2365,7 @@ function normaliseSleepFixes(raw) {
 		const line = typeof item["line"] === "number" ? item["line"] : null;
 		if (line === null) continue;
 		const reason = typeof item["reason"] === "string" ? item["reason"] : "";
-		const kind = item["kind"];
-		if (kind === "insert" || typeof item["seconds"] === "number" && item["increase_to"] === void 0) {
+		if (item["kind"] === "insert") {
 			const seconds = typeof item["seconds"] === "number" ? item["seconds"] : null;
 			if (seconds === null) continue;
 			out.push({
@@ -1920,9 +2374,7 @@ function normaliseSleepFixes(raw) {
 				seconds,
 				reason
 			});
-			continue;
-		}
-		if (kind === "increase" || typeof item["increase_to"] === "number") {
+		} else if (item["kind"] === "increase") {
 			const increaseTo = typeof item["increase_to"] === "number" ? item["increase_to"] : null;
 			if (increaseTo === null) continue;
 			out.push({
@@ -1931,7 +2383,6 @@ function normaliseSleepFixes(raw) {
 				increase_to: increaseTo,
 				reason
 			});
-			continue;
 		}
 	}
 	return out;
@@ -2116,7 +2567,7 @@ const DEFAULT_CONFIDENCE_THRESHOLD = .8;
 * or the diagnose loop chose to bail out early.
 */
 async function runAutoFixLoop(input) {
-	const { scriptPath, initialRun, specMarkdown, actions, maxRetries, mode, runVitest, agentBrowserSession, outputLanguage, model } = input;
+	const { scriptPath, initialRun, specYaml, actions, maxRetries, mode, runVitest, agentBrowserSession, outputLanguage, model } = input;
 	let { exitCode, output, currentScript } = initialRun;
 	if (exitCode === 0) return true;
 	for (let attempt = 1; attempt <= maxRetries; attempt++) {
@@ -2127,7 +2578,7 @@ async function runAutoFixLoop(input) {
 		else fix("page snapshot unavailable; continuing without it");
 		const fixed = await diagnoseAndFix({
 			script: currentScript,
-			specMarkdown,
+			specYaml,
 			actions,
 			failureLog: output,
 			pageSnapshot: pageSnapshot ?? void 0,
@@ -2148,10 +2599,10 @@ async function runAutoFixLoop(input) {
 	return false;
 }
 async function diagnoseAndFix(input) {
-	const { script, specMarkdown, actions, failureLog, pageSnapshot, mode, outputLanguage, model } = input;
+	const { script, specYaml, actions, failureLog, pageSnapshot, mode, outputLanguage, model } = input;
 	const outcome = await timedPhase("diagnose", () => diagnose({
 		script,
-		specMarkdown,
+		specYaml,
 		actions,
 		failureLog,
 		pageSnapshot,
@@ -2186,7 +2637,7 @@ async function diagnoseAndFix(input) {
 		return apply.script;
 	}
 	if (decision === "skip-low-confidence") {
-		fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (--no-interactive)`);
+		fix(`confidence ${result.confidence.toFixed(2)} below threshold ${DEFAULT_CONFIDENCE_THRESHOLD}; skipping (mode: ${mode})`);
 		handoffToUser(result, outcome.raw, outputLanguage);
 		return null;
 	}
@@ -2210,10 +2661,15 @@ async function diagnoseAndFix(input) {
 			process.exit(1);
 	}
 }
+/**
+* Map a diagnosis to one of three actions. `auto` previously bypassed the
+* confidence threshold; it no longer does — a low-confidence guess can
+* corrupt working code, and CI wants "apply obvious fixes, fail loudly on
+* the rest" rather than "apply every guess".
+*/
 function decide(result, mode) {
-	if (mode === "auto") return "apply-auto";
 	const highConfidence = result.confidence >= DEFAULT_CONFIDENCE_THRESHOLD;
-	if (mode === "non-interactive") return highConfidence ? "apply-auto" : "skip-low-confidence";
+	if (mode === "auto" || mode === "non-interactive") return highConfidence ? "apply-auto" : "skip-low-confidence";
 	return highConfidence ? "apply-auto" : "interactive";
 }
 function reportDiagnosis(result) {
@@ -2250,27 +2706,27 @@ function handoffMessage(diagnosis, language) {
 }
 function handoffEn(diagnosis) {
 	switch (diagnosis.type) {
-		case "DATA_MISSING": return [`application-side issue: required data is missing. ${diagnosis.reason}`, "next step: seed the data (or update test-spec.md prerequisites), then re-run trace + generate."];
+		case "DATA_MISSING": return [`application-side issue: required data is missing. ${diagnosis.reason}`, "next step: seed the data (or update spec.yaml prerequisites), then re-run trace + generate."];
 		case "UNKNOWN": return [`could not classify the failure. ${diagnosis.reason}`, "next step: read the failure log above, decide whether the test or the app is wrong, and fix manually. consider re-running ccqa trace if the recorded flow no longer matches the live app."];
 		case "SELECTOR_DRIFT": return [
 			`selector likely drifted but auto-apply was not safe.`,
 			`proposed: line ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason}).`,
 			"next step: confirm in the live app and either accept the proposal manually, or re-run ccqa trace to recapture the new selector."
 		];
-		case "OVER_ASSERTION": return [`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`, "next step: cross-check test-spec.md. either delete the assertion from the test, or tighten the spec to require it."];
+		case "OVER_ASSERTION": return [`assertion may not be required by the spec. lines: ${diagnosis.lines.join(", ")} (${diagnosis.reason}).`, "next step: cross-check spec.yaml. either delete the assertion from the test, or tighten the spec to require it."];
 		case "TIMING_ISSUE": return [`timing fix proposed but couldn't be applied automatically.`, "next step: insert a sleep manually before the failing line, or re-run with a higher confidence trace."];
 	}
 }
 function handoffJa(diagnosis) {
 	switch (diagnosis.type) {
-		case "DATA_MISSING": return [`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`, "次のステップ: データを seed する（または test-spec.md の prerequisites を更新）してから ccqa trace + generate をやり直してください。"];
+		case "DATA_MISSING": return [`アプリ側の問題: 必要なデータが不足しています。${diagnosis.reason}`, "次のステップ: データを seed する（または spec.yaml の prerequisites を更新）してから ccqa trace + generate をやり直してください。"];
 		case "UNKNOWN": return [`失敗を分類できませんでした。${diagnosis.reason}`, "次のステップ: 上の失敗ログを確認し、テストとアプリのどちらが原因か判断して手動で修正してください。記録した手順がアプリの現状と合わない場合は ccqa trace の再実行を検討してください。"];
 		case "SELECTOR_DRIFT": return [
 			"selector が変わった可能性が高いですが、自動適用は安全でないと判断しました。",
 			`提案: 行 ${diagnosis.line}: "${diagnosis.oldSelector}" → "${diagnosis.newSelector}" (${diagnosis.reason})`,
 			"次のステップ: アプリで新 selector を確認し、手動で適用するか ccqa trace をやり直して新しい selector を取り直してください。"
 		];
-		case "OVER_ASSERTION": return [`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`, "次のステップ: test-spec.md と照合して、テスト側の assertion を削るか、spec 側を更新してください。"];
+		case "OVER_ASSERTION": return [`spec が要求していない assertion の可能性があります。対象行: ${diagnosis.lines.join(", ")} (${diagnosis.reason})`, "次のステップ: spec.yaml と照合して、テスト側の assertion を削るか、spec 側を更新してください。"];
 		case "TIMING_ISSUE": return ["timing 関連の修正案は出ましたが、自動適用できませんでした。", "次のステップ: 失敗行の前に手動で sleep を入れるか、より信頼度の高い trace を取り直してください。"];
 	}
 }
@@ -2306,18 +2762,24 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
 	meta("actions", actions.length);
 	const specContent = await readSpecFile(featureName, specName);
 	const spec = parseTestSpec(specContent);
-	const setupScripts = await loadSetupScripts(spec.setups);
-	if (setupScripts.length > 0) meta("setups", setupScripts.map((s) => s.name).join(", "));
+	const expanded = expandSpec(spec, { blocks: await loadAllBlocks() });
+	await warnStaleBlockArtifacts();
+	meta("steps", expanded.length);
 	meta("fix-mode", mode);
 	meta("language", outputLanguage);
 	blank();
-	const cleanedActions = await cleanupActions$1(actions, model);
+	const cleanedActions = await cleanupActions(actions, model);
 	if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
-	const scriptPath = await saveTestScript(featureName, specName, actionsToScript(cleanedActions, spec.title, setupScripts.length > 0 ? setupScripts : void 0));
+	const markers = buildStepMarkers(expanded, cleanedActions);
+	const scriptPath = await saveTestScript(featureName, specName, actionsToScript({
+		actions: cleanedActions,
+		testName: spec.title,
+		stepMarkers: markers
+	}));
 	meta("saved", scriptPath);
 	blank();
 	const agentBrowserSession = useSnapshot ? `ccqa-generate-${Date.now()}` : void 0;
-	const runVitestForSession = (path) => runVitest$1(path, agentBrowserSession);
+	const runVitestForSession = (path) => runVitest(path, agentBrowserSession);
 	let signalHandler = null;
 	if (agentBrowserSession) {
 		await closeSession(agentBrowserSession);
@@ -2336,7 +2798,7 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
 		if (await runAutoFixLoop({
 			scriptPath,
 			initialRun,
-			specMarkdown: specContent,
+			specYaml: specContent,
 			actions: cleanedActions,
 			maxRetries,
 			mode,
@@ -2358,6 +2820,30 @@ async function runGenerate(featureName, specName, maxRetries, mode, force, useSn
 		if (agentBrowserSession) await closeSession(agentBrowserSession);
 	}
 }
+/**
+* Build the per-step markers consumed by `actionsToScript`. Each action's
+* `stepId` (assigned at trace time from the last `STEP_START|...` line)
+* groups contiguous actions; we emit one marker at the first action of
+* each contiguous run. Unknown step ids are skipped rather than mis-labelled.
+*/
+function buildStepMarkers(steps, actions) {
+	const stepById = new Map(steps.map((s) => [s.id, s]));
+	const markers = [];
+	let lastEmittedStepId = null;
+	for (let i = 0; i < actions.length; i++) {
+		const id = actions[i].stepId;
+		if (!id || id === lastEmittedStepId) continue;
+		const step = stepById.get(id);
+		if (!step) continue;
+		markers.push({
+			actionIndex: i,
+			stepId: step.id,
+			source: step.source
+		});
+		lastEmittedStepId = id;
+	}
+	return markers;
+}
 async function confirmOverwrite(path) {
 	if (!process.stdin.isTTY) {
 		warn(`${path} exists and stdin is not a TTY; refusing to overwrite. Pass --force to allow.`);
@@ -2377,67 +2863,7 @@ async function confirmOverwrite(path) {
 		rl.close();
 	}
 }
-async function loadSetupScripts(setups) {
-	if (!setups?.length) return [];
-	const result = [];
-	for (const ref of setups) {
-		const scriptPath = join(getSetupDir(ref.name), "test.spec.ts");
-		const resolved = replacePlaceholders(extractTestBody(await readFile(scriptPath, "utf-8").catch(() => {
-			throw new Error(`Setup test script not found: ${scriptPath}. Run \`ccqa generate-setup ${ref.name}\` first.`);
-		})), ref.params ?? {});
-		result.push({
-			name: ref.name,
-			body: resolved
-		});
-	}
-	return result;
-}
-/**
-* Extract the test body (statements inside the test callback) from a setup
-* test script.
-*
-* Locates the first arrow callback (`=> {`) after a top-level `test(` call
-* and returns the text between the matching `{` and `}`. Handles both
-* single-line and multi-line `test(...)` formatting (the latter is what
-* prettier produces).
-*
-* Brace tracking is naive (string/regex/comment literals are not parsed
-* specially), but setup test scripts are themselves generated by ccqa and
-* follow a fixed shape, so this is sufficient in practice.
-*/
-function extractTestBody(script) {
-	const testCallMatch = /\btest\s*\(/.exec(script);
-	if (!testCallMatch) return "";
-	const arrowIdx = script.indexOf("=> {", testCallMatch.index);
-	if (arrowIdx === -1) return "";
-	const bodyStart = arrowIdx + 4;
-	let depth = 1;
-	let i = bodyStart;
-	for (; i < script.length; i++) {
-		const ch = script[i];
-		if (ch === "{") depth++;
-		else if (ch === "}") {
-			depth--;
-			if (depth === 0) break;
-		}
-	}
-	if (depth !== 0) return "";
-	return script.slice(bodyStart, i).replace(/^\n/, "").replace(/\n\s*$/, "");
-}
-function replacePlaceholders(body, params) {
-	let result = body;
-	for (const [key, value] of Object.entries(params)) if (hasEnvRef(value)) {
-		const expr = envRefsToJsExpression(value);
-		const re = new RegExp(`(["'])\\{\\{${escapeRegExp(key)}\\}\\}\\1`, "g");
-		result = result.replace(re, expr);
-		result = result.replaceAll(`{{${key}}}`, value);
-	} else result = result.replaceAll(`{{${key}}}`, value);
-	return result;
-}
-function escapeRegExp(s) {
-	return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-}
-async function runVitest$1(scriptPath, agentBrowserSession) {
+async function runVitest(scriptPath, agentBrowserSession) {
 	const { exitCode, stdout, stderr } = await spawnVitestTeed([
 		"run",
 		"--config",
@@ -2454,513 +2880,89 @@ async function runVitest$1(scriptPath, agentBrowserSession) {
 		currentScript
 	};
 }
-async function cleanupActions$1(actions, model) {
-	try {
-		const { result, isError } = await invokeClaudeStreaming({
-			prompt: buildCleanupPrompt(actions),
-			disableBuiltinTools: true,
-			maxTurns: 1,
-			model
-		}, () => {});
-		if (isError || !result) return actions;
-		const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
-		const parsed = JSON.parse(json);
-		if (Array.isArray(parsed) && parsed.length > 0) return parsed;
-	} catch {}
-	return actions;
+async function cleanupActions(actions, model) {
+	const cleaned = await cleanupActions$1(actions, model);
+	return cleaned === actions ? actions : reattachStepIds(cleaned, actions);
 }
-//#endregion
-//#region src/cli/run.ts
-const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts");
-async function resolveVitestConfig() {
-	try {
-		await access(USER_VITEST_CONFIG);
-		return USER_VITEST_CONFIG;
-	} catch {
-		return bundledVitestConfigPath();
+/**
+* The Claude cleanup pass returns a pruned array without the `stepId` field
+* (the prompt deliberately doesn't expose it — that would make the prompt
+* easier to misformat). Re-attach stepIds here by replaying the cleaned
+* stream against the original and matching the next compatible action.
+*
+* Algorithm: walk both arrays in lockstep. For each cleaned action, scan
+* forward in `original` (from the last-matched cursor) for the next entry
+* with the same `command` + `selector` + `value` + `assertType` shape, and
+* borrow its `stepId`. Cleaned actions Claude invented from thin air (rare,
+* and explicitly forbidden by the prompt) end up with no stepId — codegen
+* just won't emit a step marker for that index, which is the same outcome
+* as a wholly stepId-less actions.json.
+*
+* The matching is forward-only so that if cleanup keeps two identical fills
+* (e.g. typing the same value twice intentionally), they're paired to the
+* first and second occurrence in the original — not both to the first.
+*/
+function reattachStepIds(cleaned, original) {
+	let cursor = 0;
+	const out = [];
+	for (const c of cleaned) {
+		let matched = null;
+		for (let i = cursor; i < original.length; i++) if (sameShape(c, original[i])) {
+			matched = original[i];
+			cursor = i + 1;
+			break;
+		}
+		if (matched?.stepId) out.push({
+			...c,
+			stepId: matched.stepId
+		});
+		else out.push(c);
 	}
+	return out;
 }
-const runCommand = new Command("run").argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all").description("Run generated agent-browser test scripts").action(async (target) => {
-	await runTests(target);
-});
-async function runTests(target) {
-	header("run", target);
-	const specs = await resolveSpecs(target);
-	if (specs.length === 0) {
-		error("no test scripts found");
-		hint("run 'ccqa generate <feature>/<spec>' first to generate tests");
-		process.exit(1);
-	}
-	const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
-	const summaries = [];
-	let overallExitCode = 0;
-	const vitestConfig = await resolveVitestConfig();
-	try {
-		for (let i = 0; i < specs.length; i++) {
-			const { featureName, specName } = specs[i];
-			const scriptFile = await getTestScript(featureName, specName);
-			if (!scriptFile) {
-				warn(`${featureName}/${specName}: no test.spec.ts found`);
-				continue;
-			}
-			run(`${featureName}/${specName}`);
-			meta("test", scriptFile);
-			blank();
-			const reportFile = join(tmpDir, `report-${i}.json`);
-			const proc = spawnVitestStreaming([
-				"run",
-				"--config",
-				vitestConfig,
-				scriptFile,
-				"--reporter=json",
-				`--outputFile.json=${reportFile}`
-			]);
-			await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]);
-			const exitCode = await proc.exited;
-			if (exitCode !== 0) overallExitCode = exitCode;
-			const report = await readReport(reportFile);
-			summaries.push({
-				featureName,
-				specName,
-				scriptFile,
-				report,
-				exitCode
-			});
-			blank();
-		}
-		printSummary(summaries);
-	} finally {
-		await rm(tmpDir, {
-			recursive: true,
-			force: true
-		});
-	}
-	process.exit(overallExitCode);
-}
-async function readReport(path) {
-	try {
-		const raw = await readFile(path, "utf8");
-		return JSON.parse(raw);
-	} catch {
-		return null;
-	}
-}
-const useColor = process.stdout.isTTY && process.env.NO_COLOR == null;
-const C = {
-	reset: useColor ? "\x1B[0m" : "",
-	bold: useColor ? "\x1B[1m" : "",
-	dim: useColor ? "\x1B[2m" : "",
-	green: useColor ? "\x1B[32m" : "",
-	red: useColor ? "\x1B[31m" : "",
-	yellow: useColor ? "\x1B[33m" : "",
-	cyan: useColor ? "\x1B[36m" : "",
-	gray: useColor ? "\x1B[90m" : ""
-};
-function printSummary(summaries) {
-	process.stdout.write(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
-	let totalTests = 0;
-	let totalPassed = 0;
-	let totalFailed = 0;
-	let totalSkipped = 0;
-	for (const s of summaries) {
-		const header = `${C.bold}${s.featureName}/${s.specName}${C.reset}`;
-		if (!s.report) {
-			const icon = s.exitCode === 0 ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
-			process.stdout.write(`${icon} ${header} ${C.dim}(no report)${C.reset}\n`);
-			continue;
-		}
-		totalTests += s.report.numTotalTests;
-		totalPassed += s.report.numPassedTests;
-		totalFailed += s.report.numFailedTests;
-		totalSkipped += s.report.numPendingTests;
-		const ok = s.report.success;
-		const icon = ok ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
-		const countColor = ok ? C.green : C.red;
-		process.stdout.write(`${icon} ${header}  ${countColor}${s.report.numPassedTests}/${s.report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
-		for (const file of s.report.testResults) for (const a of file.assertionResults) {
-			const aIcon = assertionIcon(a.status);
-			const dur = a.duration != null ? ` ${C.gray}${formatDuration(a.duration)}${C.reset}` : "";
-			process.stdout.write(`    ${aIcon} ${a.fullName}${dur}\n`);
-			if (a.status === "failed" && a.failureMessages?.length) for (const msg of a.failureMessages) {
-				const firstLine = msg.split("\n")[0] ?? msg;
-				process.stdout.write(`        ${C.red}${firstLine}${C.reset}\n`);
-			}
-		}
-	}
-	const specsPassed = summaries.filter((s) => s.exitCode === 0).length;
-	const specsFailed = summaries.filter((s) => s.exitCode !== 0).length;
-	process.stdout.write("\n");
-	process.stdout.write(`  ${C.bold}Specs${C.reset}   ${summaries.length}  (${C.green}${specsPassed} passed${C.reset}, ${specsFailed > 0 ? C.red : C.dim}${specsFailed} failed${C.reset})\n`);
-	process.stdout.write(`  ${C.bold}Tests${C.reset}   ${totalTests}  (${C.green}${totalPassed} passed${C.reset}, ${totalFailed > 0 ? C.red : C.dim}${totalFailed} failed${C.reset}, ${C.yellow}${totalSkipped} skipped${C.reset})\n`);
-	process.stdout.write("\n");
-}
-function assertionIcon(status) {
-	switch (status) {
-		case "passed": return `${C.green}✔${C.reset}`;
-		case "failed": return `${C.red}✖${C.reset}`;
-		case "skipped":
-		case "pending":
-		case "todo": return `${C.yellow}◌${C.reset}`;
-	}
-}
-function formatDuration(ms) {
-	if (ms < 1e3) return `${Math.round(ms)}ms`;
-	return `${(ms / 1e3).toFixed(2)}s`;
-}
-const NOISE_LINE_PATTERNS = [/^JSON report written to /];
-async function streamFiltered(source, sink) {
-	source.setEncoding("utf8");
-	let buffer = "";
-	for await (const chunk of source) {
-		buffer += chunk;
-		let nl = buffer.indexOf("\n");
-		while (nl !== -1) {
-			const line = buffer.slice(0, nl);
-			buffer = buffer.slice(nl + 1);
-			if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) sink.write(line + "\n");
-			nl = buffer.indexOf("\n");
-		}
-	}
-	if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) sink.write(buffer);
-}
-async function resolveSpecs(target) {
-	if (!target) return listAllSpecs();
-	if (target.includes("/")) {
-		const { featureName, specName } = parseSpecPath(target);
-		return [{
-			featureName,
-			specName
-		}];
-	}
-	return (await listSpecsForFeature(target)).map((specName) => ({
-		featureName: target,
-		specName
-	}));
+function sameShape(a, b) {
+	return a.command === b.command && (a.selector ?? "") === (b.selector ?? "") && (a.value ?? "") === (b.value ?? "") && (a.assertType ?? "") === (b.assertType ?? "");
 }
 //#endregion
-//#region src/cli/trace-setup.ts
-const traceSetupCommand = new Command("trace-setup").argument("<name>", "Setup name to trace (e.g. login)").description("Trace a setup procedure using dummy placeholder values").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (name, opts) => {
-	await runTraceSetup(name, opts.model);
-});
-async function runTraceSetup(name, model) {
-	header("trace-setup", name);
-	try {
-		meta("agent-browser", assertAgentBrowserAvailable());
-	} catch (e) {
-		if (e instanceof AgentBrowserUnavailableError) {
-			error(formatAgentBrowserUnavailableMessage());
-			process.exit(1);
-		}
-		throw e;
-	}
-	await ensureCcqaDir();
-	const spec = parseSetupSpec(await readSetupSpecFile(name));
-	const resolvedSpec = replacePlaceholdersWithDummies(spec);
-	const secretsToScrub = buildSecretsToScrub(spec);
-	meta("setup", spec.title);
-	meta("steps", spec.steps.length);
-	if (spec.placeholders) meta("placeholders", Object.keys(spec.placeholders).join(", "));
-	blank();
-	const systemPrompt = buildSetupTraceSystemPrompt(resolvedSpec);
-	const prompt = buildSetupTracePrompt(resolvedSpec);
-	info("Running agent-browser session...");
-	blank();
-	const routeSteps = [];
-	let overallStatus = "passed";
-	const traceActions = [];
-	const { isError } = await invokeClaudeStreaming({
-		prompt,
-		systemPrompt,
-		allowedTools: [
-			"Bash(*)",
-			"Read",
-			"Grep",
-			"Glob"
-		],
-		env: {
-			PATH: pathWithAgentBrowserShim(process.env["PATH"]),
-			ANTHROPIC_API_KEY: ""
-		},
-		model,
-		onAbAction: (abAction) => {
-			const action = parseAbAction(scrubSecrets(abAction, secretsToScrub));
-			if (action) traceActions.push(action);
-		},
-		onAbActionFailed: () => {
-			traceActions.pop();
-		}
-	}, (msg) => {
-		if (msg.type !== "assistant") return;
-		for (const block of msg.message.content ?? []) {
-			if (block.type !== "text" || !block.text) continue;
-			const text = block.text;
-			const statusLine = parseStatusLine(text);
-			if (statusLine) step(statusLine.type, statusLine.stepId, statusLine.detail);
-			for (const line of text.split("\n")) {
-				const trimmed = line.trim();
-				if (trimmed.startsWith("ROUTE_STEP|")) {
-					const routeStep = parseRouteStep(trimmed);
-					if (routeStep) {
-						routeSteps.push(routeStep);
-						if (routeStep.status === "FAILED") overallStatus = "failed";
-					}
-				} else if (trimmed.startsWith("AB_ACTION|snapshot|") || trimmed.startsWith("AB_ACTION|assert|")) {
-					const action = parseAbAction(scrubSecrets(trimmed, secretsToScrub));
-					if (action) traceActions.push(action);
-				}
-			}
-		}
-	});
-	if (isError) overallStatus = "failed";
-	const route = {
-		specName: name,
-		timestamp: (/* @__PURE__ */ new Date()).toISOString(),
-		status: overallStatus,
-		steps: routeSteps
-	};
-	const [routePath, actionsPath] = await Promise.all([saveSetupRoute(name, route), saveSetupActions(name, traceActions)]);
-	blank();
-	meta("route", routePath);
-	meta("saved", actionsPath);
-	meta("actions", traceActions.length);
-	meta("status", overallStatus.toUpperCase());
-	hint(`run 'ccqa generate-setup ${name}' to generate and validate the setup`);
-}
-function replacePlaceholdersWithDummies(spec) {
-	if (!spec.placeholders) return spec;
-	const dummies = spec.placeholders;
-	const resolve = (text) => {
-		let result = text;
-		for (const [key, def] of Object.entries(dummies)) result = result.replaceAll(`{{${key}}}`, resolveEnvRefs(def.dummy));
-		return result;
-	};
-	return {
-		...spec,
-		steps: spec.steps.map((step) => ({
-			...step,
-			instruction: resolve(step.instruction),
-			expected: resolve(step.expected)
-		}))
-	};
-}
+//#region src/claude/extract-json.ts
 /**
-* Build the substitution map used to scrub real secret values out of
-* recorded actions before they are written to actions.json.
-*
-* For each placeholder whose dummy contains env refs, store
-*   <resolved-value> -> <original ${VAR} string>
-* so that an `ab fill ... <secret>` line records the placeholder string
-* instead of the secret. Empty resolved values are skipped — they would
-* otherwise replace incidental empty strings in the recorded actions.
+* Pulls a JSON object out of a Claude completion. Accepts either a fenced
+* ```json block or a bare `{...}` payload that constitutes the whole reply.
+* Returns null when neither shape is present.
 */
-function buildSecretsToScrub(spec) {
-	const map = /* @__PURE__ */ new Map();
-	if (!spec.placeholders) return map;
-	const dummies = spec.placeholders;
-	for (const def of Object.values(dummies)) {
-		if (!hasEnvRef(def.dummy)) continue;
-		const resolved = resolveEnvRefs(def.dummy);
-		if (!resolved) continue;
-		map.set(resolved, def.dummy);
-	}
-	return map;
-}
-/** Replace every occurrence of a recorded secret with its `${VAR}` placeholder. */
-function scrubSecrets(line, secrets) {
-	if (secrets.size === 0) return line;
-	let result = line;
-	for (const [secret, placeholder] of secrets) {
-		if (!result.includes(secret)) continue;
-		result = result.split(secret).join(placeholder);
-	}
-	return result;
+function extractJsonBlock(text) {
+	const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
+	if (fenced && fenced[1]) return fenced[1].trim();
+	const trimmed = text.trim();
+	if (trimmed.startsWith("{") && trimmed.endsWith("}")) return trimmed;
+	return null;
 }
 //#endregion
-//#region src/cli/generate-setup.ts
-const generateSetupCommand = new Command("generate-setup").argument("<name>", "Setup name to generate (e.g. login)").description("Clean up, validate, and templatize setup actions").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--from-dummy", "Resume from existing test.dummy.spec.ts (after manual fix)").option("--auto", "Apply auto-fixes without confirmation regardless of confidence (CI use)").option("--no-interactive", "Never prompt; only auto-apply when confidence is high, otherwise give up").option("--language <bcp47>", "Language for diagnose reasoning / hint text (e.g. 'en', 'ja')", "en").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").action(async (name, opts) => {
-	const mode = resolveMode(opts);
-	await runGenerateSetup(name, parseInt(opts.maxRetries, 10), opts.fromDummy ?? false, mode, opts.language ?? "en", opts.model);
-});
-async function runGenerateSetup(name, maxRetries, fromDummy, mode, outputLanguage, model) {
-	header("generate-setup", name);
-	await ensureCcqaDir();
-	const specContent = await readSetupSpecFile(name);
-	const spec = parseSetupSpec(specContent);
-	const dummyPath = join(getSetupDir(name), "test.dummy.spec.ts");
-	const finalPath = join(getSetupDir(name), "test.spec.ts");
-	let cleanedActions = [];
-	if (fromDummy) {
-		if (!await stat(dummyPath).then(() => true).catch(() => false)) {
-			warn(`test.dummy.spec.ts not found. Run without --from-dummy first.`);
-			process.exit(1);
-		}
-		info("Resuming from existing test.dummy.spec.ts");
-	} else {
-		const { actions } = await getSetupActions(name);
-		meta("setup", spec.title);
-		meta("actions", actions.length);
-		meta("fix-mode", mode);
-		meta("language", outputLanguage);
-		blank();
-		cleanedActions = await cleanupActions(actions, model);
-		if (cleanedActions.length !== actions.length) meta("cleaned", cleanedActions.length);
-		await writeFile(dummyPath, actionsToScript(cleanedActions, spec.title), "utf-8");
-		meta("saved", dummyPath);
-	}
-	blank();
-	const agentBrowserSession = `ccqa-generate-setup-${name}-${Date.now()}`;
-	const runVitestForSession = (path) => runVitestResolved(path, agentBrowserSession);
-	await closeSession(agentBrowserSession);
-	const signalHandler = () => {
-		closeSession(agentBrowserSession).finally(() => process.exit(130));
-	};
-	process.once("SIGINT", signalHandler);
-	process.once("SIGTERM", signalHandler);
-	try {
-		const initialRun = await timedPhase("vitest run #1", () => runVitestForSession(dummyPath), "run");
-		let passed = initialRun.exitCode === 0;
-		if (!passed) passed = await runAutoFixLoop({
-			scriptPath: dummyPath,
-			initialRun,
-			specMarkdown: specContent,
-			actions: cleanedActions,
-			maxRetries,
-			mode,
-			runVitest: runVitestForSession,
-			agentBrowserSession,
-			outputLanguage,
-			model
-		});
-		if (!passed) {
-			warn("auto-fix exhausted; setup test still failing");
-			hint(`edit ${dummyPath} manually, then run: ccqa generate-setup ${name} --from-dummy`);
-			process.exit(1);
-		}
-		await writeFile(finalPath, reversePlaceholdersInScript(await readFile(dummyPath, "utf8"), spec.placeholders), "utf-8");
-		await unlink(dummyPath).catch(() => {});
-		blank();
-		meta("saved", finalPath);
-		hint(`setup '${name}' is ready; reference it in test-spec.md with setups: [{name: ${name}, params: {...}}]`);
-	} finally {
-		process.off("SIGINT", signalHandler);
-		process.off("SIGTERM", signalHandler);
-		await closeSession(agentBrowserSession);
-	}
+//#region src/prompts/draft.ts
+function buildNamingSystemPrompt() {
+	return `You name a new ccqa test case based on the user's intent and the existing feature tree.
+ccqa test cases live under \`.ccqa/features/<featureName>/test-cases/<specName>/spec.yaml\`.
+## Naming rules
+- featureName and specName are kebab-case ASCII (lowercase, words separated by '-').
+- featureName: a broad area (e.g. "tasks", "auth", "billing", "search").
+- specName: a short scenario name (e.g. "create-and-complete", "login-with-email", "search-by-tag").
+- Reuse existing featureName when the user's intent fits an existing area. Only invent a new featureName when the existing tree clearly does not cover the area.
+- specName must NOT collide with an existing spec under the chosen feature. If the natural name collides, pick a different one that distinguishes the new scenario from the existing ones.
+- Use the codebase (Read/Grep/Glob) sparingly to confirm domain vocabulary if helpful. Do not over-explore.
+## Output (STRICT)
+Output ONE fenced \`\`\`json block, nothing else outside it:
+{
+  "featureName": "<kebab-case>",
+  "specName": "<kebab-case>",
+  "reason": "<one short sentence: why this name and how it relates to existing specs>"
 }
-/**
-* Replace dummy values with {{placeholder}} directly in the test script text.
-* Longer dummy values are replaced first to avoid partial matches.
-*/
-function reversePlaceholdersInScript(script, placeholders) {
-	if (!placeholders) return script;
-	const entries = Object.entries(placeholders).sort((a, b) => b[1].dummy.length - a[1].dummy.length);
-	let result = script;
-	for (const [key, def] of entries) result = result.replaceAll(def.dummy, `{{${key}}}`);
-	return result;
-}
-async function runVitest(scriptPath, agentBrowserSession) {
-	const { exitCode, stdout, stderr } = await spawnVitestTeed([
-		"run",
-		"--config",
-		bundledVitestConfigPath(),
-		scriptPath
-	], agentBrowserSession ? { env: {
-		...process.env,
-		AGENT_BROWSER_SESSION: agentBrowserSession
-	} } : {});
-	const currentScript = await readFile(scriptPath, "utf8");
-	return {
-		exitCode,
-		output: stdout + stderr,
-		currentScript
-	};
-}
-/**
-* Run vitest on `test.dummy.spec.ts`, but transparently expand any `${VAR}`
-* env refs to real values for the duration of the run. The original file is
-* preserved unchanged so subsequent reverse-replace still sees the env-ref
-* literals. Auto-fix edits the original file (via writeFile in callers), so
-* we always re-read it before each invocation.
-*/
-async function runVitestResolved(scriptPath, agentBrowserSession) {
-	const original = await readFile(scriptPath, "utf8");
-	if (!hasEnvRef(original)) return runVitest(scriptPath, agentBrowserSession);
-	const tmpPath = scriptPath.replace(/\.ts$/, ".__resolved.spec.ts");
-	await writeFile(tmpPath, resolveEnvRefs(original), "utf-8");
-	try {
-		const { exitCode, stdout, stderr } = await spawnVitestTeed([
-			"run",
-			"--config",
-			bundledVitestConfigPath(),
-			tmpPath
-		], agentBrowserSession ? { env: {
-			...process.env,
-			AGENT_BROWSER_SESSION: agentBrowserSession
-		} } : {});
-		return {
-			exitCode,
-			output: stdout + stderr,
-			currentScript: original
-		};
-	} finally {
-		await unlink(tmpPath).catch(() => {});
-	}
-}
-async function cleanupActions(actions, model) {
-	try {
-		const { result, isError } = await invokeClaudeStreaming({
-			prompt: buildCleanupPrompt(actions),
-			disableBuiltinTools: true,
-			maxTurns: 1,
-			model
-		}, () => {});
-		if (isError || !result) return actions;
-		const json = result.trim().replace(/^```(?:json)?\n?([\s\S]*?)\n?```$/, "$1").trim();
-		const parsed = JSON.parse(json);
-		if (Array.isArray(parsed) && parsed.length > 0) return parsed;
-	} catch {}
-	return actions;
-}
-//#endregion
-//#region src/claude/extract-json.ts
-/**
-* Pulls a JSON object out of a Claude completion. Accepts either a fenced
-* ```json block or a bare `{...}` payload that constitutes the whole reply.
-* Returns null when neither shape is present.
-*/
-function extractJsonBlock(text) {
-	const fenced = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
-	if (fenced && fenced[1]) return fenced[1].trim();
-	const trimmed = text.trim();
-	if (trimmed.startsWith("{") && trimmed.endsWith("}")) return trimmed;
-	return null;
-}
-//#endregion
-//#region src/prompts/draft.ts
-function buildNamingSystemPrompt() {
-	return `You name a new ccqa test case based on the user's intent and the existing feature tree.
-ccqa test cases live under \`.ccqa/features/<featureName>/test-cases/<specName>/test-spec.md\`.
-## Naming rules
-- featureName and specName are kebab-case ASCII (lowercase, words separated by '-').
-- featureName: a broad area (e.g. "tasks", "auth", "billing", "search").
-- specName: a short scenario name (e.g. "create-and-complete", "login-with-email", "search-by-tag").
-- Reuse existing featureName when the user's intent fits an existing area. Only invent a new featureName when the existing tree clearly does not cover the area.
-- specName must NOT collide with an existing spec under the chosen feature. If the natural name collides, pick a different one that distinguishes the new scenario from the existing ones.
-- Use the codebase (Read/Grep/Glob) sparingly to confirm domain vocabulary if helpful. Do not over-explore.
-## Output (STRICT)
-Output ONE fenced \`\`\`json block, nothing else outside it:
-{
-  "featureName": "<kebab-case>",
-  "specName": "<kebab-case>",
-  "reason": "<one short sentence: why this name and how it relates to existing specs>"
-}
-`;
+`;
 }
 function buildNamingPrompt(intent, tree) {
 	return `## User intent
@@ -2970,7 +2972,7 @@ ${intent}
 ## Existing feature tree
 ${tree.length === 0 ? "(no existing features yet)" : tree.map((f) => {
-		const specLines = f.specs.length === 0 ? "  (no specs yet)" : f.specs.map((s) => `  - ${s.specName}${s.title ? ` — ${s.title}` : ""}`).join("\n");
+		const specLines = f.specs.length === 0 ? "  (no specs yet)" : f.specs.map((s) => `  - ${s.specName}`).join("\n");
 		return `- ${f.featureName}/\n${specLines}`;
 	}).join("\n")}
@@ -2979,48 +2981,58 @@ ${tree.length === 0 ? "(no existing features yet)" : tree.map((f) => {
 Pick featureName and specName for the new test case. Follow the naming rules. Avoid colliding with any existing specName under the chosen feature.
 `;
 }
-function buildDraftSystemPrompt() {
-	return `You are a QA engineer drafting and refining a ccqa test-spec.md.
+function buildDraftSystemPrompt(blocks) {
+	return `You are a QA engineer drafting and refining a ccqa spec.yaml.
 The CLI runs you in a loop: each turn the user gives an intent (first run) or a refinement instruction (later runs). You read the codebase, validate the spec, and return a single JSON report. The CLI displays a diff and asks the user whether to apply.
-## test-spec.md format (STRICT)
+## spec.yaml format (STRICT)
-YAML frontmatter + Markdown body.
+Pure YAML — no markdown body, no frontmatter dashes.
-Frontmatter fields:
-- title: string (required)
-- baseUrl: string (required, e.g. http://localhost:3000)
-- prerequisites: string (optional, free text)
-- setups: array of { name: string, params?: Record<string,string> } (optional)
-- relatedPaths: array of string (optional) — glob patterns identifying source files this spec depends on. Used by \`ccqa drift --changed\` in CI to skip drift checks for unrelated changes.
+Top-level fields:
+- \`title\`: string (required) — short human-readable name for the test
+- \`relatedPaths\`: array of glob string (optional) — source files this spec depends on, used by \`ccqa drift --changed\`
+- \`steps\`: array (required, at least one)
-Body must contain a \`## Steps\` section followed by step blocks:
+A step is one of two shapes:
+**Action step** — a user-facing browser interaction:
+\`\`\`yaml
+- instruction: <imperative; include the URL directly or via \${ENV_VAR}>
+  expected: <observable outcome — visible text, URL pattern, element state>
 \`\`\`
-### Step 1: <short title>
-- **Instruction**: <imperative, one sentence>
-- **Expected**: <observable outcome>
-### Step 2: <short title>
-...
+**Include step** — invoke a reusable block from \`.ccqa/blocks/<name>/spec.yaml\`:
+\`\`\`yaml
+- include: <block-name>
+  params:
+    <param-name>: <string value, can use \${ENV_VAR}>
 \`\`\`
+## URLs
+Each step writes the URL it opens directly inside \`instruction\` (e.g. \`"\${APP_URL}/articles を開く"\`). Use \`\${ENV_VAR}\` references for environment-specific values.
+## Available blocks
+${formatBlockList(blocks)}
 ## Quality rules
 - One user-facing action per step (login, click, fill, navigate, ...).
-- **Expected** must be assertion-friendly: visible text, URL pattern, element state.
-- Forbidden in **Expected**: timestamps, exact counts, session IDs, internal state.
+- \`expected\` must be assertion-friendly: visible text, URL pattern, element state.
+- Forbidden in \`expected\`: timestamps, exact counts, session IDs, internal state.
 - 3–8 steps is typical. Fewer means too coarse; more means too fine.
 ## Workflow (use Read / Grep / Glob extensively)
-1. Read the codebase under cwd to find concrete strings: routes, button labels, aria-labels, page titles, placeholders. Use those exact strings in **Expected**.
-2. If the spec references setups, Read \`.ccqa/setups/<name>/setup-spec.md\` and verify each \`params\` key matches the setup's \`placeholders\`.
-3. Populate \`relatedPaths\` in the frontmatter with **provisional** glob patterns pointing at the source files this spec touches: the route/page file for each URL the spec visits, plus the component files (or their parent feature directory) that render the aria-labels, placeholders, or visible texts the spec asserts on. Prefer directory globs (e.g. \`src/features/tasks/**\`) when several files in one area are involved. Be conservative — include a path if you're unsure rather than omit it. \`ccqa trace\` will refine this list later from real browser observations.
+1. Read the codebase under cwd to find concrete strings: routes, button labels, aria-labels, page titles, placeholders. Use those exact strings in \`expected\`.
+2. If you use \`include:\` steps, verify each \`params\` key matches a declared param of the block (see the Available blocks list above).
+3. Populate \`relatedPaths\` with **provisional** glob patterns pointing at the source files this spec touches: the route/page file for each URL the spec visits, plus the component files (or their parent feature directory) that render the aria-labels, placeholders, or visible texts the spec asserts on. Prefer directory globs (e.g. \`src/features/tasks/**\`) when several files in one area are involved. Be conservative — include a path if you're unsure rather than omit it. \`ccqa trace\` will refine this list later from real browser observations.
 4. Validate the (current or proposed) spec on four axes — emit one issue per finding:
-   - **assertable**: each Expected can be verified against a string/URL/state that exists in code.
-   - **setups**: referenced setup exists; params keys match placeholders.
+   - **assertable**: each \`expected\` can be verified against a string/URL/state that exists in code.
+   - **blocks**: every \`include\` resolves to a real block; every \`params\` key is declared on that block; every required param is provided.
    - **granularity**: not too coarse (multiple actions per step) nor too fine (snapshot-only steps); order is logical.
    - **unimplemented**: any feature mentioned in the spec that you cannot find in code.
@@ -3035,13 +3047,13 @@ Schema:
   "issues": [
     {
       "severity": "OK" | "WARN" | "ERROR",
-      "category": "assertable" | "setups" | "granularity" | "unimplemented",
+      "category": "assertable" | "blocks" | "granularity" | "unimplemented",
       "stepId": "step-01" | null,
       "message": "<one-line summary>",
       "detail": "<optional, multiline explanation>"
     }
   ],
-  "patch": "<COMPLETE rewritten test-spec.md, or empty string if no changes>"
+  "patch": "<COMPLETE rewritten spec.yaml, or empty string if no changes>"
 }
 \`\`\`
@@ -3049,17 +3061,25 @@ Schema:
 - \`patch\` must be the COMPLETE file content if non-empty (never a diff fragment).
 - The CLI replaces the file atomically with \`patch\`.
+- The patch must be valid YAML matching the schema above. The CLI re-parses it before applying; if it fails validation, the patch is rejected.
 - For **create** mode: produce a fresh spec from the user intent.
 - For **refine** mode with a non-empty user instruction: apply the user's request, plus fix any issues it introduces. Preserve the user's wording elsewhere.
 - For **refine** mode with an empty user instruction: only fix issues you find against the current spec; if everything is fine, return \`patch: ""\`.
 - If \`patch\` is the same as the current spec, return \`patch: ""\` instead.
 `;
 }
+function formatBlockList(blocks) { // Renders the known blocks as Markdown bullets for the "Available blocks" section of the draft system prompt.
+	if (blocks.length === 0) return "(no blocks defined yet — only action steps are available.)"; // placeholder sentence when there is nothing to list
+	return blocks.map((b) => {
+		const paramLines = b.params.length === 0 ? "    params: (none)" : b.params.map((p) => `    - ${p.name}${p.required ? "" : " (optional)"}${p.secret ? " [secret]" : ""}`).join("\n"); // one indented bullet per param; non-required params are tagged " (optional)", secret ones " [secret]"
+		return `- \`${b.name}\` — ${b.title}\n${paramLines}`; // block header line followed by its param lines
+	}).join("\n");
+}
 function buildDraftPrompt(input) {
 	const { mode, existing, userInput } = input;
 	if (mode === "create") return `## Mode
-create — no spec exists yet at the target path. Produce a fresh test-spec.md.
+create — no spec exists yet at the target path. Produce a fresh spec.yaml.
 ## User intent
@@ -3067,7 +3087,7 @@ ${userInput}
 ## Task
-Read the codebase under cwd. Discover concrete strings (routes, labels, titles). Produce a complete test-spec.md as the \`patch\` field, plus any issues you'd flag about your own draft.
+Read the codebase under cwd. Discover concrete strings (routes, labels, titles). Produce a complete spec.yaml as the \`patch\` field, plus any issues you'd flag about your own draft.
 `;
 	return `## Mode
@@ -3075,47 +3095,76 @@ refine — a spec already exists. Apply the user's instruction (if any) and vali
 ## Current spec
-\`\`\`markdown
+\`\`\`yaml
 ${existing}\`\`\`
-${userInput ? `## User refinement instruction\n\n${userInput}\n` : `## User refinement instruction\n\n(empty — re-validate the current spec against the codebase; only emit a non-empty patch if something is actually wrong)\n`}
-## Task
+${userInput ? `## User refinement instruction\n\n${userInput}\n` : `## User refinement instruction\n\n(empty — re-validate the current spec against the codebase; only emit a non-empty patch if something is actually wrong)\n`}## Task
-1. Read the codebase under cwd and any referenced setups.
+1. Read the codebase under cwd and any referenced blocks (\`.ccqa/blocks/<name>/spec.yaml\`).
 2. If the user's instruction is non-empty, apply it to the spec.
 3. Validate the resulting spec on the four axes. Emit issues.
 4. Return the complete updated spec as \`patch\`. If no changes are needed, return \`patch: ""\`.
 `;
 }
 //#endregion
+//#region src/prompts/drift.ts
+function buildDriftSystemPrompt(blocks) { // Drift system prompt: the full draft system prompt for `blocks`, followed by the CI-only drift instructions and severity policy below.
+	return `${buildDraftSystemPrompt(blocks)}
+## Drift mode
+You are running non-interactively in CI. The user will not see or apply the patch — only the \`issues\` array.
+- Always set \`patch\` to "" in your response.
+- Focus issue messages on what is **out of sync** between the spec and the current codebase: missing aria-labels, renamed routes, removed buttons, placeholders that no longer exist, include references that point to non-existent blocks.
+- Do NOT raise issues about stylistic preferences in the spec wording.
+- Treat \`category: unimplemented\` as the primary signal for drift: anything the spec asserts that you cannot find in code is a drift finding.
+## Drift severity policy (STRICT)
+The CLI exits non-zero when any issue has \`severity: "ERROR"\` (default) or — with \`--severity warn\` — when any \`WARN\` is present. Pick severity by **whether a deterministic replay of this spec would fail today**, not by how confident you are in your own analysis.
+### CRITICAL: spec ↔ source mismatch is ERROR, not "vague phrasing" WARN
+The most common false negative is treating a concrete spec/source mismatch as a WARN about "expected phrasing." It is not. Apply this decision rule **before** picking severity:
+1. **Pick the concrete strings the spec asserts** in each step's \`expected\` (visible text, aria-labels, button labels, route paths). For \`expected\` like "the Dashboard page is visible", the spec is asserting that the literal string "Dashboard" — or the page conceptually identified by that label — is rendered.
+2. **Search the source** for those exact strings (\`Grep\` / \`Read\`) at the location the step references (the relevant page/component/route).
+3. Classify:
+   - **ERROR** — the source instead renders a *different* string in that location (e.g. spec says "Dashboard", the breadcrumb in \`DashboardPage.tsx\` now renders "Overview"). A replay against the current source would fail; a replay against a stale staging environment would pass and *hide* the drift — exactly the case drift CI exists to catch. Cite both sides in \`detail\`: the spec line and the file:line of the source mismatch.
+   - **WARN (vague phrasing)** — the source's actual string IS present somewhere relevant; the \`expected\` just paraphrases it more loosely (e.g. spec says "the Save button is visible" and the source has both visible "Save" text and \`aria-label="Save"\`). Replay still passes; the spec could just be tightened.
+   - **OK** — the spec's exact string appears in source at the relevant location.
+Use **ERROR** when the spec would break on replay:
+- A selector the spec relies on (\`aria-label\`, \`placeholder\`, \`data-testid\`, button text) **does not exist anywhere in the source**.
+- A URL / route the spec navigates to is no longer defined.
+- An \`expected\` asserts a string or visible text that is no longer rendered by the relevant component.
+- The source renders a *different* string in the place the spec describes (per the decision rule above).
+- An \`include\` step references a block that does not exist under \`.ccqa/blocks/<name>/spec.yaml\`, or a \`params\` key is not declared on that block.
+- The spec references a feature/page that has been removed from the codebase.
+Use **WARN** when the spec is still likely to work, but quality could improve:
+- The \`expected\` paraphrases a string that **still exists** in source (the literal target is findable, just imprecisely worded).
+- A step bundles multiple actions, or a needed intermediate verification step is missing.
+- Stable signals exist that the spec could leverage but currently doesn't.
+- You are unsure whether a referenced string exists (give the user the benefit of the doubt; do not hard-fail CI on uncertainty).
+Use **OK** for axes you actively verified and found no issue.
+If you cannot decide between ERROR and WARN, choose WARN. Reserve ERROR for findings you can back up with a specific file path or grep result that proves the drift.
+Conversely: when you DO have a citation showing a concrete spec/source mismatch (per the decision rule above), you MUST use ERROR — "vague phrasing" WARN is not a safe fallback for an actual drift.
+`;
+}
+function buildDriftUserPrompt(existing) { // User prompt for a drift check; `existing` is the current spec text.
+	return buildDraftPrompt({ // reuse the draft prompt builder instead of a drift-specific template
+		mode: "refine",
+		existing,
+		userInput: "" // empty instruction selects the draft prompt's "re-validate the current spec" wording
+	});
+}
+//#endregion
 //#region src/types.ts
-const TestStepSchema = z.object({
-	id: z.string(),
-	title: z.string(),
-	instruction: z.string(),
-	expected: z.string()
-});
-const SetupRefSchema = z.object({
-	name: z.string(),
-	params: z.record(z.string(), z.string()).optional()
-});
-z.object({
-	title: z.string(),
-	baseUrl: z.string(),
-	prerequisites: z.string().optional(),
-	setups: z.array(SetupRefSchema).optional(),
-	relatedPaths: z.array(z.string()).optional(),
-	steps: z.array(TestStepSchema)
-});
-const PlaceholderDefSchema = z.object({
-	dummy: z.string(),
-	description: z.string().optional()
-});
-z.object({
-	title: z.string(),
-	placeholders: z.record(z.string(), PlaceholderDefSchema).optional(),
-	steps: z.array(TestStepSchema)
-});
 const RouteStepSchema = z.object({
 	title: z.string(),
 	action: z.string(),
@@ -3141,7 +3190,7 @@ const DraftIssueSchema = z.object({
 	]),
 	category: z.enum([
 		"assertable",
-		"setups",
+		"blocks",
 		"granularity",
 		"unimplemented"
 	]),
@@ -3153,20 +3202,499 @@ const DraftReportSchema = z.object({
 	issues: z.array(DraftIssueSchema),
 	patch: z.string()
 });
+const DRAFT_CATEGORY_LABEL = { // Display labels for the issue `category` values, used by the text drift renderer.
+	assertable: "Assertability",
+	blocks: "Block references",
+	granularity: "Step granularity",
+	unimplemented: "Unimplemented checks"
+};
 const DraftNamingSchema = z.object({ // Shape of the naming reply: the JSON object requested by the naming system prompt.
 	featureName: z.string().min(1), // must be non-empty
 	specName: z.string().min(1), // must be non-empty
 	reason: z.string().optional() // explanation is optional
 });
 //#endregion
+//#region src/drift/analyze.ts
+const DEFAULT_CONCURRENCY$1 = 3;
+/**
+* Run drift checks against a list of pre-collected targets. Pure library
+* function: no commander, no process.exit, no stdout writes. Callers handle
+* presentation. `cli/drift` does the full sweep with `--changed` scoping;
+* `cli/run` calls this with just the failing specs after vitest.
+*/
+async function analyzeDrift(input) {
+	const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, onSpecStart } = input;
+	const results = new Array(targets.length);
+	let cursor = 0;
+	const worker = async () => {
+		while (true) {
+			const idx = cursor++;
+			if (idx >= targets.length) return;
+			const target = targets[idx];
+			onSpecStart?.(target);
+			results[idx] = await checkSpec(target, {
+				cwd,
+				blocks,
+				model
+			});
+		}
+	};
+	const pool = Array.from({ length: Math.min(concurrency, targets.length) }, () => worker());
+	await Promise.all(pool);
+	return results;
+}
+async function checkSpec(target, opts) {
+	const { featureName, specName } = target;
+	const existing = await tryReadSpecFile(featureName, specName, opts.cwd);
+	if (existing === null) return {
+		target,
+		ok: false,
+		issues: [],
+		error: `spec file disappeared after enumeration: ${featureName}/${specName}`
+	};
+	const { result, isError } = await invokeClaudeStreaming({
+		prompt: buildDriftUserPrompt(existing),
+		systemPrompt: buildDriftSystemPrompt(opts.blocks),
+		allowedTools: [
+			"Read",
+			"Grep",
+			"Glob"
+		],
+		silenceBashLog: true,
+		cwd: opts.cwd,
+		...opts.model ? { model: opts.model } : {}
+	}, (_msg) => {});
+	if (isError) return {
+		target,
+		ok: false,
+		issues: [],
+		error: "Claude returned an error result"
+	};
+	const json = extractJsonBlock(result);
+	if (!json) return {
+		target,
+		ok: false,
+		issues: [],
+		error: "Claude did not return a json block"
+	};
+	let report;
+	try {
+		report = DraftReportSchema.parse(JSON.parse(json));
+	} catch (e) {
+		return {
+			target,
+			ok: false,
+			issues: [],
+			error: `failed to parse drift report: ${e.message}`
+		};
+	}
+	return {
+		target,
+		ok: true,
+		issues: report.issues
+	};
+}
+//#endregion
+//#region src/drift/format.ts
+/**
+* Render drift results as a string. The CLI commands and the `run` failure
+* hook are the only callers; both want the formatted output returned so
+* they can prefix / interleave / pipe it as needed.
+*
+* @param results - Drift results to render.
+* @param format - "json" or "github"; any other value renders plain text.
+* @param cwd - Only used by the "github" format, to build file paths.
+* @returns The rendered report.
+*/
+function renderDrift(results, format, cwd) {
+	if (format === "json") return renderJson(results);
+	if (format === "github") return renderGithub(results, cwd);
+	return renderText(results); // fallback for "text" and anything unrecognised
+}
+const HEAVY_RULE = "═".repeat(72); // 72-character rule printed above the totals footer
+function renderText(results) { // Plain-text report: one section per spec, then a totals footer; returns the lines joined with "\n".
+	const out = [];
+	for (const r of results) {
+		out.push("");
+		const heading = `══ ${r.target.featureName}/${r.target.specName} `;
+		const tail = "═".repeat(Math.max(3, 72 - heading.length)); // pad the heading towards 72 characters, never fewer than 3 rule characters
+		out.push(`${heading}${tail}`);
+		if (r.error) { // spec-level failure: print it and skip the issue listing
+			out.push(`  ERROR  ${r.error}`);
+			continue;
+		}
+		const errors = r.issues.filter((i) => i.severity === "ERROR");
+		const warnings = r.issues.filter((i) => i.severity === "WARN");
+		const passed = r.issues.filter((i) => i.severity === "OK");
+		if (errors.length === 0 && warnings.length === 0) { // clean spec: a single summary line
+			const label = passed.length === 1 ? "check" : "checks";
+			const detail = passed.length > 0 ? `all ${passed.length} ${label} passed` : "no issues";
+			out.push(`  ✓  ${detail}`);
+			continue;
+		}
+		for (const issue of errors) appendFinding(out, "ERROR", issue); // errors are listed before warnings
+		for (const issue of warnings) appendFinding(out, "WARN", issue);
+		if (passed.length > 0) { // OK findings are collapsed into one line of category labels
+			const names = passed.map((i) => DRAFT_CATEGORY_LABEL[i.category]).join(", ");
+			out.push("");
+			out.push(`  ✓  passed (${passed.length}): ${names}`);
+		}
+	}
+	out.push("");
+	out.push(HEAVY_RULE);
+	const totals = summarize(results);
+	out.push(`  specs    ${results.length} (${totals.errored} errored)`);
+	out.push(`  findings ${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`);
+	out.push(""); // yields a trailing newline after the join
+	return out.join("\n");
+}
+function appendFinding(out, level, issue) { // Appends one finding to `out` (mutated in place): blank line, "<level>  <category label> [stepId]", message, optional detail.
+	const stepPart = issue.stepId ? ` ${issue.stepId}` : ""; // stepId may be null/empty, in which case nothing is appended
+	out.push("");
+	out.push(`  ${level}  ${DRAFT_CATEGORY_LABEL[issue.category]}${stepPart}`);
+	out.push(`    ${issue.message}`);
+	if (issue.detail) out.push(`    └ ${issue.detail.replace(/\n/g, "\n      ")}`); // continuation lines of a multi-line detail are indented by six spaces
+}
+function renderJson(results) { // Serialises results as pretty-printed JSON of the form { specs: [...] }, with a trailing newline.
+	const payload = { specs: results.map((r) => ({
+		feature: r.target.featureName,
+		spec: r.target.specName,
+		ok: r.ok,
+		...r.error ? { error: r.error } : {}, // `error` key is only present when the spec has a truthy error
+		issues: r.issues.map((i) => ({
+			severity: i.severity,
+			category: i.category,
+			stepId: i.stepId,
+			message: i.message,
+			...i.detail ? { detail: i.detail } : {} // `detail` key is omitted when empty or missing
+		}))
+	})) };
+	return `${JSON.stringify(payload, null, 2)}\n`;
+}
+function renderGithub(results, cwd) {
+	const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
+	const lines = [];
+	for (const r of results) {
+		const file = githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName);
+		if (r.error) {
+			lines.push(`::error file=${file}::${escapeGhMessage(r.error)}`);
+			continue;
+		}
+		for (const issue of r.issues) {
+			if (issue.severity === "OK") continue;
+			const level = issue.severity === "ERROR" ? "error" : "warning";
+			const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
+			const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
+			lines.push(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}`);
+		}
+	}
+	return lines.length === 0 ? "" : `${lines.join("\n")}\n`;
+}
+function githubRelPath(cwd, repoRoot, featureName, specName) { // Path of the spec's spec.yaml relative to `repoRoot`, or its absolute path when it is not under `repoRoot`.
+	const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "spec.yaml");
+	const rel = relative(repoRoot, abs);
+	return rel.startsWith("..") ? abs : rel; // NOTE(review): also matches a first path segment that merely begins with ".." — confirm that cannot occur here
+}
+function escapeGhMessage(s) { // Percent-encodes '%', CR and LF for use as the message part of a workflow command.
+	return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A"); // '%' is replaced first so the escapes added afterwards are not re-encoded
+}
+function escapeGhProp(s) { // Like escapeGhMessage, plus ',' and ':' for use as a workflow-command property value.
+	return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A").replace(/,/g, "%2C").replace(/:/g, "%3A"); // '%' is replaced first so the escapes added afterwards are not re-encoded
+}
+function summarize(results) { // Totals across all results: issue counts per severity plus the number of specs with a spec-level error.
+	let error = 0;
+	let warn = 0;
+	let ok = 0;
+	let errored = 0;
+	for (const r of results) {
+		if (r.error) errored++; // counts specs, not issues
+		for (const issue of r.issues) if (issue.severity === "ERROR") error++;
+		else if (issue.severity === "WARN") warn++;
+		else ok++; // any severity other than ERROR / WARN is counted as ok
+	}
+	return {
+		error,
+		warn,
+		ok,
+		errored
+	};
+}
+//#endregion
+//#region src/drift/exit-code.ts
+/**
+* Map drift results to an exit code. Spec-level errors (Claude call failed)
+* always fail; otherwise ERROR severity always fails, WARN fails only when
+* the threshold is `warn`.
+*
+* @param results - Drift results to inspect.
+* @param threshold - Pass "warn" to make WARN findings fail as well.
+* @returns 1 on the first failing result or issue found, otherwise 0.
+*/
+function determineExitCode(results, threshold) {
+	for (const r of results) {
+		if (r.error) return 1;
+		for (const issue of r.issues) {
+			if (issue.severity === "ERROR") return 1;
+			if (threshold === "warn" && issue.severity === "WARN") return 1;
+		}
+	}
+	return 0;
+}
+//#endregion
+//#region src/drift/auth.ts
+/**
+* Probe whether the host has any credential the Anthropic SDK can pick up:
+*   1. ANTHROPIC_API_KEY env var (CI / scripted use)
+*   2. ~/.claude/.credentials.json (local Claude Code login)
+*
+* `run --drift` is opt-in, so the caller will only consult this after the
+* user has asked for drift. We never throw — auth absence is a normal flow
+* that surfaces as "drift analysis skipped".
+*
+* Only presence is checked: a non-empty env var or an existing file. The
+* credential itself is not validated here.
+*
+* @returns `{ ok: true }` when either source is present, otherwise
+*   `{ ok: false, reason }`.
+*/
+function driftAuthAvailable() {
+	const key = process.env["ANTHROPIC_API_KEY"];
+	if (typeof key === "string" && key.length > 0) return { ok: true }; // an empty string counts as unset
+	if (existsSync(join(homedir(), ".claude", ".credentials.json"))) return { ok: true };
+	return {
+		ok: false,
+		reason: "no ANTHROPIC_API_KEY / claude login"
+	};
+}
+//#endregion
+//#region src/cli/run.ts
+const USER_VITEST_CONFIG = resolve(".ccqa/vitest.config.ts"); // resolved against the working directory once, when this module is evaluated
+async function resolveVitestConfig() { // Picks the vitest config: the project's .ccqa/vitest.config.ts when accessible, otherwise the bundled one.
+	try {
+		await access(USER_VITEST_CONFIG);
+		return USER_VITEST_CONFIG;
+	} catch { // any access failure (missing file, permissions, ...) selects the bundled config
+		return bundledVitestConfigPath();
+	}
+}
+const runCommand = new Command("run").argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all").description("Run generated agent-browser test scripts. Pass --drift to invoke a Claude-driven drift analysis on each failing spec (skipped silently when no test fails). Requires ANTHROPIC_API_KEY or a local Claude login.").option("--drift", "On vitest failure, run drift analysis on the failing specs").option("--drift-strict", "Treat drift ERROR findings as a run failure (exit 1 even if vitest passed). Implies --drift.").option("--format <fmt>", "Output format for the drift block: text | json | github", "text").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Used by --drift only. Overrides CCQA_MODEL.").action(async (target, opts) => { // commander definition of the `run` subcommand; the optional target and the parsed options are forwarded unchanged
+	await runTests(target, opts);
+});
+async function runTests(target, opts) { // Runs each resolved spec's test script through vitest one after another, prints a summary, optionally runs drift analysis, then exits the process.
+	header("run", target);
+	const specs = await resolveSpecs(target);
+	if (specs.length === 0) { // nothing matched the target: report and exit 1
+		error("no test scripts found");
+		hint("run 'ccqa generate <feature>/<spec>' first to generate tests");
+		process.exit(1);
+	}
+	const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-")); // holds the per-spec JSON reports; removed in the finally block below
+	const summaries = [];
+	let overallExitCode = 0;
+	const vitestConfig = await resolveVitestConfig();
+	try {
+		for (let i = 0; i < specs.length; i++) {
+			const { featureName, specName } = specs[i];
+			const scriptFile = await getTestScript(featureName, specName);
+			if (!scriptFile) { // warn and skip; this neither adds a summary entry nor changes the exit code
+				warn(`${featureName}/${specName}: no test.spec.ts found`);
+				continue;
+			}
+			run(`${featureName}/${specName}`);
+			meta("test", scriptFile);
+			blank();
+			const reportFile = join(tmpDir, `report-${i}.json`); // the loop index keeps report names unique per spec
+			const proc = spawnVitestStreaming([
+				"run",
+				"--config",
+				vitestConfig,
+				scriptFile,
+				"--reporter=json",
+				`--outputFile.json=${reportFile}`
+			]);
+			await Promise.all([streamFiltered(proc.stdout, process.stdout), streamFiltered(proc.stderr, process.stderr)]); // forward both child streams until they end
+			const exitCode = await proc.exited;
+			if (exitCode !== 0) overallExitCode = exitCode; // the most recent non-zero exit code wins
+			const report = await readReport(reportFile);
+			summaries.push({
+				featureName,
+				specName,
+				scriptFile,
+				report,
+				exitCode
+			});
+			blank();
+		}
+		printSummary(summaries);
+		overallExitCode = await maybeRunDrift(summaries, opts, overallExitCode); // the drift hook receives the current exit code and returns the one to use
+	} finally {
+		await rm(tmpDir, {
+			recursive: true,
+			force: true
+		});
+	}
+	process.exit(overallExitCode); // placed after the finally block so the temp directory is removed first
+}
+/**
+* Tell whether a spec run counts as failed: either its process exit code is
+* non-zero, or its report (when present) lists at least one failed test.
+*
+* @param s Run summary with `exitCode` and an optional `report`.
+* @returns true when the spec failed, false otherwise.
+*/
+function failedSpec(s) {
+	const exitedCleanly = s.exitCode === 0;
+	return !exitedCleanly || (s.report?.numFailedTests ?? 0) > 0;
+}
+/**
+* Validate the `--format` value used for the drift block.
+*
+* @param raw Raw option value; null/undefined falls back to "text".
+* @returns "text", "json" or "github". Any other value is reported through
+*   error() and the process exits with code 2.
+*/
+function parseDriftFormat(raw) {
+	const requested = raw ?? "text";
+	switch (requested) {
+		case "text":
+		case "json":
+		case "github":
+			return requested;
+		default:
+			error(`invalid --format: ${requested} (expected text|json|github)`);
+			process.exit(2);
+	}
+}
+/**
+* Pick the run summaries that should get a drift analysis.
+*
+* - `--drift-strict` audits every spec, passing or not: the spec can lag
+*   behind the source while vitest is still green (e.g. against a stale
+*   staging environment), and CI should fail loudly in that case.
+* - `--drift` only supplements failures: just the specs for which
+*   failedSpec() is true are analyzed, to help explain the vitest failure.
+* - With neither flag nothing is selected.
+*
+* @param summaries Per-spec run summaries.
+* @param opts Parsed CLI options; reads `driftStrict` and `drift`.
+* @returns The summaries to drift-check (possibly empty).
+*/
+function selectDriftTargets(summaries, opts) {
+	if (opts.driftStrict) return summaries;
+	return opts.drift ? summaries.filter(failedSpec) : [];
+}
+/**
+* Opt-in post-vitest drift hook. With `--drift`, fires only when at least
+* one spec failed (supplemental signal). With `--drift-strict`, every spec
+* in `summaries` is a candidate, so a spec/source divergence is caught even
+* when vitest passed. When auth is unavailable, or none of the candidates
+* is found in the feature tree, an info line is logged and
+* `currentExitCode` is returned unchanged, so the run's exit code is
+* determined by vitest alone.
+*
+* @param summaries Per-spec run summaries collected by the caller.
+* @param opts Parsed CLI options; reads `drift`, `driftStrict`, `format`, `model`.
+* @param currentExitCode Exit code accumulated so far.
+* @returns `currentExitCode`, or `currentExitCode || 1` when `--drift-strict`
+*   is set and determineExitCode(results, "error") is non-zero.
+*/
+async function maybeRunDrift(summaries, opts, currentExitCode) {
+	const candidates = selectDriftTargets(summaries, opts);
+	if (candidates.length === 0) return currentExitCode;
+	const auth = driftAuthAvailable();
+	if (!auth.ok) {
+		info(`drift analysis skipped (${auth.reason})`);
+		return currentExitCode;
+	}
+	// NOTE(review): --format is validated only once drift actually runs; an
+	// invalid value makes parseDriftFormat exit the process with code 2 here.
+	const format = parseDriftFormat(opts.format);
+	const cwd = process.cwd();
+	const tree = await listFeatureTree(cwd);
+	// Keep only candidates present in the feature tree, carrying over
+	// relatedPaths / includedBlocks when the tree entry has them.
+	const targets = candidates.map((s) => {
+		const spec = tree.find((f) => f.featureName === s.featureName)?.specs.find((sp) => sp.specName === s.specName);
+		if (!spec) return null;
+		const t = {
+			featureName: s.featureName,
+			specName: s.specName
+		};
+		if (spec.relatedPaths) t.relatedPaths = spec.relatedPaths;
+		if (spec.includedBlocks) t.includedBlocks = spec.includedBlocks;
+		return t;
+	}).filter((t) => t !== null);
+	if (targets.length === 0) {
+		// NOTE(review): under --drift-strict the candidates can include passing
+		// specs, so "failing specs" in this message may be inaccurate.
+		info("drift analysis skipped (no spec.yaml found for failing specs)");
+		return currentExitCode;
+	}
+	const results = await analyzeDrift({
+		targets,
+		cwd,
+		blocks: await loadAvailableBlocks(cwd),
+		// The concurrency handed to analyzeDrift is capped at 3.
+		concurrency: Math.min(3, targets.length),
+		...opts.model ? { model: opts.model } : {},
+		onSpecStart: (t) => {
+			// Progress lines are emitted for the text format only.
+			if (format === "text") info(`drift: checking ${t.featureName}/${t.specName}`);
+		}
+	});
+	if (format === "text") process.stdout.write(`\n${C.cyan}${C.bold}──────── drift analysis ────────${C.reset}\n`);
+	process.stdout.write(renderDrift(results, format, cwd));
+	// `currentExitCode || 1` keeps an existing non-zero code and turns 0 into 1.
+	if (opts.driftStrict && determineExitCode(results, "error") !== 0) return currentExitCode || 1;
+	return currentExitCode;
+}
+/**
+* Load and parse a JSON report file.
+*
+* @param path Path of the report file.
+* @returns The parsed JSON value, or null when the file cannot be read or
+*   does not contain valid JSON.
+*/
+async function readReport(path) {
+	let parsed = null;
+	try {
+		parsed = JSON.parse(await readFile(path, "utf8"));
+	} catch {
+		// Best effort: an unreadable or malformed report is reported as null.
+		parsed = null;
+	}
+	return parsed;
+}
+// Color output is enabled only when stdout is a TTY and NO_COLOR is not set
+// at all (any value, including an empty string, disables it).
+const useColor = process.stdout.isTTY && process.env.NO_COLOR == null;
+// ANSI escape sequences used for terminal styling; every entry is an empty
+// string when color is disabled, so they can be interpolated unconditionally.
+const C = {
+	reset: useColor ? "\x1B[0m" : "",
+	bold: useColor ? "\x1B[1m" : "",
+	dim: useColor ? "\x1B[2m" : "",
+	green: useColor ? "\x1B[32m" : "",
+	red: useColor ? "\x1B[31m" : "",
+	yellow: useColor ? "\x1B[33m" : "",
+	cyan: useColor ? "\x1B[36m" : "",
+	gray: useColor ? "\x1B[90m" : ""
+};
+/**
+* Print the end-of-run summary to stdout: one line per spec, one line per
+* assertion (with the first line of each failure message), and spec/test
+* totals.
+*
+* @param summaries Per-spec run summaries (`featureName`, `specName`,
+*   `exitCode`, and a `report` that may be null).
+*/
+function printSummary(summaries) {
+	process.stdout.write(`\n${C.cyan}${C.bold}──────── ccqa summary ────────${C.reset}\n\n`);
+	let totalTests = 0;
+	let totalPassed = 0;
+	let totalFailed = 0;
+	let totalSkipped = 0;
+	for (const s of summaries) {
+		// NOTE(review): this local shadows the module-level header() helper that
+		// runTests() calls; harmless here, but easy to trip over.
+		const header = `${C.bold}${s.featureName}/${s.specName}${C.reset}`;
+		if (!s.report) {
+			// Without a report only the exit code is known; the spec adds nothing to the test totals.
+			const icon = s.exitCode === 0 ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
+			process.stdout.write(`${icon} ${header} ${C.dim}(no report)${C.reset}\n`);
+			continue;
+		}
+		totalTests += s.report.numTotalTests;
+		totalPassed += s.report.numPassedTests;
+		totalFailed += s.report.numFailedTests;
+		// The report's pending count is displayed as "skipped".
+		totalSkipped += s.report.numPendingTests;
+		const ok = s.report.success;
+		const icon = ok ? `${C.green}✔${C.reset}` : `${C.red}✖${C.reset}`;
+		const countColor = ok ? C.green : C.red;
+		process.stdout.write(`${icon} ${header}  ${countColor}${s.report.numPassedTests}/${s.report.numTotalTests}${C.reset} ${C.dim}passed${C.reset}\n`);
+		for (const file of s.report.testResults) for (const a of file.assertionResults) {
+			const aIcon = assertionIcon(a.status);
+			const dur = a.duration != null ? ` ${C.gray}${formatDuration(a.duration)}${C.reset}` : "";
+			process.stdout.write(`    ${aIcon} ${a.fullName}${dur}\n`);
+			if (a.status === "failed" && a.failureMessages?.length) for (const msg of a.failureMessages) {
+				// Only the first line of each failure message is shown.
+				const firstLine = msg.split("\n")[0] ?? msg;
+				process.stdout.write(`        ${C.red}${firstLine}${C.reset}\n`);
+			}
+		}
+	}
+	// NOTE(review): spec pass/fail counts look at exitCode only, whereas
+	// failedSpec() also treats report.numFailedTests > 0 as a failure.
+	const specsPassed = summaries.filter((s) => s.exitCode === 0).length;
+	const specsFailed = summaries.filter((s) => s.exitCode !== 0).length;
+	process.stdout.write("\n");
+	process.stdout.write(`  ${C.bold}Specs${C.reset}   ${summaries.length}  (${C.green}${specsPassed} passed${C.reset}, ${specsFailed > 0 ? C.red : C.dim}${specsFailed} failed${C.reset})\n`);
+	process.stdout.write(`  ${C.bold}Tests${C.reset}   ${totalTests}  (${C.green}${totalPassed} passed${C.reset}, ${totalFailed > 0 ? C.red : C.dim}${totalFailed} failed${C.reset}, ${C.yellow}${totalSkipped} skipped${C.reset})\n`);
+	process.stdout.write("\n");
+}
+/**
+* Map an assertion status to a colored one-character icon for the summary.
+*
+* @param status Assertion status string taken from the JSON report.
+* @returns A green check for "passed", a red cross for "failed", a yellow
+*   circle for "skipped" / "pending" / "todo", and a gray "?" for any other
+*   value.
+*/
+function assertionIcon(status) {
+	switch (status) {
+		case "passed": return `${C.green}✔${C.reset}`;
+		case "failed": return `${C.red}✖${C.reset}`;
+		case "skipped":
+		case "pending":
+		case "todo": return `${C.yellow}◌${C.reset}`;
+		// Without a fallback an unlisted status would return undefined and the
+		// summary line would literally print "undefined".
+		default: return `${C.gray}?${C.reset}`;
+	}
+}
+/**
+* Format a duration given in milliseconds for display.
+*
+* @param ms Duration in milliseconds (may be fractional).
+* @returns Whole milliseconds with an "ms" suffix when the rounded value is
+*   below one second, otherwise seconds with two decimals and an "s" suffix.
+*/
+function formatDuration(ms) {
+	// Round before comparing: testing the raw value let 999.5-999.99 ms be
+	// rendered as "1000ms" instead of "1.00s".
+	const wholeMs = Math.round(ms);
+	if (wholeMs < 1e3) return `${wholeMs}ms`;
+	return `${(ms / 1e3).toFixed(2)}s`;
+}
+// Lines matching any of these patterns are dropped by streamFiltered().
+const NOISE_LINE_PATTERNS = [/^JSON report written to /];
+/**
+* Copy `source` to `sink` line by line, dropping every line that matches one
+* of NOISE_LINE_PATTERNS.
+*
+* @param source Readable stream; switched to utf8 and consumed as an async iterable.
+* @param sink Destination with a write() method.
+* @returns A promise that settles once `source` has been fully consumed.
+*/
+async function streamFiltered(source, sink) {
+	source.setEncoding("utf8");
+	// Holds the trailing partial line until its newline arrives in a later chunk.
+	let buffer = "";
+	for await (const chunk of source) {
+		buffer += chunk;
+		let nl = buffer.indexOf("\n");
+		while (nl !== -1) {
+			const line = buffer.slice(0, nl);
+			buffer = buffer.slice(nl + 1);
+			if (!NOISE_LINE_PATTERNS.some((p) => p.test(line))) sink.write(line + "\n");
+			nl = buffer.indexOf("\n");
+		}
+	}
+	// Flush a final unterminated line as-is (no newline is appended).
+	if (buffer.length > 0 && !NOISE_LINE_PATTERNS.some((p) => p.test(buffer))) sink.write(buffer);
+}
+/**
+* Turn the optional CLI target into a list of `{ featureName, specName }`.
+*
+* @param target Falsy for "everything", a string containing "/" for a single
+*   spec path, or any other string for a feature name.
+* @returns The result of listAllSpecs() when no target is given, a
+*   one-element list for a spec path, or one entry per spec name returned by
+*   listSpecsForFeature(target).
+*/
+async function resolveSpecs(target) {
+	if (!target) return listAllSpecs();
+	if (!target.includes("/")) {
+		// Bare feature name: expand to every spec listed for that feature.
+		const specNames = await listSpecsForFeature(target);
+		return specNames.map((specName) => ({
+			featureName: target,
+			specName
+		}));
+	}
+	// "<feature>/<spec>": a single explicit spec.
+	const parsed = parseSpecPath(target);
+	return [{
+		featureName: parsed.featureName,
+		specName: parsed.specName
+	}];
+}
+//#endregion
 //#region src/cli/draft.ts
-const CATEGORY_LABEL$1 = {
-	assertable: "Assertability",
-	setups: "Setup references",
-	granularity: "Step granularity",
-	unimplemented: "Unimplemented checks"
-};
-const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a test-spec.md with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
+const CATEGORY_LABEL = DRAFT_CATEGORY_LABEL;
+const draftCommand = new Command("draft").argument("[feature/spec]", "Optional spec path (e.g. tasks/create-and-complete). If omitted, Claude proposes one from your intent.").description("Interactively draft and refine a spec.yaml with Claude Code").option("--instruction <text>", "Non-interactive single-shot instruction (skips the interactive loop)").option("--apply", "Auto-apply each generated patch without [y/N] confirmation", false).action(async (specPath, opts) => {
 	await ensureCcqaDir();
 	let featureName;
 	let specName;
@@ -3216,7 +3744,7 @@ async function runDraft(featureName, specName, opts, prefilledIntent) {
 async function runOneTurn(input) {
 	const { featureName, specName, existing, userInput, autoApply } = input;
 	const isFirstRun = existing === null;
-	const systemPrompt = buildDraftSystemPrompt();
+	const systemPrompt = buildDraftSystemPrompt(await loadAvailableBlocks());
 	const userPrompt = buildDraftPrompt({
 		mode: isFirstRun ? "create" : "refine",
 		existing: existing ?? "",
@@ -3349,24 +3877,24 @@ function printReviewBlock(issues) {
 	}
 	if (errors.length) {
 		process.stdout.write(`  ERRORS (${errors.length})\n`);
-		for (const issue of errors) writeFinding$1(issue);
+		for (const issue of errors) writeFinding(issue);
 		process.stdout.write("\n");
 	}
 	if (warnings.length) {
 		process.stdout.write(`  WARNINGS (${warnings.length})\n`);
-		for (const issue of warnings) writeFinding$1(issue);
+		for (const issue of warnings) writeFinding(issue);
 		process.stdout.write("\n");
 	}
 	if (passed.length) {
-		const names = passed.map((i) => CATEGORY_LABEL$1[i.category]).join(", ");
+		const names = passed.map((i) => CATEGORY_LABEL[i.category]).join(", ");
 		process.stdout.write(`  PASSED (${passed.length})\n    ${names}\n`);
 	}
 	process.stdout.write(`\n${RULE}\n\n`);
 	return errors.length > 0;
 }
-function writeFinding$1(issue) {
+function writeFinding(issue) {
 	const stepPart = issue.stepId ? `  ${issue.stepId}` : "";
-	process.stdout.write(`    ${CATEGORY_LABEL$1[issue.category]}${stepPart}\n`);
+	process.stdout.write(`    ${CATEGORY_LABEL[issue.category]}${stepPart}\n`);
 	process.stdout.write(`      ${issue.message}\n`);
 	if (issue.detail) process.stdout.write(`      └ ${issue.detail.replace(/\n/g, "\n        ")}\n`);
 }
@@ -3380,10 +3908,7 @@ async function proposeNaming(opts) {
 	const tree = await listFeatureTree();
 	const treeForPrompt = tree.map((f) => ({
 		featureName: f.featureName,
-		specs: f.specs.map((s) => ({
-			specName: s.specName,
-			...s.title ? { title: s.title } : {}
-		}))
+		specs: f.specs.map((s) => ({ specName: s.specName }))
 	}));
 	info("Proposing a feature/spec name based on your intent...");
 	const { result, isError } = await invokeClaudeStreaming({
@@ -3533,49 +4058,6 @@ function truncate(s, n) {
 	return s.slice(s.length - n);
 }
 //#endregion
-//#region src/prompts/drift.ts
-function buildDriftSystemPrompt() {
-	return `${buildDraftSystemPrompt()}
-## Drift mode
-You are running non-interactively in CI. The user will not see or apply the patch — only the \`issues\` array.
-- Always set \`patch\` to "" in your response.
-- Focus issue messages on what is **out of sync** between the spec and the current codebase: missing aria-labels, renamed routes, removed buttons, placeholders that no longer exist, setup references that point to non-existent files.
-- Do NOT raise issues about stylistic preferences in the spec wording.
-- Treat \`category: unimplemented\` as the primary signal for drift: anything the spec asserts that you cannot find in code is a drift finding.
-## Drift severity policy (STRICT)
-The CLI exits non-zero when any issue has \`severity: "ERROR"\` (default) or — with \`--severity warn\` — when any \`WARN\` is present. Pick severity by **whether a deterministic replay of this spec would fail today**, not by how confident you are in your own analysis.
-Use **ERROR** when the spec would break on replay:
-- A selector the spec relies on (\`aria-label\`, \`placeholder\`, \`data-testid\`, button text) **does not exist anywhere in the source**.
-- A URL / route the spec navigates to is no longer defined.
-- An **Expected** asserts a string or visible text that is no longer rendered by the relevant component.
-- A \`setups[].name\` does not resolve to \`.ccqa/setups/<name>/setup-spec.md\`, or a \`params\` key is not declared in that setup's \`placeholders\`.
-- The spec references a feature/page that has been removed from the codebase.
-Use **WARN** when the spec is still likely to work, but quality could improve:
-- The Expected is vague ("a message appears") when a precise string exists in code.
-- A step bundles multiple actions, or a needed intermediate verification step is missing.
-- Stable signals exist that the spec could leverage but currently doesn't.
-- You are unsure whether a referenced string exists (give the user the benefit of the doubt; do not hard-fail CI on uncertainty).
-Use **OK** for axes you actively verified and found no issue.
-If you cannot decide between ERROR and WARN, choose WARN. Reserve ERROR for findings you can back up with a specific file path or grep result that proves the drift.
-`;
-}
-function buildDriftUserPrompt(existing) {
-	return buildDraftPrompt({
-		mode: "refine",
-		existing,
-		userInput: ""
-	});
-}
-//#endregion
 //#region src/drift/affected.ts
 const execFileP = promisify(execFile);
 /**
@@ -3595,10 +4077,10 @@ function resolveBaseRef(explicit) {
 * post-rename layout.
 *
 * Paths are re-rooted to be relative to `cwd`, not the git repo root. In a
-* monorepo where `cwd` is a sub-package (e.g. `js/apps/knowledge-webapp`),
-* git emits paths relative to the repo root, but specs declare relatedPaths
-* relative to their own package. Changes outside `cwd` are dropped so an
-* unrelated PR can never accidentally scope a sub-package's specs in.
+* monorepo where `cwd` is a sub-package (e.g. `apps/foo`), git emits paths
+* relative to the repo root, but specs declare relatedPaths relative to
+* their own package. Changes outside `cwd` are dropped so an unrelated PR
+* can never accidentally scope a sub-package's specs in.
 */
 async function getChangedFiles(base, cwd) {
 	const [{ stdout: rootOut }, { stdout: diffOut }] = await Promise.all([execFileP("git", ["rev-parse", "--show-toplevel"], { cwd }), execFileP("git", [
@@ -3837,9 +4319,8 @@ ${previews.map((p) => {
 ## Existing specs
 ${specs.map((s) => {
-		const title = s.title ? ` — ${s.title}` : "";
 		const paths = s.relatedPaths.length === 0 ? "  (no relatedPaths declared)" : s.relatedPaths.map((p) => `  - ${p}`).join("\n");
-		return `- ${s.featureName}/${s.specName}${title}\n${paths}`;
+		return `- ${s.featureName}/${s.specName}\n${paths}`;
 	}).join("\n")}
 ## Task
@@ -3850,7 +4331,7 @@ Return the spec keys that might be affected by any of the new files. Conservativ
 //#endregion
 //#region src/cli/drift.ts
 const DEFAULT_CONCURRENCY = 3;
-const driftCommand = new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each test-spec.md is still in sync with the current codebase (CI-friendly, no patches applied).").option("--format <fmt>", "Output format: text | json | github", "text").option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error").option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`).option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().").option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.").option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.").action(async (specPath, opts) => {
+const driftCommand = new Command("drift").argument("[feature/spec]", "Optional spec id. If omitted, every spec under .ccqa/features/ is checked.").description("Check whether each spec.yaml is still in sync with the current codebase (CI-friendly, no patches applied).").option("--format <fmt>", "Output format: text | json | github", "text").option("--severity <level>", "Exit non-zero on this severity or higher: warn | error", "error").option("--concurrency <n>", `Parallel spec checks (default: ${DEFAULT_CONCURRENCY})`).option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--cwd <path>", "Working directory used as both the .ccqa root and the codebase Claude reads. Useful for monorepos. Defaults to process.cwd().").option("--changed", "Restrict drift checks to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). New files are routed to specs via a single lightweight Claude call.").option("--base <ref>", "Base ref to diff against when --changed is set. Defaults to $GITHUB_BASE_REF (CI) or origin/main.").action(async (specPath, opts) => {
 	const format = parseFormat(opts.format);
 	const threshold = parseSeverity(opts.severity);
 	const concurrency = parseConcurrency(opts.concurrency);
@@ -3878,8 +4359,18 @@ const driftCommand = new Command("drift").argument("[feature/spec]", "Optional s
 		if (format === "text") meta("scoped", `${targets.length} of ${total} spec${total > 1 ? "s" : ""}`);
 		if (targets.length === 0) exitWithNoSpecs(format, "no specs intersect the changed file set; nothing to check");
 	}
-	const results = await runChecks(targets, concurrency, opts.model, cwd, format);
-	emitReport(results, format, cwd);
+	const blocks = await loadAvailableBlocks(cwd);
+	const results = await analyzeDrift({
+		targets,
+		cwd,
+		blocks,
+		concurrency,
+		...opts.model ? { model: opts.model } : {},
+		onSpecStart: (t) => {
+			if (format === "text") info(`checking ${t.featureName}/${t.specName}`);
+		}
+	});
+	process.stdout.write(renderDrift(results, format, cwd));
 	process.exit(determineExitCode(results, threshold));
 });
 function exitWithNoSpecs(format, message) {
@@ -3905,12 +4396,21 @@ async function filterByChanged(input) {
 	const newFiles = changed.filter((f) => f.status === "added");
 	const existingChanges = changed.filter((f) => f.status !== "added");
 	const affected = /* @__PURE__ */ new Set();
+	const touchedBlockNames = /* @__PURE__ */ new Set();
+	for (const f of changed) {
+		const blockName = parseBlockPath(f.path);
+		if (blockName) touchedBlockNames.add(blockName);
+	}
 	for (const t of targets) {
 		if (!t.relatedPaths) {
 			affected.add(specKey(t));
 			continue;
 		}
-		if (existingChanges.some((f) => isPathAffectedBy(f.path, t.relatedPaths)) || newFiles.some((f) => isPathAffectedBy(f.path, t.relatedPaths))) affected.add(specKey(t));
+		if (existingChanges.some((f) => isPathAffectedBy(f.path, t.relatedPaths)) || newFiles.some((f) => isPathAffectedBy(f.path, t.relatedPaths))) {
+			affected.add(specKey(t));
+			continue;
+		}
+		if (t.includedBlocks?.some((name) => touchedBlockNames.has(name))) affected.add(specKey(t));
 	}
 	if (newFiles.length > 0) {
 		if (format === "text") info(`routing ${newFiles.length} new file(s) to specs via Claude...`);
@@ -3919,7 +4419,6 @@ async function filterByChanged(input) {
 			specs: targets.filter((t) => t.relatedPaths).map((t) => ({
 				featureName: t.featureName,
 				specName: t.specName,
-				title: t.title,
 				relatedPaths: t.relatedPaths
 			})),
 			cwd,
@@ -3930,18 +4429,20 @@ async function filterByChanged(input) {
 	return targets.filter((t) => affected.has(specKey(t)));
 }
 async function collectTargets(specPath, cwd) {
+	const tree = await listFeatureTree(cwd);
 	if (specPath) {
 		const { featureName, specName } = parseSpecPath(specPath);
-		if (await tryReadSpecFile(featureName, specName, cwd) === null) {
+		const spec = tree.find((f) => f.featureName === featureName)?.specs.find((s) => s.specName === specName);
+		if (!spec?.hasSpecFile) {
 			error(`spec not found: ${featureName}/${specName} (under ${cwd})`);
 			process.exit(1);
 		}
 		return [{
 			featureName,
-			specName
+			specName,
+			includedBlocks: spec.includedBlocks ?? []
 		}];
 	}
-	const tree = await listFeatureTree(cwd);
 	const out = [];
 	for (const feature of tree) for (const spec of feature.specs) {
 		if (!spec.hasSpecFile) continue;
@@ -3950,206 +4451,11 @@ async function collectTargets(specPath, cwd) {
 			specName: spec.specName
 		};
 		if (spec.relatedPaths) t.relatedPaths = spec.relatedPaths;
-		if (spec.title) t.title = spec.title;
+		if (spec.includedBlocks) t.includedBlocks = spec.includedBlocks;
 		out.push(t);
 	}
 	return out;
 }
-async function runChecks(targets, concurrency, model, cwd, format) {
-	const results = new Array(targets.length);
-	let cursor = 0;
-	const worker = async () => {
-		while (true) {
-			const idx = cursor++;
-			if (idx >= targets.length) return;
-			const target = targets[idx];
-			results[idx] = await checkSpec(target, model, cwd, format);
-		}
-	};
-	const pool = Array.from({ length: Math.min(concurrency, targets.length) }, () => worker());
-	await Promise.all(pool);
-	return results;
-}
-async function checkSpec(target, model, cwd, format) {
-	const { featureName, specName } = target;
-	const existing = await tryReadSpecFile(featureName, specName, cwd);
-	if (existing === null) return {
-		target,
-		ok: false,
-		issues: [],
-		error: `spec file disappeared after enumeration: ${featureName}/${specName}`
-	};
-	if (format === "text") info(`checking ${featureName}/${specName}`);
-	const { result, isError } = await invokeClaudeStreaming({
-		prompt: buildDriftUserPrompt(existing),
-		systemPrompt: buildDriftSystemPrompt(),
-		allowedTools: [
-			"Read",
-			"Grep",
-			"Glob"
-		],
-		silenceBashLog: true,
-		cwd,
-		...model ? { model } : {}
-	}, (_msg) => {});
-	if (isError) return {
-		target,
-		ok: false,
-		issues: [],
-		error: "Claude returned an error result"
-	};
-	const json = extractJsonBlock(result);
-	if (!json) return {
-		target,
-		ok: false,
-		issues: [],
-		error: "Claude did not return a json block"
-	};
-	let report;
-	try {
-		report = DraftReportSchema.parse(JSON.parse(json));
-	} catch (e) {
-		return {
-			target,
-			ok: false,
-			issues: [],
-			error: `failed to parse drift report: ${e.message}`
-		};
-	}
-	return {
-		target,
-		ok: true,
-		issues: report.issues
-	};
-}
-function emitReport(results, format, cwd) {
-	if (format === "json") {
-		emitJson(results);
-		return;
-	}
-	if (format === "github") {
-		emitGithub(results, cwd);
-		return;
-	}
-	emitText(results);
-}
-const CATEGORY_LABEL = {
-	assertable: "Assertability",
-	setups: "Setup references",
-	granularity: "Step granularity",
-	unimplemented: "Unimplemented checks"
-};
-const HEAVY_RULE = "═".repeat(72);
-function emitText(results) {
-	for (const r of results) {
-		blank();
-		const heading = `══ ${r.target.featureName}/${r.target.specName} `;
-		const tail = "═".repeat(Math.max(3, 72 - heading.length));
-		process.stdout.write(`${heading}${tail}\n`);
-		if (r.error) {
-			process.stdout.write(`  ERROR  ${r.error}\n`);
-			continue;
-		}
-		const errors = r.issues.filter((i) => i.severity === "ERROR");
-		const warnings = r.issues.filter((i) => i.severity === "WARN");
-		const passed = r.issues.filter((i) => i.severity === "OK");
-		if (errors.length === 0 && warnings.length === 0) {
-			const label = passed.length === 1 ? "check" : "checks";
-			const detail = passed.length > 0 ? `all ${passed.length} ${label} passed` : "no issues";
-			process.stdout.write(`  ✓  ${detail}\n`);
-			continue;
-		}
-		for (const issue of errors) writeFinding("ERROR", issue);
-		for (const issue of warnings) writeFinding("WARN", issue);
-		if (passed.length > 0) {
-			const names = passed.map((i) => CATEGORY_LABEL[i.category]).join(", ");
-			process.stdout.write(`\n  ✓  passed (${passed.length}): ${names}\n`);
-		}
-	}
-	blank();
-	process.stdout.write(`${HEAVY_RULE}\n`);
-	const totals = summarize(results);
-	meta("specs", `${results.length} (${totals.errored} errored)`);
-	meta("findings", `${totals.error} error, ${totals.warn} warn, ${totals.ok} ok`);
-}
-function writeFinding(level, issue) {
-	const stepPart = issue.stepId ? ` ${issue.stepId}` : "";
-	process.stdout.write(`\n  ${level}  ${CATEGORY_LABEL[issue.category]}${stepPart}\n`);
-	process.stdout.write(`    ${issue.message}\n`);
-	if (issue.detail) process.stdout.write(`    └ ${issue.detail.replace(/\n/g, "\n      ")}\n`);
-}
-function emitJson(results) {
-	const payload = { specs: results.map((r) => ({
-		feature: r.target.featureName,
-		spec: r.target.specName,
-		ok: r.ok,
-		...r.error ? { error: r.error } : {},
-		issues: r.issues.map((i) => ({
-			severity: i.severity,
-			category: i.category,
-			stepId: i.stepId,
-			message: i.message,
-			...i.detail ? { detail: i.detail } : {}
-		}))
-	})) };
-	process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
-}
-function emitGithub(results, cwd) {
-	const repoRoot = process.env["GITHUB_WORKSPACE"] ?? process.cwd();
-	for (const r of results) {
-		const file = githubRelPath(cwd, repoRoot, r.target.featureName, r.target.specName);
-		if (r.error) {
-			process.stdout.write(`::error file=${file}::${escapeGhMessage(r.error)}\n`);
-			continue;
-		}
-		for (const issue of r.issues) {
-			if (issue.severity === "OK") continue;
-			const level = issue.severity === "ERROR" ? "error" : "warning";
-			const title = `${r.target.featureName}/${r.target.specName} — ${issue.category}${issue.stepId ? ` (${issue.stepId})` : ""}`;
-			const body = issue.detail ? `${issue.message}\n${issue.detail}` : issue.message;
-			process.stdout.write(`::${level} file=${file},title=${escapeGhProp(title)}::${escapeGhMessage(body)}\n`);
-		}
-	}
-}
-function githubRelPath(cwd, repoRoot, featureName, specName) {
-	const abs = resolve(cwd, ".ccqa", "features", featureName, "test-cases", specName, "test-spec.md");
-	const rel = relative(repoRoot, abs);
-	return rel.startsWith("..") ? abs : rel;
-}
-function escapeGhMessage(s) {
-	return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A");
-}
-function escapeGhProp(s) {
-	return s.replace(/%/g, "%25").replace(/\r/g, "%0D").replace(/\n/g, "%0A").replace(/,/g, "%2C").replace(/:/g, "%3A");
-}
-function summarize(results) {
-	let error = 0;
-	let warn = 0;
-	let ok = 0;
-	let errored = 0;
-	for (const r of results) {
-		if (r.error) errored++;
-		for (const issue of r.issues) if (issue.severity === "ERROR") error++;
-		else if (issue.severity === "WARN") warn++;
-		else ok++;
-	}
-	return {
-		error,
-		warn,
-		ok,
-		errored
-	};
-}
-function determineExitCode(results, threshold) {
-	for (const r of results) {
-		if (r.error) return 1;
-		for (const issue of r.issues) {
-			if (issue.severity === "ERROR") return 1;
-			if (threshold === "warn" && issue.severity === "WARN") return 1;
-		}
-	}
-	return 0;
-}
 function parseFormat(raw) {
 	const v = raw ?? "text";
 	if (v === "text" || v === "json" || v === "github") return v;
@@ -4192,8 +4498,6 @@ program.addCommand(driftCommand);
 program.addCommand(traceCommand);
 program.addCommand(generateCommand);
 program.addCommand(runCommand);
-program.addCommand(traceSetupCommand);
-program.addCommand(generateSetupCommand);
 program.parse();
 //#endregion
 export {};