npm - ccqa - Versions diffs - 0.8.3 → 0.10.0 - Mend

ccqa 0.8.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +115 -12
package/dist/bin/ccqa.mjs +869 -303
package/dist/package.json +1 -1
package/dist/runtime/test-helpers.d.mts +8 -1
package/dist/runtime/test-helpers.mjs +28 -3
package/package.json +1 -1

package/dist/bin/ccqa.mjs CHANGED Viewed

@@ -6,12 +6,14 @@ import { accessSync, existsSync, readFileSync, statSync } from "node:fs";
 import { fileURLToPath } from "node:url";
 import { access, mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises";
 import { homedir, tmpdir } from "node:os";
-import { delimiter, dirname, join, posix, relative, resolve } from "node:path";
+import { delimiter, dirname, isAbsolute, join, posix, relative, resolve } from "node:path";
 import { parse, stringify } from "yaml";
 import { ZodError, z } from "zod";
 import { execFile, spawn, spawnSync } from "node:child_process";
 import { query } from "@anthropic-ai/claude-agent-sdk";
+import { AsyncLocalStorage } from "node:async_hooks";
 import { promisify } from "node:util";
+import { randomUUID } from "node:crypto";
 import { createInterface } from "node:readline";
 import { createInterface as createInterface$1 } from "node:readline/promises";
 //#region src/runtime/env-vars.ts
@@ -139,6 +141,7 @@ const TestSpecSchema = z.object({
 	title: z.string().min(1),
 	relatedPaths: z.array(z.string().min(1)).optional(),
 	mode: SpecModeSchema.optional(),
+	statePath: z.string().min(1).optional(),
 	steps: z.array(StepSchema).min(1)
 }).strict();
 /** Default mode when `mode:` is absent. */
@@ -480,50 +483,62 @@ async function loadAvailableBlocks(cwd) {
 		}))
 	}));
 }
-const TRACE_USER_PROMPT_PATH = ".ccqa/prompts/trace.user.md";
-const RUN_ND_USER_PROMPT_PATH = ".ccqa/prompts/run-nd.user.md";
+const RECORD_USER_PROMPT_PATH = ".ccqa/prompts/record.user.md";
+const RECORD_AGENT_PROMPT_PATH = ".ccqa/prompts/record.agent.md";
+const LIVE_USER_PROMPT_PATH = ".ccqa/prompts/live.user.md";
+const LIVE_AGENT_PROMPT_PATH = ".ccqa/prompts/live.agent.md";
 const USER_PROMPT_MAX_BYTES = 32768;
 /**
-* Load project-specific guidance to append to the trace system prompt.
+* Load the prompt bundle appended to the `ccqa record` (trace) system prompt.
 *
-* Returns the file's contents (trimmed) when `.ccqa/prompts/trace.user.md`
-* exists and is non-empty. Missing file, empty file, or read error all
-* resolve to `null` so callers can treat the override as strictly optional.
+* Reads `.ccqa/prompts/record.user.md` (human-maintained, stable project
+* rules) and `.ccqa/prompts/record.agent.md` (auto-rewritten by
+* `ccqa record --update-agent-prompt`). Returns null when both files are
+* missing / empty. The combined text is capped at 32 KiB after concatenation.
 *
-* The file is meant for organisation-specific rules that don't belong in
-* the OSS-default prompt — naming conventions, staging URL hints, repeated
-* UI quirks that recur across specs. Anything that genuinely belongs in
-* one spec should go in that spec's instruction, not here.
-*
-* Size-capped at 32 KiB to keep accidental commits of huge files from
-* blowing up the system prompt; the cap is observable to callers as a
-* truncated warning suffix.
+* Use `ccqa init` to scaffold both files.
 */
-async function loadTraceUserPrompt(cwd) {
-	return loadUserPromptFile(TRACE_USER_PROMPT_PATH, cwd, "trace.user.md");
+async function loadRecordPromptBundle(cwd) {
+	return loadPromptBundle(RECORD_USER_PROMPT_PATH, RECORD_AGENT_PROMPT_PATH, cwd);
 }
 /**
-* Load project-specific guidance to append to the `ccqa run-nd` system prompt.
+* Load the prompt bundle appended to the `ccqa run` (live mode) system prompt.
 *
-* Same shape as `loadTraceUserPrompt`, but reads from
-* `.ccqa/prompts/run-nd.user.md`. The non-deterministic test mode delegates
-* every step to Claude live, so anything that helps Claude do that job for a
-* particular product — domain glossary, staging URL conventions, known
-* "this is fine" warnings, login flow quirks — belongs here. Keeping it in the
+* Reads `.ccqa/prompts/live.user.md` (human-maintained, stable project
+* rules) and `.ccqa/prompts/live.agent.md` (auto-rewritten by
+* `ccqa run --update-agent-prompt`). Same null / cap semantics as
+* `loadRecordPromptBundle`. Keeping product-specific context in the
 * consuming repo (not the ccqa OSS prompt) is the explicit non-contamination
-* boundary: ccqa stays product-agnostic, projects can layer in whatever
-* context they need.
+* boundary.
 */
-async function loadRunNdUserPrompt(cwd) {
-	return loadUserPromptFile(RUN_ND_USER_PROMPT_PATH, cwd, "run-nd.user.md");
+async function loadLivePromptBundle(cwd) {
+	return loadPromptBundle(LIVE_USER_PROMPT_PATH, LIVE_AGENT_PROMPT_PATH, cwd);
 }
-async function loadUserPromptFile(relPath, cwd, labelForTruncation) {
+async function loadPromptBundle(userRelPath, agentRelPath, cwd) {
+	const [userText, agentText] = await Promise.all([readPromptFile(userRelPath, cwd), readPromptFile(agentRelPath, cwd)]);
+	if (userText === null && agentText === null) return null;
+	const sections = [];
+	const loaded = [];
+	if (userText !== null) {
+		sections.push(`### Project guidance (human-maintained)\n\n${userText}`);
+		loaded.push(userRelPath);
+	}
+	if (agentText !== null) {
+		sections.push(`### Agent learnings (auto-updated by ccqa --update-agent-prompt)\n\n${agentText}`);
+		loaded.push(agentRelPath);
+	}
+	let text = sections.join("\n\n");
+	if (text.length > USER_PROMPT_MAX_BYTES) text = text.slice(0, USER_PROMPT_MAX_BYTES) + `\n\n[ccqa] (prompt bundle truncated at ${USER_PROMPT_MAX_BYTES} bytes)`;
+	return {
+		text,
+		loaded
+	};
+}
+async function readPromptFile(relPath, cwd) {
 	const content = await readFile(join(cwd ?? process.cwd(), relPath), "utf-8").catch(() => null);
 	if (content === null) return null;
 	const trimmed = content.trim();
-	if (trimmed.length === 0) return null;
-	if (trimmed.length > USER_PROMPT_MAX_BYTES) return trimmed.slice(0, USER_PROMPT_MAX_BYTES) + `\n\n[ccqa] (${labelForTruncation} truncated at ${USER_PROMPT_MAX_BYTES} bytes)`;
-	return trimmed;
+	return trimmed.length === 0 ? null : trimmed;
 }
 /**
 * Probe for orphaned files left over from earlier ccqa versions inside
@@ -567,9 +582,9 @@ async function getTestScript(featureName, specName, cwd) {
 }
 /**
 * Variant of `listAllSpecs` for callers that care about the spec definition
-* itself (spec.yaml) rather than its compiled vitest script. `ccqa run-nd`
-* uses this because it skips codegen entirely — a freshly drafted spec with
-* no `test.spec.ts` is still a valid target.
+* itself (spec.yaml) rather than its compiled vitest script. `ccqa run` uses
+* this for live-mode specs because they skip codegen entirely — a freshly
+* drafted spec with no `test.spec.ts` is still a valid target.
 */
 async function listAllSpecsWithSpecFile(cwd) {
 	return listAllSpecsFilteredBy(SPEC_FILE, cwd);
@@ -589,10 +604,10 @@ async function listAllSpecsFilteredBy(requiredFilename, cwd) {
 	}))).flat();
 }
 /**
-* Resolve a CLI `<target>` argument into a list of spec refs. Shared between
-* `ccqa run` and `ccqa run-nd`. Callers pass the right enumerator for "no
-* target" (run wants `test.spec.ts`-having specs; run-nd wants `spec.yaml`-
-* having specs).
+* Resolve a CLI `<target>` argument into a list of spec refs. Used by
+* `ccqa run`. Callers pass the right enumerator for "no target" (deterministic
+* specs want `test.spec.ts`-having specs; live specs want `spec.yaml`-having
+* specs).
 */
 async function resolveSpecTargets(target, enumerateAll, cwd) {
 	if (!target) return enumerateAll();
@@ -745,6 +760,27 @@ function waitExit(child) {
 	});
 }
 //#endregion
+//#region src/runtime/pool.ts
+/**
+* Run each item through `fn` with at most `concurrency` running at once.
+* Results preserve input order. A throwing `fn` rejects the whole pool
+* (callers that want per-item isolation should catch inside `fn`).
+*/
+async function runPool(items, concurrency, fn) {
+	const results = new Array(items.length);
+	let cursor = 0;
+	const worker = async () => {
+		while (true) {
+			const idx = cursor++;
+			if (idx >= items.length) return;
+			results[idx] = await fn(items[idx], idx);
+		}
+	};
+	const n = Math.max(1, Math.min(concurrency, items.length));
+	await Promise.all(Array.from({ length: n }, () => worker()));
+	return results;
+}
+//#endregion
 //#region src/claude/extract-json.ts
 /**
 * Pulls a JSON object out of a Claude completion. Accepts either a fenced
@@ -767,26 +803,70 @@ const STEP_ICONS = {
 	STEP_SKIPPED: "⊘",
 	RUN_COMPLETED: "■"
 };
+/**
+* When a `withBuffer` scope is active, every log line (stdout and stderr) is
+* appended to its buffer instead of being written immediately. Parallel spec
+* runs use this so each spec's narration — including logs emitted deep inside
+* the live executor — flushes as one contiguous block, not interleaved.
+*
+* The stored value is the `{ out: [] }` object that `withBuffer` creates for
+* each call; `emit` pushes text chunks onto `out`. Outside any scope
+* `getStore()` yields `undefined`, which is what `isBuffered` tests for.
+*/
+const bufferStore = new AsyncLocalStorage();
+/** True while inside a `withBuffer` scope: progress lines avoid TTY cursor tricks. */
+function isBuffered() {
+	return bufferStore.getStore() !== void 0;
+}
+function emit(text, sink = process.stdout) { // single write path for the log helpers: buffer if a scope is active, else write to `sink`
+	const store = bufferStore.getStore();
+	if (store) {
+		store.out.push(text); // `sink` is ignored here, so stderr-bound text is flushed together with stdout text
+		return;
+	}
+	sink.write(text);
+}
+/**
+* Write raw text to the active `withBuffer` scope, or straight to stdout when
+* none is active. Lets a runner redirect sub-process output (e.g. a child's
+* stdout) into the same buffer as its `log.*` lines so they flush together.
+*
+* Thin wrapper over `emit` using its default sink; the text is passed through
+* as-is (no scope prefix, no newline appended).
+*/
+function emitRaw(text) {
+	emit(text);
+}
+/**
+* Run `fn` with all its log output captured into a buffer, then flush the
+* buffer in one shot under `label`. Used by parallel runners to keep each
+* spec's output legible. Output is flushed even when `fn` throws.
+*
+* When `buffered` is false, `fn` runs with no buffer so its output streams
+* live — this is the sequential (concurrency 1) path, unchanged from before.
+*
+* The flush is a single `process.stdout.write` of a `──── label ────` header
+* followed by the joined chunks. Text that was emitted with a stderr sink
+* while buffered therefore ends up on stdout as well. Because the flush goes
+* to `process.stdout` directly rather than through `emit`, it is not captured
+* by an enclosing `withBuffer` scope.
+*
+* @param label    heading printed above the flushed block
+* @param buffered whether to capture output at all
+* @param fn       work to run; its result (or rejection) is passed through
+*/
+async function withBuffer(label, buffered, fn) {
+	if (!buffered) return fn();
+	const store = { out: [] };
+	try {
+		return await bufferStore.run(store, fn);
+	} finally {
+		process.stdout.write(`\n──── ${label} ────\n${store.out.join("")}`);
+	}
+}
 function header(command, target) {
-	process.stdout.write(`\nccqa ${command}${target ? ` ${target}` : ""}\n\n`);
+	emit(`\nccqa ${command}${target ? ` ${target}` : ""}\n\n`);
 }
 function write(scope, message, sink = process.stdout) {
-	sink.write(`[${scope}] ${message}\n`);
+	emit(`[${scope}] ${message}\n`, sink);
 }
 function meta(key, value) {
 	write("meta", `${key}: ${value}`);
 }
 function blank() {
-	process.stdout.write("\n");
+	emit("\n");
 }
 function info(message) {
 	write("info", message);
 }
 function step(type, stepId, detail) {
-	process.stdout.write(`  ${STEP_ICONS[type]} [${stepId}] ${detail}\n`);
+	emit(`  ${STEP_ICONS[type]} [${stepId}] ${detail}\n`);
 }
 function bash(command) {
-	process.stdout.write(`  $ ${command.slice(0, 120)}\n`);
+	emit(`  $ ${command.slice(0, 120)}\n`);
 }
 function error(message) {
 	write("error", message, process.stderr);
@@ -795,7 +875,7 @@ function warn(message) {
 	write("warn", message, process.stderr);
 }
 function hint(message) {
-	process.stdout.write("\n");
+	emit("\n");
 	write("hint", message);
 }
 function fix(message) {
@@ -820,17 +900,17 @@ const PROGRESS_NONTTY_STRIDE = 5;
 let lastProgressNonTtyEmit = -1;
 function progress(current, total, label) {
 	const text = `[info] ${current + 1}/${total} ${label}`;
-	if (process.stdout.isTTY) {
+	if (process.stdout.isTTY && !isBuffered()) {
 		process.stdout.write(`\r${text}\x1b[K`);
 		return;
 	}
 	if (current === 0 || current - lastProgressNonTtyEmit >= PROGRESS_NONTTY_STRIDE) {
-		process.stdout.write(`${text}\n`);
+		emit(`${text}\n`);
 		lastProgressNonTtyEmit = current;
 	}
 }
 function progressEnd() {
-	if (process.stdout.isTTY) process.stdout.write(`\r\x1b[K`);
+	if (process.stdout.isTTY && !isBuffered()) process.stdout.write(`\r\x1b[K`);
 	lastProgressNonTtyEmit = -1;
 }
 /**
@@ -1351,6 +1431,12 @@ function extractAbActionFromBashCommand(cmd) {
 		case "type":
 		case "select": return `AB_ACTION|${subCmd}|${args[0] ?? ""}|${args[1] ?? ""}|${args[2] ?? ""}`;
 		case "drag": return `AB_ACTION|drag|${args[0] ?? ""}|${args[1] ?? ""}|${args[2] ?? ""}`;
+		case "upload": {
+			const sel = args[0] ?? "";
+			const files = args.slice(1);
+			if (!sel || files.length === 0) return null;
+			return `AB_ACTION|upload|${sel}|${files.join("|")}`;
+		}
 		case "snapshot": return null;
 		case "find": return extractFindAbAction(args);
 		default: return null;
@@ -1688,25 +1774,15 @@ const DEFAULT_CONCURRENCY$1 = 3;
 */
 async function analyzeDrift(input) {
 	const { targets, cwd, blocks, concurrency = DEFAULT_CONCURRENCY$1, model, language, onSpecStart } = input;
-	const results = new Array(targets.length);
-	let cursor = 0;
-	const worker = async () => {
-		while (true) {
-			const idx = cursor++;
-			if (idx >= targets.length) return;
-			const target = targets[idx];
-			onSpecStart?.(target);
-			results[idx] = await checkSpec(target, {
-				cwd,
-				blocks,
-				model,
-				language
-			});
-		}
-	};
-	const pool = Array.from({ length: Math.min(concurrency, targets.length) }, () => worker());
-	await Promise.all(pool);
-	return results;
+	return runPool(targets, concurrency, async (target) => {
+		onSpecStart?.(target);
+		return checkSpec(target, {
+			cwd,
+			blocks,
+			model,
+			language
+		});
+	});
 }
 async function checkSpec(target, opts) {
 	const { featureName, specName } = target;
@@ -2324,7 +2400,7 @@ function clamp(n, lo, hi) {
 //#endregion
 //#region src/report/prompt.ts
 function buildFailureAnalysisPrompt(input) {
-	const { script, specYaml, failureLog, ndTranscriptExcerpt, diffPatch, changedFiles, baseRef, driftIssues, outputLanguage = "auto" } = input;
+	const { script, specYaml, failureLog, liveTranscriptExcerpt, diffPatch, changedFiles, baseRef, driftIssues, outputLanguage = "auto" } = input;
 	return `You are analyzing a failing E2E regression test right after a source change landed. Your job is a root-cause CALL, not a fix: decide which of three categories explains the failure, using the source diff as your primary context.
 ${outputLanguageBlock(outputLanguage, "`reasoning`, `detail`", "label names (TEST_DRIFT, etc.)")}## The three categories
@@ -2396,7 +2472,7 @@ Evidence rules: TEST_DRIFT and SPEC_CHANGE require at least one concrete \`file\
 ## Test Spec (spec.yaml)
 ${specYaml}
-${buildExecutionEvidenceBlock(script, failureLog, ndTranscriptExcerpt)}
+${buildExecutionEvidenceBlock(script, failureLog, liveTranscriptExcerpt)}
 ${diffPatch ? `## Source changes since ${baseRef ?? "base"} (git diff, may be truncated)
@@ -2432,14 +2508,14 @@ ${driftIssues.map((i) => `- [${i.severity}] (${DRAFT_CATEGORY_LABEL[i.category]}
 * never has to branch on mode — it just sees "here's what was executed
 * and here's how it failed".
 */
-function buildExecutionEvidenceBlock(script, failureLog, ndTranscriptExcerpt) {
+function buildExecutionEvidenceBlock(script, failureLog, liveTranscriptExcerpt) {
 	const sections = [];
 	if (script && script.length > 0) sections.push(`## Test Script (with line numbers)
 ${numberLines(script)}`);
 	if (failureLog && failureLog.length > 0) sections.push(`## Failure Log
 ${failureLog.slice(0, 8e3)}`);
-	if (ndTranscriptExcerpt && ndTranscriptExcerpt.length > 0) sections.push(`## Live Run Transcript (summary of Claude's per-step execution)
-${ndTranscriptExcerpt}`);
+	if (liveTranscriptExcerpt && liveTranscriptExcerpt.length > 0) sections.push(`## Live Run Transcript (summary of Claude's per-step execution)
+${liveTranscriptExcerpt}`);
 	if (sections.length === 0) return `## Execution evidence
 (No script, failure log, or live transcript was captured for this run. Classify from spec.yaml + diff only, and be correspondingly more conservative — prefer UNKNOWN over a confident call.)`;
@@ -2535,11 +2611,11 @@ const ReportEvidenceSchema = z.object({
 	failureSummary: z.string().nullable().default(null)
 });
 /**
-* Per-step row for a non-deterministic run (`ccqa run-nd`). Mirrors the
-* structure produced by `src/runtime/nd-executor.ts:NdStepResult` but
+* Per-step row for a live-mode run (spec.yaml `mode: live`). Mirrors the
+* structure produced by `src/runtime/live-executor.ts:LiveStepResult` but
 * encoded against the report schema so the HTML renderer can carry both
-* deterministic (`evidence`) and non-deterministic (`ndRun`) sources of
-* step-boundary screenshots.
+* deterministic (`evidence`) and live (`liveRun`) sources of step-boundary
+* screenshots.
 *
 * `beforePng` / `afterPng` are RELATIVE to the HTML report directory — the
 * caller computes the relative path with `node:path`'s `relative()` so the
@@ -2554,7 +2630,7 @@ const ReportEvidenceSchema = z.object({
 * `models` is the union of model ids the SDK reported using; usually a
 * single element, but the SDK can fan out across models in some modes.
 */
-const NdReportCostSchema = z.object({
+const LiveReportCostSchema = z.object({
 	totalCostUsd: z.number().nullable(),
 	durationApiMs: z.number().nullable(),
 	numTurns: z.number().nullable(),
@@ -2564,7 +2640,7 @@ const NdReportCostSchema = z.object({
 	outputTokens: z.number().nullable(),
 	models: z.array(z.string())
 });
-const NdReportStepSchema = z.object({
+const LiveReportStepSchema = z.object({
 	stepId: z.string(),
 	source: z.string(),
 	instruction: z.string(),
@@ -2578,15 +2654,15 @@ const NdReportStepSchema = z.object({
 	beforePng: z.string().nullable(),
 	afterPng: z.string().nullable(),
 	durationMs: z.number(),
-	cost: NdReportCostSchema
+	cost: LiveReportCostSchema
 });
-const NdReportRunSchema = z.object({
+const LiveReportRunSchema = z.object({
 	runId: z.string(),
 	sessionName: z.string(),
 	startedAt: z.string(),
 	durationMs: z.number(),
-	steps: z.array(NdReportStepSchema),
-	cost: NdReportCostSchema
+	steps: z.array(LiveReportStepSchema),
+	cost: LiveReportCostSchema
 });
 const ReportSpecResultSchema = z.object({
 	feature: z.string(),
@@ -2607,7 +2683,7 @@ const ReportSpecResultSchema = z.object({
 	diffExcerpt: z.string().nullable(),
 	specYaml: z.string().nullable(),
 	evidence: z.array(ReportEvidenceSchema).nullable(),
-	ndRun: NdReportRunSchema.nullable()
+	liveRun: LiveReportRunSchema.nullable()
 });
 z.object({
 	schemaVersion: z.literal(1),
@@ -2846,7 +2922,7 @@ function scopePatchForSpec(patch, relatedPaths, caps = {}) {
 	return parts.join("\n");
 }
 //#endregion
-//#region src/runtime/nd-cost-format.ts
+//#region src/runtime/live-cost-format.ts
 /**
 * Compact one-line cost summary. Format:
 *   "$0.1234 · 4 turns · 42 in / 6,511 out · 2.0M cached · sonnet"
@@ -2856,7 +2932,7 @@ function scopePatchForSpec(patch, relatedPaths, caps = {}) {
 * `model=...` segment. `compact: true` (HTML chip) thousand-separates fresh
 * tokens, abbreviates cache-read with K/M, drops the `model=` prefix.
 */
-function formatNdCost(cost, options) {
+function formatLiveCost(cost, options) {
 	if (cost.totalCostUsd === null) return null;
 	const compact = options.compact;
 	const sep = compact ? " · " : " / ";
@@ -2875,7 +2951,7 @@ function formatNdCost(cost, options) {
 * Sum of per-spec costs for a batch. Used only by the CLI batch summary.
 * Returns null when no spec has cost data.
 */
-function formatNdBatchCost(costs) {
+function formatLiveBatchCost(costs) {
 	let totalUsd = 0;
 	let seen = false;
 	let totalIn = 0;
@@ -3176,7 +3252,7 @@ function renderResult(r, index, s) {
 	const heading = r.title ? `<span class="spec-title">${esc(r.title)}</span><span class="spec-slug">(${esc(id)})</span>` : `<span class="spec-title">${esc(id)}</span>`;
 	const predictionLine = r.status === "failed" && r.analysis ? `<span class="label-text label-${r.analysis.label}">${esc(displayLabel(r.analysis.label, s))} · ${Math.round(r.analysis.confidence * 100)}%</span>` : "";
 	const needsGradingDot = r.status === "failed" && r.analysis ? `<span class="needs-grading-dot" data-case-id="${esc(id)}" title="${esc(s.needsGrading)}"></span>` : "";
-	const modeTag = r.ndRun ? `<span class="mode-tag" title="executed in live mode (Claude drove the browser per step)">LIVE</span>` : `<span class="mode-tag" title="executed in deterministic mode (vitest replayed test.spec.ts)">DETERMINISTIC</span>`;
+	const modeTag = r.liveRun ? `<span class="mode-tag" title="executed in live mode (Claude drove the browser per step)">LIVE</span>` : `<span class="mode-tag" title="executed in deterministic mode (vitest replayed test.spec.ts)">DETERMINISTIC</span>`;
 	return `<details class="spec ${r.status}" data-status="${r.status}" data-case-id="${esc(id)}"${r.status === "failed" ? " open" : ""}>
   <summary>
     ${statusIcon(r.status)}
@@ -3189,7 +3265,7 @@ function renderResult(r, index, s) {
   </summary>
   <div class="spec-body">
     ${renderEvidence(r, s)}
-    ${r.ndRun ? renderNdRun(r.ndRun, s) : ""}
+    ${r.liveRun ? renderLiveRun(r.liveRun, s) : ""}
     ${renderSpecBody(r, index, s)}
     ${collapsible(s.collSpecYaml, s.collSpecYamlHelp, r.specYaml)}
   </div>
@@ -3200,16 +3276,16 @@ function renderSpecBody(r, index, s) {
 	if (r.analysis) return renderAnalysis(r, index, s);
 	return renderSkippedWithSupporting(r, s);
 }
-function renderNdRun(nd, strings) {
-	const stepItems = nd.steps.map((s) => {
+function renderLiveRun(live, strings) {
+	const stepItems = live.steps.map((s) => {
 		const before = s.beforePng ? `<a class="shot" href="${esc(s.beforePng)}" target="_blank" rel="noopener"><img src="${esc(s.beforePng)}" alt="before ${esc(s.stepId)}" loading="lazy"><span>before</span></a>` : "";
 		const after = s.afterPng ? `<a class="shot" href="${esc(s.afterPng)}" target="_blank" rel="noopener"><img src="${esc(s.afterPng)}" alt="after ${esc(s.stepId)}" loading="lazy"><span>after</span></a>` : "";
 		const dur = s.durationMs > 0 ? `<span class="duration">${formatDuration$1(s.durationMs)}</span>` : "";
-		const stepCost = formatNdCostChip(s.cost);
+		const stepCost = formatLiveCostChip(s.cost);
 		const stepModel = formatModelChip(s.cost.models);
-		const sourceBadge = s.source && s.source !== "spec" ? `<span class="nd-source">[${esc(s.source)}]</span>` : "";
-		return `<li class="nd-step ${s.status}">
-        <div class="nd-step-head">
+		const sourceBadge = s.source && s.source !== "spec" ? `<span class="live-source">[${esc(s.source)}]</span>` : "";
+		return `<li class="live-step ${s.status}">
+        <div class="live-step-head">
           ${statusIcon(s.status)}
           <span class="step-name">${esc(s.stepId)}</span>
           ${sourceBadge}
@@ -3218,44 +3294,44 @@ function renderNdRun(nd, strings) {
           ${stepCost}
           ${dur}
         </div>
-        <div class="nd-step-body">
-          <p class="nd-instr"><strong>${esc(strings.stepDoLabel)}:</strong> ${esc(s.instruction)}</p>
-          <p class="nd-instr"><strong>${esc(strings.stepExpectLabel)}:</strong> ${esc(s.expected)}</p>
-          ${s.reasoning ? `<p class="nd-reasoning">${esc(s.reasoning)}</p>` : ""}
-          ${before || after ? `<div class="nd-shots">${before}${after}</div>` : ""}
+        <div class="live-step-body">
+          <p class="live-instr"><strong>${esc(strings.stepDoLabel)}:</strong> ${esc(s.instruction)}</p>
+          <p class="live-instr"><strong>${esc(strings.stepExpectLabel)}:</strong> ${esc(s.expected)}</p>
+          ${s.reasoning ? `<p class="live-reasoning">${esc(s.reasoning)}</p>` : ""}
+          ${before || after ? `<div class="live-shots">${before}${after}</div>` : ""}
         </div>
       </li>`;
 	}).join("\n");
-	const runCost = formatNdCostChip(nd.cost);
-	const runModel = formatModelChip(nd.cost.models);
-	return `<section class="nd-run">
-    <details class="nd-run-meta">
+	const runCost = formatLiveCostChip(live.cost);
+	const runModel = formatModelChip(live.cost.models);
+	return `<section class="live-run">
+    <details class="live-run-meta">
       <summary>${labelWithHelp(esc(strings.collLiveRunMeta), strings.collLiveRunMetaHelp)}</summary>
-      <div class="nd-run-meta-body">
+      <div class="live-run-meta-body">
         <span class="dim">${esc(strings.liveRunIdLabel)}</span>
-        <code>${esc(nd.runId)}</code>
+        <code>${esc(live.runId)}</code>
         <span class="dim">${esc(strings.liveSessionLabel)}</span>
-        <code>${esc(nd.sessionName)}</code>
+        <code>${esc(live.sessionName)}</code>
         ${runModel}
         ${runCost}
-        <span class="duration">${formatDuration$1(nd.durationMs)}</span>
+        <span class="duration">${formatDuration$1(live.durationMs)}</span>
       </div>
     </details>
-    <ol class="nd-steps">${stepItems}</ol>
+    <ol class="live-steps">${stepItems}</ol>
   </section>`;
 }
 /** Compact dot-separated cost chip, e.g. "$0.1234 · 4 turns · 42 in / 6,511 out · 2.0M cached". */
-function formatNdCostChip(cost) {
-	const line = formatNdCost(cost, { compact: true });
+function formatLiveCostChip(cost) {
+	const line = formatLiveCost(cost, { compact: true });
 	if (line === null) return "";
-	return `<span class="nd-cost" title="cost · turns · fresh-input/output tokens · cache-read input">${esc(line)}</span>`;
+	return `<span class="live-cost" title="cost · turns · fresh-input/output tokens · cache-read input">${esc(line)}</span>`;
 }
 function formatModelChip(models) {
 	if (!models || models.length === 0) return "";
-	return `<span class="nd-model" title="Claude model id(s) reported by the SDK">${esc(models.join(", "))}</span>`;
+	return `<span class="live-model" title="Claude model id(s) reported by the SDK">${esc(models.join(", "))}</span>`;
 }
 /**
-* Per-step UI for deterministic runs. Adopts the same `nd-step` card layout
+* Per-step UI for deterministic runs. Adopts the same `live-step` card layout
 * used by live runs so reviewers don't have to context-switch between two
 * visual idioms. We map the evidence entries (which are already keyed by
 * stepId) onto the same shape, leaving live-only fields (before png, cost,
@@ -3263,14 +3339,14 @@ function formatModelChip(models) {
 */
 function renderEvidence(r, s) {
 	if (!r.evidence || r.evidence.length === 0) return "";
-	return `<section class="nd-run">
-    <ol class="nd-steps">${r.evidence.map((e) => renderDetStepCard(e, s)).join("\n")}</ol>
+	return `<section class="live-run">
+    <ol class="live-steps">${r.evidence.map((e) => renderDetStepCard(e, s)).join("\n")}</ol>
   </section>`;
 }
 function renderDetStepCard(e, s) {
 	const status = e.status === "failed" ? "failed" : "passed";
-	const description = e.description ? `<p class="nd-instr"><strong>${esc(s.stepExpectLabel)}:</strong> ${esc(e.description)}</p>` : "";
-	const failureBlock = e.status === "failed" && e.failureSummary ? `<p class="nd-reasoning">${esc(e.failureSummary)}</p>` : "";
+	const description = e.description ? `<p class="live-instr"><strong>${esc(s.stepExpectLabel)}:</strong> ${esc(e.description)}</p>` : "";
+	const failureBlock = e.status === "failed" && e.failureSummary ? `<p class="live-reasoning">${esc(e.failureSummary)}</p>` : "";
 	const metaRows = [];
 	if (e.url) {
 		const shortUrl = shortenUrl(e.url);
@@ -3279,16 +3355,16 @@ function renderDetStepCard(e, s) {
 	if (e.title) metaRows.push(`<div class="evidence-meta-row"><span class="evidence-meta-label">${esc(s.metaPage)}</span><span class="evidence-meta-value">${esc(e.title)}</span></div>`);
 	const meta = metaRows.length > 0 ? `<div class="evidence-meta">${metaRows.join("")}</div>` : "";
 	const after = `<a class="shot" href="${esc(e.pngPath)}" target="_blank" rel="noopener"><img src="${esc(e.pngPath)}" alt="${esc(e.stepId)}" loading="lazy"><span>after</span></a>`;
-	return `<li class="nd-step ${status}">
-    <div class="nd-step-head">
+	return `<li class="live-step ${status}">
+    <div class="live-step-head">
       ${statusIcon(status)}
       <span class="step-name">${esc(e.stepId)}</span>
       <span class="spacer"></span>
     </div>
-    <div class="nd-step-body">
+    <div class="live-step-body">
       ${description}
       ${failureBlock}
-      <div class="nd-shots">${after}</div>
+      <div class="live-shots">${after}</div>
       ${meta}
     </div>
   </li>`;
@@ -3726,54 +3802,54 @@ table.matrix td.miss-nonzero { background: var(--fail-bg); }
 /* Per-step block: indented + a thin rail under the test title so the
    hierarchy spec → test → step is visible. */
-.nd-run {
+.live-run {
   padding: 0 0 0 14px;
   margin-left: 6px;
   border-left: 1px solid var(--border-soft);
 }
-.nd-run-meta { margin: 0 0 8px; font-size: 11.5px; }
-.nd-run-meta > summary {
+.live-run-meta { margin: 0 0 8px; font-size: 11.5px; }
+.live-run-meta > summary {
   cursor: pointer; color: var(--text-mute); list-style: none;
   padding: 4px 0;
 }
-.nd-run-meta > summary::-webkit-details-marker { display: none; }
-.nd-run-meta > summary::before {
+.live-run-meta > summary::-webkit-details-marker { display: none; }
+.live-run-meta > summary::before {
   content: "▸"; color: var(--text-dim); font-size: 10px;
   margin-right: 6px; transition: transform 0.12s ease;
   display: inline-block;
 }
-.nd-run-meta[open] > summary::before { transform: rotate(90deg); }
-.nd-run-meta-body {
+.live-run-meta[open] > summary::before { transform: rotate(90deg); }
+.live-run-meta-body {
   display: flex; gap: 12px; align-items: baseline; flex-wrap: wrap;
   color: var(--text-mute); padding: 6px 0 8px 16px;
 }
-.nd-run-meta-body code { background: transparent; padding: 0; font-size: 11.5px; color: var(--text-dim); }
-.nd-run-meta-body .dim { color: var(--text-mute); }
+.live-run-meta-body code { background: transparent; padding: 0; font-size: 11.5px; color: var(--text-dim); }
+.live-run-meta-body .dim { color: var(--text-mute); }
 /* Steps: flat list. The separator between steps has to outweigh anything
    *inside* a step (e.g. evidence-meta footer) so the eye finds the
    step boundary at a glance — hence a solid var(--border), not the
    softer hairline used inside the step body. */
-.nd-steps { list-style: none; padding: 0; margin: 0; display: flex; flex-direction: column; gap: 0; }
-.nd-step { border-top: 1px solid var(--border); padding: 16px 0; background: transparent; }
-.nd-step:first-child { border-top: 0; padding-top: 0; }
-.nd-step.skipped { opacity: 0.55; }
-.nd-step-head { display: flex; align-items: baseline; gap: 8px; padding: 0; background: transparent; border-bottom: 0; font-size: 13px; margin-bottom: 6px; }
-.nd-step-body { padding: 0; font-size: 12.5px; line-height: 1.55; }
-.nd-step-body p { margin: 4px 0; }
-.nd-instr strong { color: var(--text-mute); font-weight: 600; margin-right: 4px; font-size: 11px; letter-spacing: 0.04em; text-transform: uppercase; }
+.live-steps { list-style: none; padding: 0; margin: 0; display: flex; flex-direction: column; gap: 0; }
+.live-step { border-top: 1px solid var(--border); padding: 16px 0; background: transparent; }
+.live-step:first-child { border-top: 0; padding-top: 0; }
+.live-step.skipped { opacity: 0.55; }
+.live-step-head { display: flex; align-items: baseline; gap: 8px; padding: 0; background: transparent; border-bottom: 0; font-size: 13px; margin-bottom: 6px; }
+.live-step-body { padding: 0; font-size: 12.5px; line-height: 1.55; }
+.live-step-body p { margin: 4px 0; }
+.live-instr strong { color: var(--text-mute); font-weight: 600; margin-right: 4px; font-size: 11px; letter-spacing: 0.04em; text-transform: uppercase; }
 /* Reasoning: left rail, no fill. */
-.nd-reasoning { color: var(--text-dim); font-style: italic; background: transparent; padding: 4px 0 4px 12px; border-left: 2px solid var(--fail); border-radius: 0; margin: 6px 0; }
-.nd-step.passed .nd-reasoning { border-left-color: var(--border); color: var(--text-mute); font-style: normal; }
+.live-reasoning { color: var(--text-dim); font-style: italic; background: transparent; padding: 4px 0 4px 12px; border-left: 2px solid var(--fail); border-radius: 0; margin: 6px 0; }
+.live-step.passed .live-reasoning { border-left-color: var(--border); color: var(--text-mute); font-style: normal; }
-.nd-source { font-size: 11px; color: var(--text-mute); }
-.nd-shots { display: flex; gap: 12px; margin-top: 10px; flex-wrap: wrap; }
-.nd-shots .shot { display: flex; flex-direction: column; align-items: center; gap: 4px; text-decoration: none; color: var(--text-mute); font-size: 10px; letter-spacing: 0.08em; }
-.nd-shots .shot img { max-width: 280px; max-height: 180px; border: 1px solid var(--border-soft); border-radius: 3px; object-fit: contain; background: #000; }
+.live-source { font-size: 11px; color: var(--text-mute); }
+.live-shots { display: flex; gap: 12px; margin-top: 10px; flex-wrap: wrap; }
+.live-shots .shot { display: flex; flex-direction: column; align-items: center; gap: 4px; text-decoration: none; color: var(--text-mute); font-size: 10px; letter-spacing: 0.08em; }
+.live-shots .shot img { max-width: 280px; max-height: 180px; border: 1px solid var(--border-soft); border-radius: 3px; object-fit: contain; background: #000; }
 /* Cost / model chips: muted text, no fill. */
-.nd-cost, .nd-model {
+.live-cost, .live-model {
   font-size: 11px; padding: 0;
   background: transparent;
   color: var(--text-mute);
@@ -4083,6 +4159,123 @@ const CLIENT_JS = `
 })();
 `;
 //#endregion
+//#region src/runtime/profile-env.ts
+/**
+* Profile env (Issue #37). A profile is a named `.env` under
+* `.ccqa/profiles/<name>.env`; its contents merge into `process.env` before any
+* spec work, so one spec targets dev/stg/prd without per-environment copies.
+* Spec `${VAR}` references all resolve against `process.env` downstream.
+*
+* The `.env` parser is a small hand-rolled subset (no dotenv dependency).
+*/
+/**
+* Parse a `.env` body into a `name → value` map. Subset: blank / `#` lines
+* skipped, optional leading `export`, split on the first `=`, surrounding
+* quotes stripped, inline `# comment` dropped. No multi-line / interpolation.
+*
+* A value that is nothing but an inline comment (`KEY= # note`) parses as the
+* empty string. A `#` that directly follows the `=` (`KEY=#fff`) is kept as
+* part of the value, matching the "whitespace then `#`" comment rule used for
+* every other unquoted value.
+*
+* @param {string} content Raw text of the `.env` file.
+* @returns {Record<string, string>} Parsed variables; a later duplicate key wins.
+*/
+function parseDotenv(content) {
+	const out = {};
+	for (const rawLine of content.split(/\r?\n/)) {
+		const line = rawLine.trim();
+		if (line === "" || line.startsWith("#")) continue;
+		const withoutExport = line.replace(/^export\s+/, "");
+		const eq = withoutExport.indexOf("=");
+		if (eq === -1) continue;
+		const key = withoutExport.slice(0, eq).trim();
+		if (key === "") continue;
+		const rest = withoutExport.slice(eq + 1);
+		// Decide "comment-only value" before trimming: once the leading whitespace
+		// is gone, `KEY= # note` is indistinguishable from `KEY=# note`.
+		out[key] = /^\s+#/.test(rest) ? "" : parseValue(rest.trim());
+	}
+	return out;
+}
+/**
+* Turn the already-trimmed right-hand side of a `.env` assignment into its
+* value.
+*
+* A value that opens with `"` or `'` is unquoted only when the matching
+* closing quote is followed by nothing except optional whitespace and an
+* optional `# comment`; otherwise it falls through to the unquoted rule, where
+* the first "whitespace then `#`" starts a comment that is cut off.
+*
+* @param {string} raw Trimmed text after the first `=`.
+* @returns {string} The value with quotes / inline comment removed.
+*/
+function parseValue(raw) {
+	const quote = raw[0];
+	if (quote === "\"" || quote === "'") {
+		const close = raw.indexOf(quote, 1);
+		if (close !== -1 && /^\s*(#.*)?$/.test(raw.slice(close + 1))) return raw.slice(1, close);
+	}
+	const hash = raw.search(/\s#/);
+	return hash === -1 ? raw : raw.slice(0, hash).trimEnd();
+}
+/**
+* Error raised when the `.env` file backing a named profile does not exist.
+* Exposes the requested `profile` name and the `path` that was looked up.
+*/
+var ProfileNotFoundError = class extends Error {
+	profile;
+	path;
+	constructor(profile, path) {
+		const message = `profile "${profile}" not found: ${path}`;
+		super(message);
+		Object.assign(this, {
+			name: "ProfileNotFoundError",
+			profile,
+			path
+		});
+	}
+};
+/**
+* Error raised when a profile name is not a bare, single path segment.
+* Exposes the rejected `profile` string.
+*/
+var InvalidProfileNameError = class extends Error {
+	profile;
+	constructor(profile) {
+		const message = `invalid profile name "${profile}": expected a bare name like "stg" (no path separators, no leading dot)`;
+		super(message);
+		Object.assign(this, {
+			name: "InvalidProfileNameError",
+			profile
+		});
+	}
+};
+/**
+* Guard for `--profile <name>`: the name has to be one path segment that does
+* not start with a dot, so it can never address a file outside the profiles
+* directory (e.g. `--profile ../../etc/hosts`). Because separators and a
+* leading dot are both refused, `..` traversal is already impossible and a
+* name that merely contains `..` (like `v1..2`) is still accepted.
+*
+* Throws `InvalidProfileNameError` for an empty name, a name containing `/`
+* or `\`, or a name beginning with `.`.
+*/
+function assertValidProfileName(profile) {
+	const hasSeparator = ["/", "\\"].some((separator) => profile.includes(separator));
+	const acceptable = profile !== "" && !hasSeparator && !profile.startsWith(".");
+	if (!acceptable) throw new InvalidProfileNameError(profile);
+}
+/**
+* Location of the `.env` file backing `<profile>`:
+* `<cwd>/.ccqa/profiles/<profile>.env`. The name is validated first, so an
+* invalid profile name throws before any path is built.
+*/
+function profilePath(profile, cwd) {
+	assertValidProfileName(profile);
+	const fileName = `${profile}.env`;
+	const profilesDir = join(cwd, ".ccqa", "profiles");
+	return join(profilesDir, fileName);
+}
+/**
+* Read the `.env` file at `path` and parse it. Resolves to `null` when the
+* file does not exist (`ENOENT`); any other read failure is rethrown as-is.
+*/
+async function readDotenv(path) {
+	const content = await readFile(path, "utf8").catch((err) => {
+		if (err.code === "ENOENT") return null;
+		throw err;
+	});
+	return content === null ? null : parseDotenv(content);
+}
+/**
+* Read and parse `.ccqa/profiles/<profile>.env`. An absent file is an error
+* (`ProfileNotFoundError`) rather than an empty result: a mistyped profile
+* name has to fail loudly instead of quietly resolving every credential to
+* empty.
+*/
+async function loadProfileEnv(profile, cwd) {
+	const path = profilePath(profile, cwd);
+	const parsed = await readDotenv(path);
+	if (parsed !== null) return parsed;
+	throw new ProfileNotFoundError(profile, path);
+}
+/** Path of the fallback `<cwd>/.env` that ccqa loads when no `--profile` is passed. */
+function defaultEnvPath(cwd) {
+	const envFileName = ".env";
+	return join(cwd, envFileName);
+}
+/**
+* Read and parse `<cwd>/.env`, used when no `--profile` is supplied. Resolves
+* to `null` when that file is absent, in which case the run simply keeps the
+* `process.env` it already has.
+*/
+async function loadDefaultEnv(cwd) {
+	const envFile = defaultEnvPath(cwd);
+	return readDotenv(envFile);
+}
+/**
+* Copy `vars` into `process.env`. `opts.override` defaults to true, meaning a
+* value from `vars` replaces an inherited one; with `override: false` any
+* name already present in `process.env` is left untouched. The return value is
+* the list of names that were written — names only, never values, so callers
+* can log it without leaking secrets.
+*/
+function applyProfileEnv(vars, opts = {}) {
+	const keepExisting = !(opts.override ?? true);
+	const applied = [];
+	Object.entries(vars).forEach(([name, value]) => {
+		if (keepExisting && process.env[name] !== void 0) return;
+		process.env[name] = value;
+		applied.push(name);
+	});
+	return applied;
+}
+//#endregion
 //#region src/cli/options.ts
 /**
 * Shared `--language` flag. Every Claude-driven command writes some
@@ -4093,6 +4286,53 @@ const CLIENT_JS = `
 function addLanguageOption(command) {
 	return command.option("--language <bcp47>", "Language for human-readable output (e.g. 'en', 'ja'). Default 'auto' follows the language of the spec/codebase.", DEFAULT_LANGUAGE);
 }
+/**
+* Register the shared `--profile <name>` flag on `command` and return the
+* result of that registration. Both browser-driving commands (`run`, `record`)
+* go through here so the flag's help text and behaviour cannot drift apart.
+*/
+function addProfileOption(command) {
+	const flags = "--profile <name>";
+	const description = "Load .ccqa/profiles/<name>.env into the environment before resolving spec ${VAR} references (URLs, credentials), so one spec can target dev/stg/prd without per-environment copies. Profile values override the inherited environment.";
+	return command.option(flags, description);
+}
+/**
+* Prepare `process.env` for a `run` / `record` invocation before any spec
+* work happens. When `--profile <name>` was given, that profile is loaded
+* (a missing or invalid one exits with code 2). When it was not given,
+* `<cwd>/.env` is auto-loaded if it exists; its absence is not an error.
+* The test is strictly "is it undefined", so `--profile ""` is rejected as an
+* invalid name instead of being treated as "no profile".
+*/
+async function applyProfileFromOption(profile, cwd) {
+	if (profile === void 0) {
+		await applyDefaultEnv(cwd);
+		return;
+	}
+	await applyNamedProfile(profile, cwd);
+}
+/** Count summary for the meta line of both load paths: "1 var", "2 vars", "0 vars". */
+function varCount(n) {
+	const noun = n === 1 ? "var" : "vars";
+	return `${n} ${noun}`;
+}
+/**
+* Load the named profile, merge it into `process.env` (profile values win),
+* and log a `profile` meta line with the number of variables applied. A
+* profile that defines nothing triggers a warning. Any failure is reported
+* via `error` (plus a creation hint when the file is missing) and ends the
+* process with exit code 2.
+*/
+async function applyNamedProfile(profile, cwd) {
+	try {
+		const vars = await loadProfileEnv(profile, cwd);
+		const names = applyProfileEnv(vars);
+		meta("profile", `${profile} (${varCount(names.length)})`);
+		if (names.length === 0) {
+			warn(`profile "${profile}" defined no variables — spec $\{VAR} references will resolve to empty`);
+		}
+	} catch (err) {
+		if (err instanceof ProfileNotFoundError) {
+			error(err.message);
+			hint(`create ${err.path} with the environment's $\{VAR} values`);
+		} else if (err instanceof InvalidProfileNameError) {
+			error(err.message);
+		} else {
+			const reason = err instanceof Error ? err.message : String(err);
+			error(`failed to load profile "${profile}": ${reason}`);
+		}
+		process.exit(2);
+	}
+}
+/**
+* Auto-load `<cwd>/.env` when no profile was requested. A missing file is a
+* silent no-op; a read failure is reported and exits with code 2. Values are
+* merged with `override: false`, so variables already set in the inherited
+* environment are kept. Logs an `env` meta line with the applied count.
+*/
+async function applyDefaultEnv(cwd) {
+	let loaded;
+	try {
+		loaded = await loadDefaultEnv(cwd);
+	} catch (err) {
+		const reason = err instanceof Error ? err.message : String(err);
+		error(`failed to load ${defaultEnvPath(cwd)}: ${reason}`);
+		process.exit(2);
+	}
+	if (loaded === null) return;
+	const names = applyProfileEnv(loaded, { override: false });
+	meta("env", `.env (${varCount(names.length)})`);
+}
 //#endregion
 //#region src/cli/resolve-cwd.ts
 /**
@@ -4104,7 +4344,7 @@ function addLanguageOption(command) {
 *
 * It's mostly useful in monorepos where you want to invoke ccqa from the
 * repo root but target a subpackage (e.g.
-* `ccqa run --cwd js/apps/knowledge-webapp`).
+* `ccqa run --cwd apps/web-app`).
 *
 * Falls back to `process.cwd()` when the option is not given.
 */
@@ -4249,7 +4489,7 @@ function formatAgentBrowserUnavailableMessage() {
 //#region src/cli/preflight.ts
 /**
 * Shared startup steps for every command that drives a real `agent-browser`
-* (currently `ccqa trace` and `ccqa run-nd`):
+* (currently `ccqa record` (trace) and `ccqa run` (live mode)):
 *
 *   1. Verify the peer-installed agent-browser binary is reachable. On
 *      failure print the standard guidance and `process.exit(1)`; on
@@ -4276,14 +4516,14 @@ async function preflightAgentBrowserCommand() {
 	await warnStaleBlockArtifacts();
 }
 //#endregion
-//#region src/report/nd-transcript-excerpt.ts
+//#region src/report/live-transcript-excerpt.ts
 /**
 * Build a compact transcript summary for the failure classifier.
 *
 * Returns `null` when the run has no failed step (every step passed/skipped),
 * since the failure analyzer has nothing to explain in that case.
 */
-async function buildNdTranscriptExcerpt(result, options = {}) {
+async function buildLiveTranscriptExcerpt(result, options = {}) {
 	const failingIndex = result.steps.findIndex((s) => s.status === "failed");
 	if (failingIndex === -1) return null;
 	const failingStep = result.steps[failingIndex];
@@ -4316,7 +4556,7 @@ function oneLine$1(s) {
 	return s.replace(/\s+/g, " ").trim();
 }
 //#endregion
-//#region src/runtime/nd-artifacts.ts
+//#region src/runtime/live-artifacts.ts
 /**
 * Build a sortable run id from the current wall-clock time. ISO8601 with
 * `:` / `.` replaced so it's filename-safe. Caller is expected to mkdir the
@@ -4346,6 +4586,12 @@ function stepArtifactPaths(runDir, stepId) {
 //#endregion
 //#region src/claude/agent-browser-invoke.ts
 function agentBrowserInvokeBase(input) {
+	const env = {
+		AGENT_BROWSER_SESSION: input.sessionName,
+		CCQA_RUN_ID: input.runId,
+		PATH: pathWithAgentBrowserShim(process.env["PATH"])
+	};
+	if (input.statePath) env["CCQA_AB_STATE"] = input.statePath;
 	return {
 		allowedTools: [
 			"Bash(*)",
@@ -4353,23 +4599,25 @@ function agentBrowserInvokeBase(input) {
 			"Grep",
 			"Glob"
 		],
-		env: {
-			AGENT_BROWSER_SESSION: input.sessionName,
-			CCQA_RUN_ID: input.runId,
-			PATH: pathWithAgentBrowserShim(process.env["PATH"])
-		}
+		env
 	};
 }
 //#endregion
-//#region src/prompts/run-nd.ts
-function generateRunNdSessionName() {
-	return `ccqa-run-nd-${buildRunId()}`;
+//#region src/prompts/live.ts
+/**
+* Build an agent-browser session name that is unique per spec. The run id
+* comes from the wall clock at millisecond precision, so with
+* `--concurrency > 1` two specs may start within the same millisecond and
+* produce the same id. The first 8 characters of a random UUID are appended
+* so every spec still gets its own Chrome session and no state leaks between
+* parallel runs.
+*/
+function generateLiveSessionName() {
+	const runId = buildRunId();
+	const randomSuffix = randomUUID().slice(0, 8);
+	return `ccqa-live-${runId}-${randomSuffix}`;
 }
 /**
-* Static prefix of the `ccqa run-nd` system prompt. Built once per run and
-* reused across every step's invocation — the only piece that changes per
-* step is the trailing "Your Task: <stepId>" section produced by
-* `buildRunNdSystemPromptStepSection`. Keeping the split here lets the prompt
+* Static prefix of the `ccqa run` (live spec) system prompt. Built once per
+* run and reused across every step's invocation — the only piece that
+* changes per step is the trailing "Your Task: <stepId>" section produced by
+* `buildLiveSystemPromptStepSection`. Keeping the split here lets the prompt
 * cache absorb the shared bulk and keeps each turn's prompt construction down
 * to a small string concat.
 *
@@ -4378,32 +4626,35 @@ function generateRunNdSessionName() {
 * but never names a specific product, URL, account, role, or UI element.
 * Project-specific guidance ("the admin tenant is foo.example", "session
 * times out at X minutes", …) is appended from
-* `.ccqa/prompts/run-nd.user.md` by the caller, so ccqa stays clean of
-* downstream-product context.
+* `.ccqa/prompts/live.user.md` (human-maintained) and
+* `.ccqa/prompts/live.agent.md` (updated by `ccqa run --update-agent-prompt`)
+* by the caller, so ccqa stays clean of downstream-product context.
 *
-* Constraint posture: `ccqa trace` enforces a strict selector whitelist and
-* blocks `eval` / `@ref` / chained agent-browser invocations because those
-* trace outputs need to replay deterministically. `run-nd` has no replay —
-* the model judges the step live — so those guards are off and the model is
-* told it may use any agent-browser subcommand and any selector strategy.
-*/
-function buildRunNdSystemPromptPrefix(input) {
+* Constraint posture: `ccqa record` (trace) enforces a strict selector
+* whitelist and blocks `eval` / `@ref` / chained agent-browser invocations
+* because those trace outputs need to replay deterministically. Live specs
+* have no replay — the model judges the step live — so those guards are off
+* and the model is told it may use any agent-browser subcommand and any
+* selector strategy.
+*/
+function buildLiveSystemPromptPrefix(input) {
 	const stepsText = input.allSteps.map((s) => `### ${s.id} [${s.source}]
 - **Instruction**: ${s.instruction}
 - **Expected**: ${s.expected}`).join("\n\n");
+	const stateLine = input.statePath ? `\n\nA pre-recorded auth-state file is provided at \`${input.statePath}\` (also in the env var \`CCQA_AB_STATE\`). **Always also pass \`--state "$CCQA_AB_STATE"\`** to every \`agent-browser\` command — this restores cookies and localStorage from a prior interactive login, so the user is already signed in to the application under test from step 1. The file is loaded read-only; do not run \`agent-browser state save\`.` : "";
 	return `You are a QA execution agent. You are executing ONE step of a browser-based end-to-end test and judging whether the step's expected outcome was achieved. You are NOT recording a replayable test script — be flexible, explore the DOM as needed, and make a clear pass / fail call at the end.
 ## Session
 SESSION NAME: \`${input.sessionName}\`
-Always pass \`--session ${input.sessionName}\` to every \`agent-browser\` command. The session persists across steps within this test run, so the browser state from previous steps is already loaded when this turn starts.
+Always pass \`--session ${input.sessionName}\` to every \`agent-browser\` command. The session persists across steps within this test run, so the browser state from previous steps is already loaded when this turn starts.${stateLine}
 ## Tools
 You have:
-- **Bash** to run \`agent-browser\` (the full surface — \`open\`, \`snapshot\`, \`click\`, \`fill\`, \`press\`, \`wait\`, \`find\`, \`screenshot\`, \`eval\`, \`js\`, \`get\`, etc.). Any selector form is allowed: \`@ref\` (e.g. \`@e14\`), CSS selectors, \`text=...\`, \`[aria-label='...']\`, \`[data-testid='...']\`, bare tags inside \`find first/last/nth\` — whatever works for this single run. There is no replay contract to honour.
+- **Bash** to run \`agent-browser\` (the full surface — \`open\`, \`snapshot\`, \`click\`, \`fill\`, \`upload\`, \`press\`, \`wait\`, \`find\`, \`screenshot\`, \`eval\`, \`js\`, \`get\`, etc.). Any selector form is allowed: \`@ref\` (e.g. \`@e14\`), CSS selectors, \`text=...\`, \`[aria-label='...']\`, \`[data-testid='...']\`, bare tags inside \`find first/last/nth\` — whatever works for this single run. There is no replay contract to honour. For file inputs (\`<input type="file">\`) do NOT \`click\` the input — use \`agent-browser upload "<selector>" <path>\` so no OS file-picker dialog opens. Fixtures conventionally live under \`.ccqa/fixtures/\`; reference them via \`\${CCQA_FIXTURES_DIR}/<name>\`.
 - **Read / Grep / Glob** for inspecting the application source code when you need to find a selector or understand routing. Read-only — do not modify source files.
 ## Test Specification
@@ -4456,7 +4707,7 @@ Everything else you write (narrative, tool output summaries, etc.) is fine — o
 `;
 }
 /** Per-step trailer with the current step's instruction / expected. */
-function buildRunNdSystemPromptStepSection(step) {
+function buildLiveSystemPromptStepSection(step) {
 	return `
 ## Your Task: ${step.id}
@@ -4467,11 +4718,11 @@ Execute the instruction in the running browser session, then judge whether the e
 `;
 }
 /** Per-turn user message — the system prompt already carries all spec context. */
-function buildRunNdUserPrompt(step) {
+function buildLiveUserPrompt(step) {
 	return `Execute step ${step.id} and emit your STEP_RESULT verdict as instructed in the system prompt.`;
 }
 //#endregion
-//#region src/runtime/nd-result-parse.ts
+//#region src/runtime/live-result-parse.ts
 const MAX_REASON_LEN = 2e3;
 /** Parse a single STEP_RESULT line. Returns null on malformed input. */
 function parseStepResultLine(line) {
@@ -4501,7 +4752,7 @@ function findLastStepResult(text) {
 //#region src/runtime/screenshot.ts
 /**
 * Take a PNG screenshot of the current page in the given agent-browser session
-* and write it to `outPath`. Used by `ccqa run-nd` to capture per-step
+* and write it to `outPath`. Used by `ccqa run` (live mode) to capture per-step
 * artifacts (before / after the step's actions) so the human-readable run
 * report has a visual trail even though no AB_ACTION stream is recorded.
 *
@@ -4511,11 +4762,9 @@ function findLastStepResult(text) {
 * artifact, not a reason to abort the test step.
 */
 function takeScreenshot(sessionName, outPath, options) {
-	const args = [
-		"--session",
-		sessionName,
-		"screenshot"
-	];
+	const args = ["--session", sessionName];
+	if (options?.statePath) args.push("--state", options.statePath);
+	args.push("screenshot");
 	if (options?.fullPage) args.push("--full");
 	args.push(outPath);
 	const res = spawnAB(args);
@@ -4530,10 +4779,10 @@ function takeScreenshot(sessionName, outPath, options) {
 	};
 }
 //#endregion
-//#region src/runtime/nd-executor.ts
+//#region src/runtime/live-executor.ts
 /**
-* Run all spec steps once through Claude (non-deterministic mode). Each step
-* is one Claude invocation that:
+* Run all spec steps once through Claude (live mode). Each step is one Claude
+* invocation that:
 *   1. takes a "before" screenshot of the live session
 *   2. lets Claude execute the step's instruction via agent-browser (full
 *      surface, no replay-time selector constraints)
@@ -4544,20 +4793,23 @@ function takeScreenshot(sessionName, outPath, options) {
 * the overall run status flips to `failed`. The Chrome session persists
 * across steps so step N+1 starts on whatever page step N left the browser on.
 */
-async function runNdExecutor(input) {
+async function runLiveExecutor(input) {
 	const startedAt = /* @__PURE__ */ new Date();
 	const stepResults = [];
 	let overallFailed = false;
-	const promptPrefix = buildRunNdSystemPromptPrefix({
+	const statePath = input.statePath ?? null;
+	const promptPrefix = buildLiveSystemPromptPrefix({
 		title: input.spec.title,
 		allSteps: input.steps,
-		sessionName: input.sessionName
+		sessionName: input.sessionName,
+		statePath
 	});
 	const suffixBlock = input.systemPromptSuffix ? `\n## Project-specific guidance\n\n${input.systemPromptSuffix}\n` : "";
 	const langDirective = languageDirective(input.language);
 	const invokeBase = agentBrowserInvokeBase({
 		sessionName: input.sessionName,
-		runId: input.runId
+		runId: input.runId,
+		statePath
 	});
 	const retries = Math.max(0, input.retries ?? 0);
 	for (let i = 0; i < input.steps.length; i++) {
@@ -4571,8 +4823,8 @@ async function runNdExecutor(input) {
 		const paths = stepArtifactPaths(input.runDir, step$1.id);
 		await ensureDir(paths.beforePng);
 		const stepStartedAt = Date.now();
-		const systemPrompt = promptPrefix + buildRunNdSystemPromptStepSection(step$1) + suffixBlock + langDirective;
-		const userPrompt = buildRunNdUserPrompt(step$1);
+		const systemPrompt = promptPrefix + buildLiveSystemPromptStepSection(step$1) + suffixBlock + langDirective;
+		const userPrompt = buildLiveUserPrompt(step$1);
 		let attempt = 0;
 		let lastOutcome = null;
 		while (attempt <= retries) {
@@ -4602,7 +4854,7 @@ async function runNdExecutor(input) {
 		}
 	}
 	async function executeStepAttempt(step, paths, systemPrompt, userPrompt) {
-		const before = takeScreenshot(input.sessionName, paths.beforePng);
+		const before = takeScreenshot(input.sessionName, paths.beforePng, { statePath });
 		if (!before.ok) warn(`screenshot (before, ${step.id}) failed: ${before.error}`);
 		const transcriptParts = [];
 		let isError = false;
@@ -4634,7 +4886,10 @@ async function runNdExecutor(input) {
 			transcriptParts.push(`[ccqa] invokeClaudeStreaming threw: ${err instanceof Error ? err.message : String(err)}`);
 		}
 		const transcript = transcriptParts.join("\n");
-		const after = takeScreenshot(input.sessionName, paths.afterPng, { fullPage: true });
+		const after = takeScreenshot(input.sessionName, paths.afterPng, {
+			fullPage: true,
+			statePath
+		});
 		if (!after.ok) warn(`screenshot (after, ${step.id}) failed: ${after.error}`);
 		await writeFile(paths.logTxt, transcript || "(no assistant text captured)", "utf-8");
 		const { status, reasoning } = judgeStepOutcome({
@@ -4750,24 +5005,24 @@ function truncateForLog$1(s) {
 	return oneLine.length > 100 ? oneLine.slice(0, 100) + "…" : oneLine;
 }
 //#endregion
-//#region src/report/nd-adapter.ts
+//#region src/report/live-adapter.ts
 /**
-* Convert one `run-nd` execution result into the persistence-layer
-* `ReportSpecResult` shape consumed by `renderRunReport`. The conversion
-* does two non-trivial things:
+* Convert one live-mode (`mode: live`) execution result into the
+* persistence-layer `ReportSpecResult` shape consumed by `renderRunReport`.
+* The conversion does two non-trivial things:
 *
 *   - rewrites the executor's absolute `beforePng`/`afterPng` paths as
 *     `reportDir`-relative hrefs so the rendered HTML opens its PNGs
 *     directly when the report dir + the run dir are downloaded together
 *     as a CI artifact bundle
 *   - nulls out every vitest-only field so the report renderer falls
-*     through to its `ndRun` branch
+*     through to its `liveRun` branch
 *
 * Lives in `src/report/` (not the CLI) because the relative-path contract
-* on `NdReportStep.beforePng`/`afterPng` is a report-layer invariant,
+* on `LiveReportStep.beforePng`/`afterPng` is a report-layer invariant,
 * documented next to the schema, and the CLI should not own it.
 */
-function ndRunToReportResult(args) {
+function liveRunToReportResult(args) {
 	const { featureName, specName, specYaml, result, reportDir } = args;
 	const steps = result.steps.map((s) => ({
 		stepId: s.stepId,
@@ -4781,7 +5036,7 @@ function ndRunToReportResult(args) {
 		durationMs: s.durationMs,
 		cost: { ...s.cost }
 	}));
-	const ndRun = {
+	const liveRun = {
 		runId: result.runId,
 		sessionName: result.sessionName,
 		startedAt: result.startedAt,
@@ -4804,16 +5059,16 @@ function ndRunToReportResult(args) {
 		diffExcerpt: null,
 		specYaml,
 		evidence: null,
-		ndRun
+		liveRun
 	};
 }
 function relativeIfPresent(absPath, reportDir) {
 	return absPath === null ? null : relative(reportDir, absPath);
 }
 //#endregion
-//#region src/cli/run-nd.ts
+//#region src/cli/run-live.ts
 /**
-* Run pre-filtered `mode: live` specs through `runNdExecutor` (Claude +
+* Run pre-filtered `mode: live` specs through `runLiveExecutor` (Claude +
 * agent-browser) and, when `reportDir` is set, run drift audit + failure
 * analysis to produce report rows. Sibling of `runDeterministicSpecs`.
 */
@@ -4825,24 +5080,25 @@ async function runLiveSpecs(specs, opts) {
 	const cwd = opts.cwd ?? process.cwd();
 	await preflightAgentBrowserCommand();
 	meta("live-specs", specs.length);
-	const userPromptSuffix = await loadRunNdUserPrompt(cwd);
-	if (userPromptSuffix !== null) meta("user-prompt", ".ccqa/prompts/run-nd.user.md");
-	const runs = [];
-	for (let i = 0; i < specs.length; i++) {
-		const { featureName, specName } = specs[i];
-		const label = `${featureName}/${specName}`;
-		if (specs.length > 1) {
-			blank();
-			info(`[${i + 1}/${specs.length}] ${label}`);
-		}
-		runs.push(await runOneSpec({
-			featureName,
-			specName,
-			opts,
-			userPromptSuffix,
-			cwd
-		}));
-	}
+	const userPromptBundle = await loadLivePromptBundle(cwd);
+	if (userPromptBundle !== null) meta("prompt", userPromptBundle.loaded.join(" + "));
+	const userPromptSuffix = userPromptBundle?.text ?? null;
+	const concurrency = Math.max(1, opts.concurrency ?? 1);
+	const runs = await runPool(specs, concurrency, (spec, i) => {
+		const label = `${spec.featureName}/${spec.specName}`;
+		return withBuffer(label, concurrency > 1, () => {
+			if (concurrency === 1 && specs.length > 1) {
+				blank();
+				info(`[${i + 1}/${specs.length}] ${label}`);
+			}
+			return runOneSpec({
+				...spec,
+				opts,
+				userPromptSuffix,
+				cwd
+			});
+		});
+	});
 	const failedCount = runs.filter((r) => r.kind === "error" || r.kind === "run" && r.result.status === "failed").length;
 	blank();
 	meta("live-summary", `${runs.length - failedCount} passed / ${failedCount} failed`);
@@ -4859,7 +5115,7 @@ function buildLiveReportResults(runs, driftBySpec, analysisBySpec, reportDir, fa
 		if (r.kind !== "run") return [];
 		const key = `${r.featureName}/${r.specName}`;
 		return [{
-			...ndRunToReportResult({
+			...liveRunToReportResult({
 				featureName: r.featureName,
 				specName: r.specName,
 				specYaml: r.specYaml,
@@ -4889,7 +5145,7 @@ function analysisFieldsFor(a, status, failureAnalysisEnabled) {
 /**
 * Run `analyzeDrift` against every successfully-loaded spec and return a
 * `featureName/specName → driftIssues` map. Drift findings are advisory —
-* they show in the HTML report but do not change the run-nd exit code.
+* they show in the HTML report but do not change the live-run exit code.
 */
 async function runDriftAudit(runs, opts, cwd) {
 	const targets = runs.filter((r) => r.kind === "run").map((r) => ({
@@ -4939,18 +5195,36 @@ async function runOneSpec(args) {
 	meta("steps", expanded.length);
 	const includes = collectIncludedBlockNames(spec);
 	if (includes.length > 0) meta("blocks", includes.join(", "));
-	const sessionName = generateRunNdSessionName();
+	const sessionName = generateLiveSessionName();
 	meta("session", sessionName);
+	let statePath = null;
+	if (spec.statePath) {
+		statePath = isAbsolute(spec.statePath) ? spec.statePath : resolve(cwd, spec.statePath);
+		try {
+			await access(statePath);
+		} catch {
+			const msg = `spec.statePath points to a missing file: ${statePath}`;
+			error(msg);
+			return {
+				kind: "error",
+				featureName,
+				specName,
+				error: msg
+			};
+		}
+		meta("state", statePath);
+	}
 	const runId = buildRunId();
 	const runDir = opts.out ?? join(specDir, "runs", runId);
 	await mkdir(runDir, { recursive: true });
 	meta("runDir", runDir);
-	const result = await runNdExecutor({
+	const result = await runLiveExecutor({
 		spec: { title: spec.title },
 		steps: expanded,
 		runId,
 		runDir,
 		sessionName,
+		statePath,
 		systemPromptSuffix: userPromptSuffix,
 		model: opts.model,
 		language: opts.language,
@@ -4963,7 +5237,7 @@ async function runOneSpec(args) {
 	meta("saved", runJsonPath);
 	meta("status", result.status.toUpperCase());
 	meta("step-summary", `${count(result.steps, "passed")} passed / ${count(result.steps, "failed")} failed / ${count(result.steps, "skipped")} skipped`);
-	const costLine = formatNdCost(result.cost, { compact: false });
+	const costLine = formatLiveCost(result.cost, { compact: false });
 	if (costLine) meta("cost", costLine);
 	return {
 		kind: "run",
@@ -4975,7 +5249,7 @@ async function runOneSpec(args) {
 	};
 }
 function logBatchCost(runs) {
-	const line = formatNdBatchCost(runs.flatMap((r) => r.kind === "run" ? [r.result.cost] : []));
+	const line = formatLiveBatchCost(runs.flatMap((r) => r.kind === "run" ? [r.result.cost] : []));
 	if (line) meta("total-cost", line);
 }
 /**
@@ -5005,7 +5279,7 @@ async function runFailureAnalysisForLiveRuns(runs, driftBySpec, opts, cwd) {
 	for (const r of failed) {
 		const key = `${r.featureName}/${r.specName}`;
 		info(`failure analysis: ${key}`);
-		const excerpt = await buildNdTranscriptExcerpt(r.result);
+		const excerpt = await buildLiveTranscriptExcerpt(r.result);
 		if (excerpt === null) {
 			out.set(key, {
 				analysis: null,
@@ -5016,7 +5290,7 @@ async function runFailureAnalysisForLiveRuns(runs, driftBySpec, opts, cwd) {
 			continue;
 		}
 		const outcome = await analyzeFailure({
-			ndTranscriptExcerpt: excerpt,
+			liveTranscriptExcerpt: excerpt,
 			specYaml: r.specYaml,
 			diffPatch: diff.ok ? diff.diff.patch : null,
 			changedFiles: diff.ok ? diff.diff.nameStatus : null,
@@ -5067,6 +5341,100 @@ function oneLine(s) {
 	return s.replace(/\s+/g, " ").trim();
 }
 //#endregion
+//#region src/prompts/agent-update.ts
+/**
+* System prompt for the "refresh `<mode>.agent.md`" call. `input.mode`
+* selects the live / record wording and the file names mentioned in the
+* text; `input.language` (default "auto") is forwarded to
+* `outputLanguageBlock`, whose output is spliced in ahead of the
+* "What you are updating" heading.
+*/
+function buildAgentUpdateSystemPrompt(input) {
+	const isLive = input.mode === "live";
+	const modeLabel = isLive ? "live (Claude drives every step at run time)" : "record (Claude records browser actions for vitest replay)";
+	const userFile = `${input.mode}.user.md`;
+	const agentFile = `${input.mode}.agent.md`;
+	const languageBlock = outputLanguageBlock(input.language ?? "auto", "the bullet text", "headings, agent-browser subcommand names, selector tokens");
+	const appliesTo = isLive ? "step of every `mode: live` spec" : "trace run of `ccqa record`";
+	return `You maintain the auto-learned half of ccqa's prompt bundle for ${modeLabel}.
+${languageBlock}## What you are updating
+\`.ccqa/prompts/${agentFile}\` is appended to ccqa's system prompt for every ${appliesTo}. It is meant to capture **stable lessons learned from past runs** — concrete selectors that worked, login flow quirks the agent kept tripping on, common "this is fine" warnings to ignore.
+The sibling file \`${userFile}\` carries human-maintained project guidance (URLs, naming conventions). Rules already well-covered by \`${userFile}\` should NOT be repeated here.
+## Output rules
+- Emit the COMPLETE replacement contents of \`${agentFile}\`.
+- Concise bullet points. No narrative paragraphs. No preamble. No closing summary.
+- Each bullet is a single declarative sentence (or one bullet → one short selector / command).
+- Group related bullets under \`### …\` subheaders.
+- Skip everything that was already true and well-covered by the previous file or \`${userFile}\`. Only persist new lessons.
+- Keep the whole file under ~3 KB.
+- Output ONLY the new file contents. NO code fences. NO surrounding prose. NO markdown frontmatter.
+- If the run summary contains nothing worth learning from, output the previous file unchanged.
+`;
+}
+/**
+* User message for the "refresh `<mode>.agent.md`" call: the previous file
+* contents (or a placeholder when it is missing / blank), the run summary,
+* and the instruction to emit only the new file body.
+*/
+function buildAgentUpdateUserPrompt(input) {
+	const agentFile = `${input.mode}.agent.md`;
+	const hasPrevious = Boolean(input.currentAgentMd) && input.currentAgentMd.trim().length > 0;
+	const previous = hasPrevious ? input.currentAgentMd : "(no existing file — this will create one)";
+	return `## Previous \`${agentFile}\`
+${previous}
+## Run summary
+${input.runSummary}
+## Your task
+Write the new contents of \`${agentFile}\`. Output ONLY the file contents — no preamble, no fences, no closing note.`;
+}
+//#endregion
+//#region src/cli/update-agent-prompt.ts
+/**
+* Refresh `.ccqa/prompts/<mode>.agent.md` from the latest run.
+*
+* Reads the existing file (if any) and a caller-supplied run summary, sends
+* both to Claude, and writes the response back over the agent prompt file.
+* Degrades gracefully when auth is missing — logs and returns — so the run
+* exit code is unaffected by this opt-in side step. The file is likewise left
+* untouched when Claude reports an error or returns only whitespace.
+*
+* `args` fields: `mode` (prompt family, used in the file name), `runSummary`
+* (text handed to the model), `cwd` (directory holding `.ccqa/`), and the
+* optional `model` / `language` overrides.
+*/
+async function updateAgentPrompt(args) {
+	const { mode, runSummary, cwd, model, language } = args;
+	const agentMdPath = join(cwd, ".ccqa", "prompts", `${mode}.agent.md`);
+	const relPath = relative(cwd, agentMdPath);
+	const auth = driftAuthAvailable();
+	if (!auth.ok) {
+		warn(`--update-agent-prompt skipped (${auth.reason})`);
+		return;
+	}
+	const promptInput = {
+		mode,
+		// Any read failure is treated as "no previous file"; the prompt then
+		// tells the model it is creating the file from scratch.
+		currentAgentMd: await readFile(agentMdPath, "utf-8").catch(() => null),
+		runSummary,
+		...language ? { language } : {}
+	};
+	const systemPrompt = buildAgentUpdateSystemPrompt(promptInput);
+	const userPrompt = buildAgentUpdateUserPrompt(promptInput);
+	info(`--update-agent-prompt: refreshing ${relPath}`);
+	const { result, isError } = await invokeClaudeStreaming({
+		prompt: userPrompt,
+		systemPrompt,
+		allowedTools: [],
+		disableBuiltinTools: true,
+		...model ? { model } : {}
+	}, () => {});
+	if (isError || !result || result.trim().length === 0) {
+		warn(`--update-agent-prompt: Claude returned no usable output${isError ? " (SDK error)" : ""}; leaving ${relPath} unchanged`);
+		return;
+	}
+	const newText = stripCodeFences(result.trim()) + "\n";
+	await mkdir(dirname(agentMdPath), { recursive: true });
+	await writeFile(agentMdPath, newText, "utf-8");
+	// Report the encoded size: `newText.length` counts UTF-16 code units and
+	// under-reports non-ASCII content (e.g. output written in Japanese).
+	info(`--update-agent-prompt: wrote ${relPath} (${Buffer.byteLength(newText, "utf-8")} bytes)`);
+	info(`--update-agent-prompt: review the diff with: git diff -- "${relPath}"`);
+}
+/**
+* Remove one outer Markdown code fence from `text` when the whole string is
+* wrapped in it (an opening ``` with an optional alphabetic language tag, the
+* body, and a closing ``` followed only by whitespace). Models sometimes add
+* such a fence even though the system prompt forbids it; unwrapping keeps the
+* saved file clean. Text that is not fully fenced is returned unchanged.
+*/
+function stripCodeFences(text) {
+	const fenced = /^```[a-zA-Z]*\n([\s\S]*?)\n```\s*$/.exec(text);
+	if (fenced === null) return text;
+	const [, inner] = fenced;
+	return inner === void 0 ? text : inner;
+}
+//#endregion
 //#region src/cli/changed-specs.ts
 /**
 * Filter specs to those affected by the git diff against the resolved base
@@ -5122,28 +5490,57 @@ async function resolveVitestConfig(cwd) {
 		return bundledVitestConfigPath();
 	}
 }
-const runCommand = addLanguageOption(new Command("run").argument("[target]", "Spec to run: '<feature>/<spec>', '<feature>', or omit for all").description("Run specs. Each spec's execution mode comes from its spec.yaml `mode:` field (default deterministic; set `mode: live` to have Claude drive agent-browser live per step). Deterministic specs replay the recorded test.spec.ts under vitest. Pass --report to write one unified HTML report covering both modes.").option("--report [dir]", `Write a self-contained HTML run report (failure analysis + drift audit by default). Default dir: ${DEFAULT_REPORT_DIR}/`).option("--changed", "Restrict execution to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). Cannot be combined with an explicit spec id.").option("--no-failure-analysis", "Skip the per-failure root-cause classification (TEST_DRIFT / SPEC_CHANGE / PRODUCT_BUG). --report only.").option("--no-drift-audit", "Skip the spec↔code drift audit shown in the report. --report only.").option("--base <ref>", "Base ref the source diff is taken against for failure analysis (default: GITHUB_BASE_REF, then origin/main).").option("--cwd <path>", "Working directory containing the .ccqa/ tree (monorepo support). Defaults to the current directory.").option("--format <fmt>", "Additional output format alongside HTML when --report is set: 'text' (default), 'json' (writes report.json), 'github' (GitHub Actions annotations on stdout).", (raw) => {
+const runCommand = addProfileOption(addLanguageOption(new Command("run").argument("[targets...]", "Specs to run, space-separated: each '<feature>/<spec>', '<feature>', or omit for all. Duplicates are de-duped.").description("Run specs. Each spec's execution mode comes from its spec.yaml `mode:` field (default deterministic; set `mode: live` to have Claude drive agent-browser live per step). Deterministic specs replay the recorded test.spec.ts under vitest. Pass --report to write one unified HTML report covering both modes.").option("--report [dir]", `Write a self-contained HTML run report (failure analysis + drift audit by default). Default dir: ${DEFAULT_REPORT_DIR}/`).option("--changed", "Restrict execution to specs whose relatedPaths intersect the git diff against --base (or, in CI, $GITHUB_BASE_REF, else origin/main). Cannot be combined with an explicit spec id.").option("--no-failure-analysis", "Skip the per-failure root-cause classification (TEST_DRIFT / SPEC_CHANGE / PRODUCT_BUG). --report only.").option("--no-drift-audit", "Skip the spec↔code drift audit shown in the report. --report only.").option("--base <ref>", "Base ref the source diff is taken against for failure analysis (default: GITHUB_BASE_REF, then origin/main).").option("--cwd <path>", "Working directory containing the .ccqa/ tree (monorepo support). Defaults to the current directory.").option("--format <fmt>", "Additional output format alongside HTML when --report is set: 'text' (default), 'json' (writes report.json), 'github' (GitHub Actions annotations on stdout).", (raw) => {
 	if (REPORT_FORMATS.includes(raw)) return raw;
 	throw new Error(`--format must be one of ${REPORT_FORMATS.join(" | ")}`);
 }, "text").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--no-evidence", `(deterministic only) Skip step-boundary evidence capture (PNG + meta JSON written to ${DEFAULT_REPORT_DIR}/${EVIDENCE_SUBDIR}/ by default).`).option("--retry <n>", "(live only) Retry each failed step up to N more times before recording failure. Default 0.", (raw) => {
 	const n = Number(raw);
 	if (!Number.isFinite(n) || n < 0 || Math.floor(n) !== n) throw new Error(`--retry must be a non-negative integer, got "${raw}"`);
 	return n;
-}, 0).option("--out <dir>", "(live only) Override the per-spec artifact directory. Default: <specDir>/runs/<runId>. Ignored when running multiple specs.")).action(async (target, opts) => {
-	await runDispatcher(target, opts);
+}, 0).option("--out <dir>", "(live only) Override the per-spec artifact directory. Default: <specDir>/runs/<runId>. Ignored when running multiple specs.").option("--update-agent-prompt", "(live only) After the run finishes, ask Claude to refresh .ccqa/prompts/live.agent.md from a summary of the run.").option("--concurrency <n>", "Run up to N specs in parallel within each mode (deterministic / live). Default 1 (sequential). Live specs each get an isolated agent-browser session; high values spawn many headed Chrome instances.", parseConcurrency$1, 1))).action(async (targets, opts) => {
+	await runDispatcher(targets, opts);
 });
+/**
+* Parse --concurrency: a positive integer. Rejects 0, negatives, non-integers.
+*
+* The raw string is converted with Number(), so an empty string (0) and
+* non-numeric text (NaN) are rejected, while exponent notation such as "1e2"
+* is accepted as 100. On rejection this logs through error() and terminates
+* the process with exit code 2 instead of throwing.
+*
+* @param {string} raw - Value supplied on the command line.
+* @returns {number} The parsed integer, always >= 1.
+*/
+function parseConcurrency$1(raw) {
+	const n = Number(raw);
+	if (!Number.isInteger(n) || n < 1) {
+		error(`invalid --concurrency: ${raw} (expected positive integer)`);
+		process.exit(2);
+	}
+	return n;
+}
 function resolveReportDir(report, cwd) { // Map the --report option value to an absolute directory, or undefined when no report was requested.
 	if (report === void 0 || report === false) return void 0; // option absent or explicitly false: no report
 	return resolve(cwd, typeof report === "string" ? report : DEFAULT_REPORT_DIR); // an explicit dir wins; any other value (presumably `true` from a bare --report — confirm) falls back to the default dir
 }
-async function runDispatcher(target, opts) {
-	header("run", target ?? (opts.changed ? "(changed)" : "(all specs)"));
-	if (opts.changed && target) {
+/**
+* Header label shown after `ccqa run`: the lone target, a count, or a mode marker.
+*
+* @param {string[]} targets - Positional targets from the command line (may be empty).
+* @param opts - Parsed options; only `changed` is read.
+* @returns {string} The single target verbatim, "<n> targets" for several, otherwise
+*   "(changed)" when opts.changed is truthy and "(all specs)" when it is not.
+*/
+function headerTarget(targets, opts) {
+	if (targets.length === 1) return targets[0];
+	if (targets.length > 1) return `${targets.length} targets`;
+	return opts.changed ? "(changed)" : "(all specs)";
+}
+/**
+* De-dupe by `featureName/specName`, keeping first-seen order.
+*
+* @param specs - Resolved specs; only `featureName` and `specName` are read.
+* @returns A new array holding the first occurrence of each key; the input
+*   array is not modified.
+*/
+function dedupeSpecs(specs) {
+	const seen = /* @__PURE__ */ new Set(); // keys already emitted
+	const out = [];
+	for (const s of specs) {
+		const key = `${s.featureName}/${s.specName}`;
+		if (seen.has(key)) continue; // later repeats of the same spec are dropped
+		seen.add(key);
+		out.push(s);
+	}
+	return out;
+}
+async function runDispatcher(targets, opts) {
+	header("run", headerTarget(targets, opts));
+	if (opts.changed && targets.length > 0) {
 		error("--changed and an explicit spec target cannot be combined");
 		process.exit(2);
 	}
 	const cwd = resolveCwd(opts.cwd);
-	let specs = await resolveSpecTargets(target, () => listAllSpecsWithSpecFile(cwd), cwd);
+	await applyProfileFromOption(opts.profile, cwd);
+	const enumerateAll = () => listAllSpecsWithSpecFile(cwd);
+	let specs = dedupeSpecs((await Promise.all((targets.length ? targets : [void 0]).map((t) => resolveSpecTargets(t, enumerateAll, cwd)))).flat());
 	if (opts.changed) {
 		const before = specs.length;
 		specs = await collectChangedSpecs(specs, {
@@ -5163,7 +5560,8 @@ async function runDispatcher(target, opts) {
 	if (liveSpecs.length === 0) {
 		if (typeof opts.retry === "number" && opts.retry > 0) warn("--retry is ignored without any 'mode: live' spec");
 		if (opts.out) warn("--out is ignored without any 'mode: live' spec");
-	}
+		if (opts.updateAgentPrompt) warn("--update-agent-prompt is ignored without any 'mode: live' spec");
+	} else if (opts.out && liveSpecs.length > 1) warn("--out is ignored when running multiple live specs");
 	if (detSpecs.length === 0 && opts.evidence === false) warn("--no-evidence is ignored without any 'mode: deterministic' spec");
 	blank();
 	const reportDir = resolveReportDir(opts.report, cwd);
@@ -5172,11 +5570,12 @@ async function runDispatcher(target, opts) {
 	const live = await runLiveSpecs(liveSpecs, {
 		...opts.model ? { model: opts.model } : {},
 		...opts.language ? { language: opts.language } : {},
-		...opts.out ? { out: opts.out } : {},
+		...opts.out && liveSpecs.length === 1 ? { out: opts.out } : {},
 		cwd,
 		...opts.base ? { base: opts.base } : {},
 		...reportDir ? { reportDir } : {},
 		...typeof opts.retry === "number" ? { retry: opts.retry } : {},
+		concurrency: opts.concurrency ?? 1,
 		...reportDir && opts.driftAudit !== false ? { driftAudit: true } : {},
 		...reportDir && opts.failureAnalysis === false ? { failureAnalysis: false } : {}
 	});
@@ -5192,9 +5591,39 @@ async function runDispatcher(target, opts) {
 			opts
 		});
 	}
+	if (opts.updateAgentPrompt && liveSpecs.length > 0) {
+		blank();
+		await updateAgentPrompt({
+			mode: "live",
+			runSummary: buildLiveRunSummary(live.reportResults),
+			cwd,
+			...opts.model ? { model: opts.model } : {},
+			...opts.language ? { language: opts.language } : {}
+		});
+	}
 	process.exit(overallExitCode);
 }
 /**
+* Compact, prompt-friendly summary of one ccqa run for the live agent-prompt
+* update step. One section per spec: header line + per-step verdicts.
+* Results without a `liveRun` are skipped. Each step contributes a single
+* line whose reasoning text is flattened and capped by oneLineSummary$1, which
+* keeps every step short; the total still grows with the number of specs and
+* steps.
+*
+* @param results - Report results from the run; reads `feature`, `spec`,
+*   `status` and `liveRun.steps[].{status, stepId, reasoning}`.
+* @returns {string} Markdown sections joined by blank lines, or
+*   "(no live runs executed)" when no result carried a live run.
+*/
+function buildLiveRunSummary(results) {
+	const sections = [];
+	for (const r of results) {
+		if (!r.liveRun) continue; // not a live spec (or it produced no run record)
+		const head = `## ${r.feature}/${r.spec} — ${r.status}`;
+		const steps = r.liveRun.steps.map((s) => `- [${s.status}] ${s.stepId}: ${oneLineSummary$1(s.reasoning)}`).join("\n");
+		sections.push(`${head}\n${steps}`);
+	}
+	return sections.length === 0 ? "(no live runs executed)" : sections.join("\n\n");
+}
+function oneLineSummary$1(s) { // Flatten a step's reasoning to one line for the run summary. NOTE(review): calls s.replace directly, so a null/undefined reasoning would throw — confirm steps always carry a string.
+	const flat = s.replace(/\s+/g, " ").trim(); // collapse every whitespace run (newlines included) to a single space
+	return flat.length > 240 ? flat.slice(0, 240) + "…" : flat || "(no reason given)"; // cap at 240 UTF-16 units plus an ellipsis; empty text becomes the placeholder
+}
+/**
 * Run pre-filtered deterministic specs under vitest. Empty input is a no-op.
 * Captures step-boundary evidence under `<reportDir>/evidence/<feature>/<spec>/`
 * when enabled.
@@ -5205,72 +5634,83 @@ async function runDeterministicSpecs(specs, opts, cwd, reportDirAbs) {
 		exitCode: 0
 	};
 	const tmpDir = await mkdtemp(join(tmpdir(), "ccqa-run-"));
-	const summaries = [];
-	let exitCode = 0;
 	const vitestConfig = await resolveVitestConfig(cwd);
 	const captureOutput = Boolean(opts.report);
 	const evidenceRoot = opts.evidence !== false ? join(reportDirAbs, EVIDENCE_SUBDIR) : null;
+	const concurrency = Math.max(1, opts.concurrency ?? 1);
+	const ctx = {
+		cwd,
+		tmpDir,
+		vitestConfig,
+		captureOutput,
+		evidenceRoot
+	};
 	try {
-		for (let i = 0; i < specs.length; i++) {
-			const { featureName, specName } = specs[i];
-			const scriptFile = await getTestScript(featureName, specName, cwd);
-			if (!scriptFile) {
-				warn(`${featureName}/${specName}: no test.spec.ts found`);
-				hint("run 'ccqa record <feature>/<spec>' to record it, or set 'mode: live' in spec.yaml");
-				continue;
-			}
-			run(`${featureName}/${specName}`);
-			meta("test", scriptFile);
-			blank();
-			const reportFile = join(tmpDir, `report-${i}.json`);
-			const evidenceDir = evidenceRoot ? join(evidenceRoot, featureName, specName) : null;
-			if (evidenceDir) {
-				await rm(evidenceDir, {
-					recursive: true,
-					force: true
-				});
-				await mkdir(evidenceDir, { recursive: true });
-			}
-			const proc = spawnVitestStreaming([
-				"run",
-				"--config",
-				vitestConfig,
-				scriptFile,
-				"--reporter=json",
-				`--outputFile.json=${reportFile}`
-			], {
-				cwd,
-				env: evidenceDir ? {
-					...process.env,
-					CCQA_EVIDENCE_DIR: evidenceDir
-				} : process.env
-			});
-			const tail = captureOutput ? new TailBuffer(OUTPUT_TAIL_CAP) : null;
-			await Promise.all([streamFiltered(proc.stdout, process.stdout, tail), streamFiltered(proc.stderr, process.stderr, tail)]);
-			const specExitCode = await proc.exited;
-			if (specExitCode !== 0) exitCode = specExitCode;
-			const report = await readReport(reportFile);
-			summaries.push({
-				featureName,
-				specName,
-				scriptFile,
-				report,
-				exitCode: specExitCode,
-				outputTail: tail ? tail.toString() : null,
-				evidenceDir
-			});
-			blank();
-		}
+		const summaries = (await runPool(specs, concurrency, (spec, i) => withBuffer(`${spec.featureName}/${spec.specName}`, concurrency > 1, () => runOneDeterministicSpec(spec, i, ctx)))).filter((s) => s !== null);
 		printSummary(summaries);
+		return {
+			summaries,
+			exitCode: summaries.reduce((acc, s) => s.exitCode !== 0 ? s.exitCode : acc, 0)
+		};
 	} finally {
 		await rm(tmpDir, {
 			recursive: true,
 			force: true
 		});
 	}
+}
+/**
+* Run one spec under vitest. Returns null when the spec has no recorded
+* test.spec.ts (skipped). All output goes through the logger, so under a
+* `log.withBuffer` scope it's captured and flushed as one labelled block.
+*
+* @param spec - Spec descriptor; reads `featureName` and `specName`.
+* @param {number} index - Position of the spec in the run; only used to give
+*   the vitest JSON report a distinct file name inside `ctx.tmpDir`.
+* @param ctx - Shared run context; reads `cwd`, `tmpDir`, `vitestConfig`,
+*   `captureOutput` and `evidenceRoot`.
+* @returns The spec summary (names, script path, parsed report, vitest exit
+*   code, optional output tail, evidence directory) or null when skipped.
+*/
+async function runOneDeterministicSpec(spec, index, ctx) {
+	const { featureName, specName } = spec;
+	const scriptFile = await getTestScript(featureName, specName, ctx.cwd);
+	if (!scriptFile) {
+		warn(`${featureName}/${specName}: no test.spec.ts found`);
+		hint("run 'ccqa record <feature>/<spec>' to record it, or set 'mode: live' in spec.yaml");
+		return null;
+	}
+	run(`${featureName}/${specName}`);
+	meta("test", scriptFile);
+	blank();
+	const reportFile = join(ctx.tmpDir, `report-${index}.json`);
+	const evidenceDir = ctx.evidenceRoot ? join(ctx.evidenceRoot, featureName, specName) : null; // null when evidence capture is off
+	if (evidenceDir) {
+		await rm(evidenceDir, {
+			recursive: true,
+			force: true
+		});
+		await mkdir(evidenceDir, { recursive: true }); // start from an empty directory so earlier evidence is not mixed in
+	}
+	const proc = spawnVitestStreaming([
+		"run",
+		"--config",
+		ctx.vitestConfig,
+		scriptFile,
+		"--reporter=json",
+		`--outputFile.json=${reportFile}`
+	], {
+		cwd: ctx.cwd,
+		env: evidenceDir ? {
+			...process.env,
+			CCQA_EVIDENCE_DIR: evidenceDir
+		} : process.env
+	});
+	const sink = { write: emitRaw }; // both child streams are written through emitRaw rather than directly to process.stdout/stderr
+	const tail = ctx.captureOutput ? new TailBuffer(OUTPUT_TAIL_CAP) : null; // one shared tail buffer for stdout and stderr
+	await Promise.all([streamFiltered(proc.stdout, sink, tail), streamFiltered(proc.stderr, sink, tail)]);
+	const specExitCode = await proc.exited;
+	blank();
 	return {
-		summaries,
-		exitCode
+		featureName,
+		specName,
+		scriptFile,
+		report: await readReport(reportFile),
+		exitCode: specExitCode,
+		outputTail: tail ? tail.toString() : null,
+		evidenceDir
 	};
 }
 function failedSpec(s) {
@@ -5358,7 +5798,7 @@ async function analyzeDeterministicSummaries(summaries, opts, cwd, reportDir) {
 				failureLogExcerpt: null,
 				diffExcerpt: null,
 				specYaml: null,
-				ndRun: null
+				liveRun: null
 			});
 			continue;
 		}
@@ -5408,7 +5848,7 @@ async function analyzeDeterministicSummaries(summaries, opts, cwd, reportDir) {
 			failureLogExcerpt: failureLog.length > 0 ? failureLog : null,
 			diffExcerpt,
 			specYaml,
-			ndRun: null
+			liveRun: null
 		});
 	}
 	return {
@@ -5719,6 +6159,7 @@ agent-browser --session SESSION wait --load networkidle
 agent-browser --session SESSION get count "<selector>"   # element-existence check (returns a number, fast)
 agent-browser --session SESSION cookies clear
 agent-browser --session SESSION find <locator> <value> <action> [<input>] [--name "<n>"] [--exact]
+agent-browser --session SESSION upload "<input[type=file] selector>" <file> [<file> ...]
 # See "Selector Rules" for the full \`find\` subset.
 # IMPORTANT: do NOT use \`wait "<css-selector>"\`. agent-browser ignores --timeout on a
 # CSS-selector wait and blocks for ~150s when the selector never matches, killing the run.
@@ -5794,6 +6235,8 @@ find nth <index> "<ALLOWED-css>" <action>
 **Verifying cleanup / deletion**: assert the *absence* of the deleted thing, not the surrounding listing screen's text. Use \`wait --fn "!document.body.innerText.includes('<unique-label>')"\` (text disappearance) — never \`wait "<css-selector>" --state hidden\` (blocks the daemon) and never \`wait --text "<navbar label>"\` (passes regardless of the deletion).
+**File inputs (\`<input type="file">\`) / OS file-picker dialogs**: do NOT \`click\` the input — that opens the OS picker, which agent-browser cannot drive. Use \`upload "<selector>" <path>\` instead. agent-browser sets the input's files directly via the underlying browser API, no native dialog ever opens. Use an ALLOWED selector to identify the input (\`[aria-label='…']\`, \`[data-testid='…']\`, \`[type='file']\` only when it's unique on the page). File paths must be plain shell args — wrap each in \`"\` for safety. Reference fixtures via \`\${CCQA_FIXTURES_DIR}/<name>\` so the same spec works locally and in CI; conventionally fixtures live under \`.ccqa/fixtures/\` and the env var resolves there. Multi-file inputs accept several positionals: \`upload "[aria-label='Attach']" "\${CCQA_FIXTURES_DIR}/a.pdf" "\${CCQA_FIXTURES_DIR}/b.pdf"\`.
 ## Test Specification
 Title: ${input.title}
@@ -5876,6 +6319,7 @@ AB_ACTION|select|<selector>|<value>|<aria label>
 AB_ACTION|hover|<selector>|<visible label>
 AB_ACTION|scroll|<direction>|<pixels>
 AB_ACTION|drag|<source selector>|<target selector>|<source label>
+AB_ACTION|upload|<file-input selector>|<file1>[|<file2>...]
 AB_ACTION|wait|<selector or text>|<label>
 AB_ACTION|snapshot|<key observation, max 100 chars>
 AB_ACTION|assert|<assertType>|<selector or "">|<value or "">|<observation>
@@ -6192,6 +6636,17 @@ function actionToAbArgs(action, sessionName) {
 			sub(action.selector),
 			sub(action.target)
 		];
+		case "upload": {
+			const sel = sub(action.selector);
+			const files = (action.files ?? []).map((f) => sub(f));
+			if (!sel || files.length === 0) return null;
+			return [
+				...base,
+				"upload",
+				sel,
+				...files
+			];
+		}
 		case "wait": {
 			const raw = sub(action.selector);
 			if (!raw) return null;
@@ -6683,9 +7138,9 @@ async function runTrace(featureName, specName, model, validationMode = "lenient"
 		steps: expanded,
 		sessionName
 	});
-	const userPrompt = await loadTraceUserPrompt();
-	if (userPrompt !== null) meta("user-prompt", ".ccqa/prompts/trace.user.md");
-	const systemPrompt = (userPrompt === null ? baseSystemPrompt : `${baseSystemPrompt}\n## Project-specific guidance\n\n${userPrompt}\n`) + languageDirective(language);
+	const promptBundle = await loadRecordPromptBundle();
+	if (promptBundle !== null) meta("prompt", promptBundle.loaded.join(" + "));
+	const systemPrompt = (promptBundle === null ? baseSystemPrompt : `${baseSystemPrompt}\n## Project-specific guidance\n\n${promptBundle.text}\n`) + languageDirective(language);
 	const prompt = buildTracePrompt(spec.title);
 	info("Running agent-browser session...");
 	blank();
@@ -6767,6 +7222,11 @@ async function runTrace(featureName, specName, model, validationMode = "lenient"
 		if (written) meta("relatedPaths", `${relatedPaths.length} path(s) written to ${written}`);
 	} else warn("trace did not emit a RELATED_PATHS block; drift --changed cannot scope this spec");
 	hint(`run 'ccqa generate ${featureName}/${specName}' to generate a test script`);
+	return {
+		route,
+		actionsKept: validatedActions.length,
+		actionsRecorded: traceActions.length
+	};
 }
 /**
 * Strip actions whose recorded fields contain "unstable literal" values
@@ -6825,7 +7285,7 @@ function dedupAndReport(actions) {
 function isAdjacentDuplicate(a, b) { // True when two actions share command, stepId and every compared field, with absent fields defaulted before comparison.
 	if (a.command !== b.command) return false;
 	if ((a.stepId ?? "") !== (b.stepId ?? "")) return false;
-	return (a.selector ?? "") === (b.selector ?? "") && (a.value ?? "") === (b.value ?? "") && (a.target ?? "") === (b.target ?? "") && (a.label ?? "") === (b.label ?? "") && (a.assertType ?? "") === (b.assertType ?? "") && (a.findLocator ?? "") === (b.findLocator ?? "") && (a.findValue ?? "") === (b.findValue ?? "") && (a.findName ?? "") === (b.findName ?? "") && (a.findIndex ?? -1) === (b.findIndex ?? -1) && (a.findExact ?? false) === (b.findExact ?? false);
+	return (a.selector ?? "") === (b.selector ?? "") && (a.value ?? "") === (b.value ?? "") && (a.target ?? "") === (b.target ?? "") && (a.label ?? "") === (b.label ?? "") && (a.assertType ?? "") === (b.assertType ?? "") && (a.findLocator ?? "") === (b.findLocator ?? "") && (a.findValue ?? "") === (b.findValue ?? "") && (a.findName ?? "") === (b.findName ?? "") && (a.findIndex ?? -1) === (b.findIndex ?? -1) && (a.findExact ?? false) === (b.findExact ?? false) && (a.files ?? []).join("|") === (b.files ?? []).join("|"); // file lists are compared in order through a "|"-joined key
 }
 /**
 * Run the post-trace replay validation and emit user-visible drop reports.
@@ -7047,6 +7507,16 @@ function parseAbAction(line) {
 			target: parts[3],
 			label: parts[4]
 		};
+		case "upload": {
+			const selector = parts[2];
+			const files = parts.slice(3).filter((f) => f !== "");
+			if (!selector || files.length === 0) return null;
+			return {
+				command,
+				selector,
+				files
+			};
+		}
 		case "find_click":
 		case "find_dblclick":
 		case "find_hover":
@@ -7097,6 +7567,7 @@ function actionsToScript(input) {
 		`import { ${[
 			"ab",
 			"abWait",
+			"abUpload",
 			"abAssertTextVisible",
 			"abAssertVisible",
 			"abAssertNotVisible",
@@ -7130,6 +7601,7 @@ const ELEMENT_COMMANDS = new Set([
 	"select",
 	"hover",
 	"drag",
+	"upload",
 	"find_click",
 	"find_dblclick",
 	"find_fill",
@@ -7261,6 +7733,11 @@ function actionToLine(action) {
 		case "hover": return `ab("hover", ${j(action.selector)});`;
 		case "scroll": return `ab("scroll", ${[action.direction ?? "down", ...action.pixels ? [action.pixels] : []].map(j).join(", ")});`;
 		case "drag": return `ab("drag", ${j(action.selector)}, ${j(action.target)});`;
+		case "upload": {
+			const files = action.files ?? [];
+			if (!action.selector || files.length === 0) return null;
+			return `abUpload(${[j(action.selector), ...files.map(jExpr)].join(", ")});`;
+		}
 		case "wait": {
 			const sel = action.selector;
 			if (/^\d+$/.test(sel)) return `spawnSync("sleep", [${j(sel)}], { stdio: "inherit" });`;
@@ -8336,21 +8813,23 @@ function toFixMode(autoFix) {
 		case "interactive": return "interactive";
 	}
 }
-const recordCommand = addLanguageOption(new Command("record").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Record a deterministic test from a spec: run agent-browser to collect actions (trace), then generate test.spec.ts with auto-fix retries (generate). After recording, `ccqa run <feature/spec>` replays it under vitest (deterministic specs only — live specs do not need recording).").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions; 'strict' drops them.", (raw) => {
+const recordCommand = addProfileOption(addLanguageOption(new Command("record").argument("<feature/spec>", "Spec id in '<feature>/<spec>' form (resolves to .ccqa/features/<feature>/test-cases/<spec>/)").description("Record a deterministic test from a spec: run agent-browser to collect actions (trace), then generate test.spec.ts with auto-fix retries (generate). After recording, `ccqa run <feature/spec>` replays it under vitest (deterministic specs only — live specs do not need recording).").option("-m, --model <name>", "Claude model alias ('sonnet'|'opus'|'haiku') or full ID. Overrides CCQA_MODEL.").option("--validation-mode <mode>", "Post-trace validation behaviour: 'lenient' (default) tags failing actions; 'strict' drops them.", (raw) => {
 	if (VALIDATION_MODES.includes(raw)) return raw;
 	throw new Error(`--validation-mode must be one of ${VALIDATION_MODES.join(" | ")}`);
 }, "lenient").option("--auto-fix <mode>", "Auto-fix behaviour during script generation: 'interactive' (default, prompt y/N), 'auto' (apply without prompt, for CI), 'skip' (never prompt, only apply high-confidence fixes).", (raw) => {
 	if (AUTO_FIX_MODES.includes(raw)) return raw;
 	throw new Error(`--auto-fix must be one of ${AUTO_FIX_MODES.join(" | ")}`);
-}, "interactive").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--force", "Overwrite an existing test.spec.ts without warning").option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)").option("--skip-trace", "Skip the trace step and run codegen against an existing actions.json").option("--skip-codegen", "Run only the trace step (do not generate test.spec.ts)")).action(async (specPath, opts) => {
+}, "interactive").option("--max-retries <n>", "Maximum number of auto-fix retries", "3").option("--force", "Overwrite an existing test.spec.ts without warning").option("--no-snapshot", "Don't pin AGENT_BROWSER_SESSION / capture page snapshots after a failure (debug toggle)").option("--skip-trace", "Skip the trace step and run codegen against an existing actions.json").option("--skip-codegen", "Run only the trace step (do not generate test.spec.ts)").option("--update-agent-prompt", "After the trace finishes, ask Claude to refresh .ccqa/prompts/record.agent.md from a summary of the run.").option("--cwd <path>", "Working directory containing the .ccqa/ tree (monorepo support). Defaults to the current directory."))).action(async (specPath, opts) => {
 	const { featureName, specName } = parseSpecPath(specPath);
 	const language = opts.language ?? "auto";
 	if (opts.skipTrace && opts.skipCodegen) {
 		error("--skip-trace and --skip-codegen cannot be combined; nothing would run");
 		process.exit(2);
 	}
+	await applyProfileFromOption(opts.profile, resolveCwd(opts.cwd));
+	let traceResult = null;
 	if (!opts.skipTrace) {
-		await runTrace(featureName, specName, opts.model, opts.validationMode ?? "lenient", language);
+		traceResult = await runTrace(featureName, specName, opts.model, opts.validationMode ?? "lenient", language);
 		blank();
 	}
 	if (!opts.skipCodegen) {
@@ -8358,7 +8837,37 @@ const recordCommand = addLanguageOption(new Command("record").argument("<feature
 		const useSnapshot = opts.snapshot !== false;
 		await runGenerate(featureName, specName, parseInt(opts.maxRetries ?? "3", 10), fixMode, opts.force ?? false, useSnapshot, language, opts.model);
 	}
+	if (opts.updateAgentPrompt) if (traceResult === null) warn("--update-agent-prompt is ignored when --skip-trace is set (no run summary available)");
+	else {
+		const cwd = resolveCwd(opts.cwd);
+		blank();
+		await updateAgentPrompt({
+			mode: "record",
+			runSummary: buildRecordRunSummary(featureName, specName, traceResult),
+			cwd,
+			...opts.model ? { model: opts.model } : {},
+			...language ? { language } : {}
+		});
+	}
 });
+/**
+* Compact summary of the trace pass for the record agent-prompt refresh:
+* per-step title / action / observation / status. The route steps already
+* carry the assistant's own framing of what happened — perfect input for
+* "what should I remember next time".
+*
+* Layout: a "## <feature>/<spec> — <route status>" heading, an
+* "Actions: <kept> kept / <recorded> recorded" line, then one "###" section
+* per route step (or "(no route steps recorded)" when there are none). A
+* step's "reason" bullet is emitted only when `s.reason` is truthy.
+*
+* @param {string} featureName - Feature part of the spec id.
+* @param {string} specName - Spec part of the spec id.
+* @param t - Trace result; reads `route.status`, `route.steps`, `actionsKept`
+*   and `actionsRecorded`.
+* @returns {string} The markdown summary text.
+*/
+function buildRecordRunSummary(featureName, specName, t) {
+	return `${`## ${featureName}/${specName} — ${t.route.status}\nActions: ${t.actionsKept} kept / ${t.actionsRecorded} recorded`}\n\n${t.route.steps.length === 0 ? "(no route steps recorded)" : t.route.steps.map((s) => [
+		`### ${s.title} (${s.status})`,
+		`- action: ${oneLineSummary(s.action)}`,
+		`- observation: ${oneLineSummary(s.observation)}`,
+		...s.reason ? [`- reason: ${oneLineSummary(s.reason)}`] : []
+	].join("\n")).join("\n\n")}`;
+}
+function oneLineSummary(s) { // Flatten a route-step field to one line for the record summary. NOTE(review): calls s.replace directly, so a null/undefined field would throw — confirm action/observation are always strings.
+	const flat = s.replace(/\s+/g, " ").trim(); // collapse every whitespace run (newlines included) to a single space
+	return flat.length > 240 ? flat.slice(0, 240) + "…" : flat || "(none)"; // cap at 240 UTF-16 units plus an ellipsis; empty text becomes "(none)"
+}
 //#endregion
 //#region src/cli/draft.ts
 const CATEGORY_LABEL = DRAFT_CATEGORY_LABEL;
@@ -9128,6 +9637,64 @@ function parseConcurrency(raw) {
 	return n;
 }
 //#endregion
+//#region src/cli/init.ts
+const TEMPLATES = [ // Starter files written by `ccqa init`: relPath is joined onto the working directory, content is written verbatim.
+	{
+		relPath: ".ccqa/prompts/live.user.md",
+		content: `# Project guidance for live specs
+Write stable, hand-maintained context here: staging URLs, naming conventions, known "this is fine" warnings. Lines you add will be appended verbatim to the system prompt of every step in 'mode: live' specs.
+`
+	},
+	{
+		relPath: ".ccqa/prompts/live.agent.md",
+		content: `# Agent learnings for live specs
+This file is updated by 'ccqa run --update-agent-prompt'. You can edit it by hand, but the next --update-agent-prompt run may rewrite the whole file. Keep stable rules in live.user.md instead.
+`
+	},
+	{
+		relPath: ".ccqa/prompts/record.user.md",
+		content: `# Project guidance for ccqa record (deterministic trace)
+Write stable, hand-maintained context here for the trace phase of 'ccqa record'. Lines you add will be appended verbatim to the trace system prompt.
+`
+	},
+	{
+		relPath: ".ccqa/prompts/record.agent.md",
+		content: `# Agent learnings for ccqa record
+This file is updated by 'ccqa record --update-agent-prompt'. Same convention as live.agent.md — stable rules go in record.user.md.
+`
+	}
+];
+const initCommand = new Command("init").description("Create .ccqa/prompts/{live,record}.{user,agent}.md template files (skips existing files unless --force).").option("--cwd <path>", "Working directory (default: cwd)").option("--force", "Overwrite existing files").action(async (opts) => { // `ccqa init`: writes each TEMPLATES entry under <cwd>/.ccqa/prompts and reports what was created or skipped
+	const cwd = resolveCwd(opts.cwd);
+	header("init", cwd);
+	await mkdir(join(cwd, ".ccqa", "prompts"), { recursive: true }); // every template lives in this one directory
+	const created = [];
+	const skipped = [];
+	for (const t of TEMPLATES) if (await writeTemplate(join(cwd, t.relPath), t.content, opts.force ?? false)) created.push(t.relPath); // written one at a time; true means the file was written
+	else skipped.push(t.relPath); // false means the file already existed and --force was not given
+	for (const f of created) info(`created  ${f}`);
+	for (const f of skipped) info(`skipped  ${f} (already exists; pass --force to overwrite)`);
+	blank();
+	meta("created", created.length);
+	meta("skipped", skipped.length);
+});
+async function writeTemplate(absPath, content, force) { // Write one template file; resolves true when written, false when it already exists and force is off.
+	try {
+		await writeFile(absPath, content, force ? { encoding: "utf-8" } : {
+			encoding: "utf-8",
+			flag: "wx" // exclusive create: the write fails with EEXIST instead of overwriting
+		});
+		return true;
+	} catch (err) {
+		if (typeof err === "object" && err !== null && err.code === "EEXIST") return false; // existing file is the expected "skip" case
+		throw err; // any other failure is propagated to the caller
+	}
+}
+//#endregion
 //#region src/prompts/perspectives.ts
 /**
 * Build the system prompt. By default the descriptive fields follow the
@@ -9595,8 +10162,6 @@ function renderSpecMarkdown(spec, labels = LABELS_JA) {
 	lines.push("");
 	lines.push(`| ${labels.itemCol} | ${labels.valueCol} |`);
 	lines.push("| --- | --- |");
-	lines.push(`| ${labels.modeLabel} | ${mdCell(modeLabel(spec.status, labels))} |`);
-	lines.push(`| ${labels.statusCol} | ${mdCell(statusLabel(spec.status, labels))} |`);
 	if (spec.summary) lines.push(`| ${labels.summary} | ${mdCell(spec.summary)} |`);
 	if (spec.preconditions && spec.preconditions.length > 0) lines.push(`| ${labels.preconditions} | ${spec.preconditions.map(mdCell).join("<br>")} |`);
 	if (spec.startScreen) lines.push(`| ${labels.startScreen} | ${mdCell(spec.startScreen)} |`);
@@ -9628,6 +10193,7 @@ function resolvePackageJson() {
 const { version } = JSON.parse(readFileSync(resolvePackageJson(), "utf8"));
 const program = new Command();
 program.name("ccqa").description("E2E test CLI using Claude Code + agent-browser").version(version);
+program.addCommand(initCommand);
 program.addCommand(draftCommand);
 program.addCommand(perspectivesCommand);
 program.addCommand(recordCommand);