npm - ai-spec-dev - Versions diffs - 0.33.0 → 0.35.0 - Mend

ai-spec-dev 0.33.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/.claude/commands/add-lesson.md +34 -0
package/.claude/commands/check-layers.md +65 -0
package/.claude/commands/installed-deps.md +35 -0
package/.claude/commands/recall-lessons.md +40 -0
package/.claude/commands/scan-singletons.md +45 -0
package/.claude/commands/verify-imports.md +48 -0
package/.claude/settings.local.json +11 -1
package/README.md +531 -213
package/RELEASE_LOG.md +305 -0
package/cli/commands/create.ts +1233 -0
package/cli/commands/dashboard.ts +62 -0
package/cli/commands/init.ts +45 -8
package/cli/commands/mock.ts +175 -0
package/cli/commands/scan.ts +99 -0
package/cli/commands/types.ts +69 -0
package/cli/commands/vcr.ts +70 -0
package/cli/index.ts +34 -2517
package/core/combined-generator.ts +13 -3
package/core/dashboard-generator.ts +340 -0
package/core/design-dialogue.ts +124 -0
package/core/dsl-feedback.ts +34 -4
package/core/error-feedback.ts +46 -2
package/core/project-index.ts +301 -0
package/core/reviewer.ts +84 -6
package/core/run-logger.ts +109 -3
package/core/run-trend.ts +24 -4
package/core/self-evaluator.ts +39 -11
package/core/spec-generator.ts +14 -8
package/core/task-generator.ts +17 -0
package/core/types-generator.ts +219 -0
package/core/vcr.ts +210 -0
package/dist/cli/index.js +7297 -5640
package/dist/cli/index.js.map +1 -1
package/dist/cli/index.mjs +8728 -7071
package/dist/cli/index.mjs.map +1 -1
package/dist/index.d.mts +19 -5
package/dist/index.d.ts +19 -5
package/dist/index.js +420 -224
package/dist/index.js.map +1 -1
package/dist/index.mjs +418 -224
package/dist/index.mjs.map +1 -1
package/docs-assets/purpose/architecture-overview.svg +64 -0
package/docs-assets/purpose/create-pipeline.svg +113 -0
package/docs-assets/purpose/task-layering.svg +74 -0
package/package.json +1 -1
package/prompts/codegen.prompt.ts +97 -9
package/prompts/design.prompt.ts +59 -0
package/prompts/spec.prompt.ts +8 -1
package/prompts/tasks.prompt.ts +27 -2
package/purpose.md +600 -174

package/core/self-evaluator.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import chalk from "chalk";
 import { SpecDSL } from "./dsl-types";
 import { RunLogger } from "./run-logger";
+import { extractComplianceScore } from "./reviewer";
 // ─── Types ────────────────────────────────────────────────────────────────────
@@ -11,6 +12,8 @@ export interface SelfEvalResult {
   compileScore: number;
   /** 0-10 extracted from 3-pass review text, or null when review was skipped */
   reviewScore: number | null;
+  /** 0-10 from Pass 0 spec compliance check, or null when skipped/unavailable */
+  complianceScore: number | null;
   /** 0-10 weighted overall — the "Harness Score" recorded in RunLog */
   harnessScore: number;
   /** Prompt hash at the time this run executed */
@@ -191,15 +194,35 @@ export function runSelfEval(opts: {
   // ── Review Score ──────────────────────────────────────────────────────────
   const reviewScore = reviewText ? extractReviewScore(reviewText) : null;
+  // ── Compliance Score (Pass 0) ──────────────────────────────────────────────
+  const rawCompliance = reviewText ? extractComplianceScore(reviewText) : 0;
+  const complianceScore: number | null = rawCompliance > 0 ? rawCompliance : null;
   // ── Harness Score (weighted average) ──────────────────────────────────────
-  const harnessScore = reviewScore !== null
-    ? Math.round((dslCoverageScore * 0.4 + compileScore * 0.3 + reviewScore * 0.3) * 10) / 10
-    : Math.round((dslCoverageScore * 0.55 + compileScore * 0.45) * 10) / 10;
+  // Weights reflect importance: compliance (did we build the right thing?) > dsl > review > compile
+  //
+  //  compliance + review available  → 0.30 compliance + 0.25 dsl + 0.20 compile + 0.25 review
+  //  review only                    → 0.40 dsl + 0.30 compile + 0.30 review  (unchanged)
+  //  compliance only                → 0.35 compliance + 0.35 dsl + 0.30 compile
+  //  neither                        → 0.55 dsl + 0.45 compile                (unchanged)
+  let harnessScore: number;
+  if (complianceScore !== null && reviewScore !== null) {
+    harnessScore = Math.round(
+      (complianceScore * 0.30 + dslCoverageScore * 0.25 + compileScore * 0.20 + reviewScore * 0.25) * 10
+    ) / 10;
+  } else if (reviewScore !== null) {
+    harnessScore = Math.round((dslCoverageScore * 0.4 + compileScore * 0.3 + reviewScore * 0.3) * 10) / 10;
+  } else if (complianceScore !== null) {
+    harnessScore = Math.round((complianceScore * 0.35 + dslCoverageScore * 0.35 + compileScore * 0.30) * 10) / 10;
+  } else {
+    harnessScore = Math.round((dslCoverageScore * 0.55 + compileScore * 0.45) * 10) / 10;
+  }
   const result: SelfEvalResult = {
     dslCoverageScore,
     compileScore,
     reviewScore,
+    complianceScore,
     harnessScore,
     promptHash,
     detail: {
@@ -221,6 +244,7 @@ export function runSelfEval(opts: {
     dslCoverageScore,
     compileScore,
     reviewScore: reviewScore ?? undefined,
+    complianceScore: complianceScore ?? undefined,
     promptHash,
     modelNameCoverage: result.detail.modelNameCoverage,
     modelNameMatched:  result.detail.modelNameMatched,
@@ -244,9 +268,16 @@ export function printSelfEval(result: SelfEvalResult): void {
   const compileTag = result.compileScore === 10
     ? chalk.green("pass")
     : chalk.yellow("partial");
-  const reviewTag  = result.reviewScore !== null
+  const reviewTag = result.reviewScore !== null
     ? `Review: ${result.reviewScore}/10`
     : chalk.gray("Review: skipped");
+  const complianceTag = result.complianceScore !== null
+    ? (result.complianceScore >= 8
+        ? chalk.green(`Compliance: ${result.complianceScore}/10`)
+        : result.complianceScore >= 6
+          ? chalk.yellow(`Compliance: ${result.complianceScore}/10`)
+          : chalk.red(`Compliance: ${result.complianceScore}/10 ⚠`))
+    : chalk.gray("Compliance: skipped");
   // Model coverage tag (only shown when there are declared models)
   let modelCoverageTag = "";
@@ -262,15 +293,12 @@ export function printSelfEval(result: SelfEvalResult): void {
   console.log(chalk.cyan("\n─── Harness Self-Eval ───────────────────────────"));
   console.log(`  Score  : ${scoreColor(`[${bar}] ${result.harnessScore}/10`)}`);
+  console.log(`  ${complianceTag}  Compile: ${compileTag}  ${reviewTag}`);
   console.log(
-    `  DSL    : ${scoreColor(String(result.dslCoverageScore) + "/10")}  ` +
-    `Compile: ${compileTag}  ${reviewTag}`
+    `  DSL    : ${scoreColor(String(result.dslCoverageScore) + "/10")}` +
+    (modelCoverageTag ? `  ${modelCoverageTag}` : "") +
+    chalk.gray(`  Endpoints: ${result.detail.endpointsTotal}  Files: ${result.detail.filesWritten}`)
   );
-  if (modelCoverageTag) {
-    console.log(`  Detail : ${modelCoverageTag}  ` +
-      chalk.gray(`Endpoints: ${result.detail.endpointsTotal}  Files: ${result.detail.filesWritten}`)
-    );
-  }
   console.log(chalk.gray(`  Prompt : ${result.promptHash}`));
   console.log(chalk.cyan("─".repeat(49)));
 }

package/core/spec-generator.ts CHANGED Viewed

@@ -145,15 +145,16 @@ export const PROVIDER_CATALOG: Record<string, ProviderMeta> = {
   },
   glm: {
     displayName: "智谱 GLM (Zhipu AI)",
-    description: "智谱 AI — GLM-5 / GLM-4 series + Z1 reasoning",
+    description: "智谱 AI — GLM-5.1 / GLM-5 / GLM-4 series",
     models: [
-      "glm-5",               // GLM-5 flagship (如不可用请确认最新 model ID)
-      "glm-5-flash",
-      "glm-z1",              // GLM-Z1 reasoning model
+      "glm-5.1",             // GLM-5.1 — latest flagship (2026)
+      "glm-5",               // GLM-5 — premium (Max/Pro plans)
+      "glm-5-turbo",         // GLM-5-Turbo — fast & cost-efficient
+      "glm-4.7",             // GLM-4.7
+      "glm-4.6",             // GLM-4.6
+      "glm-4.5-air",         // GLM-4.5-Air — lightweight
+      "glm-z1",              // GLM-Z1 — reasoning model
       "glm-z1-flash",
-      "glm-4-plus",
-      "glm-4-flash",
-      "glm-4-long",
     ],
     envKey: "ZHIPU_API_KEY",
     baseURL: "https://open.bigmodel.cn/api/paas/v4/",
@@ -405,8 +406,13 @@ export function createProvider(
 export class SpecGenerator {
   constructor(private provider: AIProvider) {}
-  async generateSpec(idea: string, context?: ProjectContext): Promise<string> {
+  async generateSpec(idea: string, context?: ProjectContext, architectureDecision?: string): Promise<string> {
     const parts: string[] = [idea];
+    if (architectureDecision) {
+      parts.push(
+        `\n=== Architecture Decision (MUST follow this approach in the spec) ===\n${architectureDecision}`
+      );
+    }
     if (context) {
       // Constitution is highest priority — put it first so the AI respects it

package/core/task-generator.ts CHANGED Viewed

@@ -76,6 +76,15 @@ export interface SpecTask {
   layer: TaskLayer;
   filesToTouch: string[];
   acceptanceCriteria: string[];
+  /**
+   * Concrete, runnable verification steps — each entry is a specific command
+   * or action with an expected observable outcome.
+   * Examples:
+   *   "POST /api/orders with body {...} → HTTP 201, body contains {id, status:'pending'}"
+   *   "npm run build exits 0 with no TypeScript errors"
+   *   "GET /api/orders/:id returns 404 when id does not exist"
+   */
+  verificationSteps: string[];
   dependencies: string[];
   priority: TaskPriority;
   /** Runtime checkpoint — set by code generator, persisted to tasks file */
@@ -148,6 +157,14 @@ export function printTasks(tasks: SpecTask[]): void {
     const badge = color(`[${task.layer}]`);
     const prio = task.priority === "high" ? chalk.red("●") : task.priority === "medium" ? chalk.yellow("●") : chalk.gray("●");
     console.log(`  ${prio} ${chalk.bold(task.id)} ${badge} ${task.title}`);
+    if (task.verificationSteps?.length) {
+      for (const step of task.verificationSteps.slice(0, 2)) {
+        console.log(chalk.gray(`       ✓ ${step}`));
+      }
+      if (task.verificationSteps.length > 2) {
+        console.log(chalk.gray(`       + ${task.verificationSteps.length - 2} more verification step(s)`));
+      }
+    }
   }
 }

package/core/types-generator.ts ADDED Viewed

@@ -0,0 +1,219 @@
+import * as path from "path";
+import * as fs from "fs-extra";
+import { SpecDSL, ModelField, ApiEndpoint } from "./dsl-types";
+// ─── Type Mapping ─────────────────────────────────────────────────────────────
+/**
+ * Lookup table from DSL scalar type names (capitalised and lower-case
+ * spellings) to the TypeScript type emitted for them.
+ * DateTime/Date are emitted as `string`, Json/JSON as
+ * `Record<string, unknown>`, and Any/any as `unknown`.
+ */
+const PRIMITIVE_MAP: Record<string, string> = {
+  String: "string",
+  string: "string",
+  Int: "number",
+  int: "number",
+  Float: "number",
+  float: "number",
+  Number: "number",
+  number: "number",
+  Boolean: "boolean",
+  boolean: "boolean",
+  DateTime: "string",
+  Date: "string",
+  Json: "Record<string, unknown>",
+  JSON: "Record<string, unknown>",
+  Any: "unknown",
+  any: "unknown",
+};
+/**
+ * Translate a DSL type descriptor into a TypeScript type expression.
+ *
+ * Resolution order:
+ *  1. one trailing `?` / `!` marker is dropped (it does not change the emitted type),
+ *  2. `T[]` becomes `<mapped T>[]`, recursively,
+ *  3. names listed in PRIMITIVE_MAP map to their TypeScript primitive,
+ *  4. any other name starting with an upper-case letter is kept verbatim as a model reference,
+ *  5. everything else falls back to `string`.
+ *
+ * @param raw - Type descriptor as written in the DSL, e.g. "String", "User[]", "Int?".
+ * @returns The TypeScript type expression to emit.
+ */
+function mapFieldType(raw: string): string {
+  // Strip the marker before the array check so "String[]?" is treated like
+  // "String[]" instead of falling through as the boxed type "String[]".
+  const base = raw.trim().replace(/[?!]$/, "");
+  if (base.endsWith("[]")) {
+    return `${mapFieldType(base.slice(0, -2))}[]`;
+  }
+  // Own-property check: a plain index lookup also reaches Object.prototype, so
+  // a type called "constructor" or "toString" would return a function.
+  if (Object.prototype.hasOwnProperty.call(PRIMITIVE_MAP, base)) {
+    return PRIMITIVE_MAP[base];
+  }
+  // PascalCase → treat as model reference (stays as-is)
+  if (/^[A-Z]/.test(base)) return base;
+  return "string";
+}
+// ─── Model → Interface ────────────────────────────────────────────────────────
+/**
+ * Render one exported TypeScript interface for a DSL model.
+ *
+ * @param name - Interface name, emitted verbatim.
+ * @param fields - Model fields; a field that is not `required` gets a `?` marker.
+ * @param description - Optional text emitted as a doc comment above the interface.
+ * @returns The interface source, lines joined with "\n" (no trailing newline).
+ */
+function renderModelInterface(
+  name: string,
+  fields: ModelField[],
+  description?: string
+): string {
+  const docLine = description ? [`/** ${description} */`] : [];
+  const members = fields.flatMap((field) => {
+    const marker = field.required ? "" : "?";
+    const member = `  ${field.name}${marker}: ${mapFieldType(field.type)};`;
+    // A field-level description is placed on its own line above the member.
+    return field.description ? [`  /** ${field.description} */`, member] : [member];
+  });
+  return [...docLine, `export interface ${name} {`, ...members, "}"].join("\n");
+}
+// ─── Endpoint → Request/Response types ───────────────────────────────────────
+/**
+ * Turn a route path into a PascalCase identifier fragment.
+ *
+ * Colon-style path parameters are rendered as `By<Param>`, and every run of
+ * characters that is neither a letter nor a digit ("/", "-", "_", ".", …) is
+ * treated as a word boundary, so the result contains letters and digits only.
+ *
+ * Examples:
+ *   "/users/:id"              → "UsersById"
+ *   "/auth/login"             → "AuthLogin"
+ *   "/order-items/:order_id"  → "OrderItemsByOrderId"
+ *
+ * @param str - Route path such as "/users/:id".
+ * @returns The capitalised words concatenated; empty when the path has no words.
+ */
+function sanitizeName(str: string): string {
+  return str
+    // ":id" → "/by/id": the parameter name then gets capitalised like any
+    // other word (it used to come out as "Byid").
+    .replace(/:/g, "/by/")
+    // Unicode-aware split keeps non-ASCII letters but drops punctuation that
+    // would not be valid inside an identifier.
+    .split(/[^\p{L}\p{N}]+/u)
+    .filter((word) => word.length > 0)
+    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
+    .join("");
+}
+/**
+ * Build the PascalCase base name used for an endpoint's generated types:
+ * the HTTP method in capitalised form followed by the sanitised path.
+ *
+ * The method is case-normalised first, so "GET", "get" and "Get" all yield
+ * the prefix "Get" (previously a lower-case method produced a lower-case
+ * first letter).
+ *
+ * @param ep - Endpoint whose `method` and `path` are read.
+ * @returns Base name such as "GetUsersById".
+ */
+function endpointTypeName(ep: ApiEndpoint): string {
+  const verb = ep.method.toLowerCase();
+  return verb.charAt(0).toUpperCase() + verb.slice(1) + sanitizeName(ep.path);
+}
+/**
+ * Render the request-side interfaces for one endpoint.
+ *
+ * Up to three interfaces are emitted, each only when the corresponding map on
+ * `ep.request` has at least one key:
+ *   `<Base>Request` from `body`   (members required),
+ *   `<Base>Query`   from `query`  (members optional),
+ *   `<Base>Params`  from `params` (members required).
+ * The block starts with a `// METHOD path — description` comment line.
+ *
+ * @param ep - Endpoint definition from the DSL.
+ * @returns The rendered block, or null when there is nothing to emit besides the comment.
+ */
+function renderEndpointTypes(ep: ApiEndpoint): string | null {
+  const baseName = endpointTypeName(ep);
+  const parts: string[] = [
+    `// ${ep.method} ${ep.path}${ep.description ? ` — ${ep.description}` : ""}`,
+  ];
+  // Shared renderer for the three field maps; a missing or empty map emits nothing.
+  const emit = (
+    suffix: string,
+    fields: Record<string, string> | null | undefined,
+    optional: boolean
+  ): void => {
+    const entries = Object.entries(fields ?? {});
+    if (entries.length === 0) return;
+    parts.push(`export interface ${baseName}${suffix} {`);
+    for (const [key, typeDesc] of entries) {
+      parts.push(`  ${key}${optional ? "?" : ""}: ${mapFieldType(typeDesc)};`);
+    }
+    parts.push("}");
+  };
+  emit("Request", ep.request?.body, false);
+  emit("Query", ep.request?.query, true);
+  emit("Params", ep.request?.params, false);
+  // Only the leading comment present → no types to emit.
+  return parts.length > 1 ? parts.join("\n") : null;
+}
+// ─── Endpoint map constant ───────────────────────────────────────────────────
+/**
+ * Render the `API_ENDPOINTS` constant and its `ApiEndpointKey` key type.
+ *
+ * Each endpoint becomes one property whose key is the endpoint type name with
+ * a lower-case first letter and whose value records `method`, `path` and
+ * `auth` exactly as interpolated from the endpoint.
+ *
+ * @param endpoints - Endpoints to list, in output order.
+ * @returns Source text for the constant, a blank line, and the key type alias.
+ */
+function renderEndpointMap(endpoints: ApiEndpoint[]): string {
+  const rows = endpoints.map((ep) => {
+    const typeName = endpointTypeName(ep);
+    const prop = typeName.charAt(0).toLowerCase() + typeName.slice(1);
+    return `  ${prop}: { method: '${ep.method}', path: '${ep.path}', auth: ${ep.auth} },`;
+  });
+  return [
+    "export const API_ENDPOINTS = {",
+    ...rows,
+    "} as const;",
+    "",
+    "export type ApiEndpointKey = keyof typeof API_ENDPOINTS;",
+  ].join("\n");
+}
+// ─── Main generator ───────────────────────────────────────────────────────────
+export interface TypesGeneratorOptions {
+  /** Emit the per-endpoint `…Request` / `…Query` / `…Params` interfaces (default: true) */
+  includeEndpointTypes?: boolean;
+  /** Emit the API_ENDPOINTS constant map and its key type (default: true) */
+  includeEndpointMap?: boolean;
+  /** Text placed at the top of the output instead of the default "Generated by ai-spec" banner */
+  header?: string;
+}
+/**
+ * Render a complete TypeScript declarations file from a SpecDSL.
+ *
+ * Sections, in order: header, model interfaces, component `…Props`
+ * interfaces, per-endpoint request types, and the API_ENDPOINTS map. A section
+ * is left out when its source list is empty or its option is switched off.
+ *
+ * @param dsl - Parsed spec to render.
+ * @param opts - Section toggles and optional header override.
+ * @returns File content: sections separated by blank lines, ending with a newline.
+ */
+export function generateTypescriptTypes(
+  dsl: SpecDSL,
+  opts: TypesGeneratorOptions = {}
+): string {
+  const {
+    includeEndpointTypes: withEndpointTypes = true,
+    includeEndpointMap: withEndpointMap = true,
+  } = opts;
+  const banner = (title: string, ruleWidth: number): string =>
+    `// ─── ${title} ` + "─".repeat(ruleWidth);
+  // The default header embeds the current time; it is only built when the
+  // caller supplied no header of their own.
+  const defaultHeader = (): string =>
+    `// Generated by ai-spec — DO NOT EDIT\n// Feature: ${dsl.feature.title}\n// Generated at: ${new Date().toISOString()}`;
+  const sections: string[] = [opts.header ?? defaultHeader()];
+  // Data Models
+  if (dsl.models.length > 0) {
+    sections.push(banner("Data Models", 57));
+    dsl.models.forEach((model) => {
+      sections.push(renderModelInterface(model.name, model.fields, model.description));
+    });
+  }
+  // Frontend Components (props only)
+  if (dsl.components?.length) {
+    sections.push(banner("Component Props", 53));
+    for (const component of dsl.components) {
+      const docLine = component.description ? [`/** ${component.description} */`] : [];
+      const members = component.props.flatMap((prop) => {
+        const marker = prop.required ? "" : "?";
+        const member = `  ${prop.name}${marker}: ${mapFieldType(prop.type)};`;
+        return prop.description ? [`  /** ${prop.description} */`, member] : [member];
+      });
+      sections.push(
+        [...docLine, `export interface ${component.name}Props {`, ...members, "}"].join("\n")
+      );
+    }
+  }
+  const hasEndpoints = dsl.endpoints.length > 0;
+  // Endpoint request types — endpoints with nothing to emit are skipped
+  if (withEndpointTypes && hasEndpoints) {
+    sections.push(banner("API Request Types", 51));
+    for (const ep of dsl.endpoints) {
+      const block = renderEndpointTypes(ep);
+      if (block) sections.push(block);
+    }
+  }
+  // Endpoint map
+  if (withEndpointMap && hasEndpoints) {
+    sections.push(banner("Endpoint Map", 55), renderEndpointMap(dsl.endpoints));
+  }
+  return sections.join("\n\n") + "\n";
+}
+// ─── File save ────────────────────────────────────────────────────────────────
+/**
+ * Generate the TypeScript declarations for `dsl` and write them to disk.
+ *
+ * When `opts.outputPath` is not given the file is written to
+ * `<projectDir>/.ai-spec/<slug>.types.ts`, where the slug is the lower-cased
+ * feature title with every run of whitespace or path-significant characters
+ * (`\ / : * ? " < > |`) replaced by "-". Titles without such characters
+ * produce the same file name as before; a title such as "../x" or "a/b" can
+ * no longer redirect the file outside (or into a sub-folder of) `.ai-spec`.
+ *
+ * @param dsl - Parsed spec to render.
+ * @param projectDir - Project root that holds the `.ai-spec` folder.
+ * @param opts - Generator options plus an optional explicit `outputPath`.
+ * @returns The path that was written.
+ */
+export async function saveTypescriptTypes(
+  dsl: SpecDSL,
+  projectDir: string,
+  opts: TypesGeneratorOptions & { outputPath?: string } = {}
+): Promise<string> {
+  // Lazily built so dsl.feature.title is only read when no explicit path is given.
+  const defaultPath = (): string => {
+    const slug = dsl.feature.title.replace(/[\s\\\/:*?"<>|]+/g, "-").toLowerCase();
+    return path.join(projectDir, ".ai-spec", `${slug}.types.ts`);
+  };
+  const outputPath = opts.outputPath ?? defaultPath();
+  await fs.ensureDir(path.dirname(outputPath));
+  const content = generateTypescriptTypes(dsl, opts);
+  await fs.writeFile(outputPath, content, "utf-8");
+  return outputPath;
+}

package/core/vcr.ts ADDED Viewed

@@ -0,0 +1,210 @@
+/**
+ * vcr.ts — Pipeline response recording & replay for zero-cost harness iteration.
+ *
+ * Inspired by Claude Code's VCR pattern for token counting tests.
+ *
+ * Design:
+ *  - VcrRecordingProvider wraps any AIProvider and intercepts every generate()
+ *    call, capturing (prompt, systemInstruction, response) in order.
+ *  - VcrReplayProvider implements AIProvider by returning pre-recorded responses
+ *    in sequence — zero API calls, zero tokens, deterministic output.
+ *  - Recordings are stored in .ai-spec-vcr/{runId}.json alongside RunLogs.
+ *
+ * Use cases:
+ *  - Iterating on harness scoring weights without burning tokens
+ *  - Testing prompt format changes against known pipelines
+ *  - Debugging pipeline stage logic offline
+ *
+ * CLI:
+ *  ai-spec create --vcr-record           → record this run
+ *  ai-spec create --vcr-replay <runId>   → replay with zero API calls
+ *  ai-spec vcr list                      → list available recordings
+ *  ai-spec vcr show <runId>              → inspect call details
+ */
+import { createHash } from "crypto";
+import * as fs from "fs-extra";
+import * as path from "path";
+import { AIProvider } from "./spec-generator";
+/** Folder, relative to the working directory, that holds one `<runId>.json` file per recording. */
+export const VCR_DIR = ".ai-spec-vcr";
+// ─── Types ────────────────────────────────────────────────────────────────────
+/** One recorded generate() call. */
+export interface VcrEntry {
+  /** Sequential call index within this recording */
+  index: number;
+  /** First 200 chars of prompt, newlines replaced by spaces — for human inspection only */
+  promptPreview: string;
+  /** SHA-256[:8] of (prompt + "\x00" + systemInstruction) — stable identity */
+  callHash: string;
+  /** System instruction of the call; stored only when a non-empty one was passed */
+  systemInstruction?: string;
+  /** Complete AI response — what replay will return */
+  response: string;
+  /** providerName of the wrapped provider that answered the call */
+  providerName: string;
+  /** modelName of the wrapped provider that answered the call */
+  modelName: string;
+  /** ISO-8601 timestamp taken once the response had arrived */
+  ts: string;
+  /** Milliseconds elapsed between the start of the call and the creation of this entry */
+  durationMs: number;
+}
+/** A persisted recording: metadata plus the ordered list of recorded calls. */
+export interface VcrRecording {
+  /** Identifier passed to save(); also used as the recording's file name */
+  runId: string;
+  /** ISO-8601 timestamp taken when the recording was assembled for saving */
+  recordedAt: string;
+  /** Total number of AI calls captured */
+  entryCount: number;
+  /** Unique provider/model strings seen across all calls */
+  providers: string[];
+  /** Recorded calls in the order replay will return them */
+  entries: VcrEntry[];
+}
+// ─── Recording Provider ───────────────────────────────────────────────────────
+/**
+ * Pass-through AIProvider that records every successful generate() call.
+ *
+ * Each call is forwarded to the wrapped provider; once the response arrives,
+ * an entry holding the prompt preview, call hash, response and timing is
+ * appended. A call that rejects is not recorded — the rejection propagates.
+ * After the pipeline completes, call `save()` to persist the recording.
+ */
+export class VcrRecordingProvider implements AIProvider {
+  private entries: VcrEntry[] = [];
+  constructor(private readonly inner: AIProvider) {}
+  get providerName() { return this.inner.providerName; }
+  get modelName()    { return this.inner.modelName; }
+  /**
+   * Forward the call to the wrapped provider and record the result.
+   *
+   * @param prompt - Prompt passed through unchanged.
+   * @param systemInstruction - Optional system instruction passed through unchanged.
+   * @returns The wrapped provider's response.
+   */
+  async generate(prompt: string, systemInstruction?: string): Promise<string> {
+    const start = Date.now();
+    const response = await this.inner.generate(prompt, systemInstruction);
+    // "\x00" separates the two parts so ("ab", "c") and ("a", "bc") hash differently.
+    const callHash = createHash("sha256")
+      .update(prompt + "\x00" + (systemInstruction ?? ""))
+      .digest("hex")
+      .slice(0, 8);
+    this.entries.push({
+      index: this.entries.length,
+      promptPreview: prompt.slice(0, 200).replace(/\n/g, " "),
+      callHash,
+      ...(systemInstruction ? { systemInstruction } : {}),
+      response,
+      providerName: this.inner.providerName,
+      modelName: this.inner.modelName,
+      ts: new Date().toISOString(),
+      durationMs: Date.now() - start,
+    });
+    return response;
+  }
+  /** Number of calls recorded so far. */
+  get callCount() { return this.entries.length; }
+  /**
+   * Persist the recording to .ai-spec-vcr/{runId}.json.
+   *
+   * Entries from an optional second recorder (e.g. codegenProvider) are merged
+   * in and ordered by `ts`. Because `ts` is taken when a call completes, the
+   * merged order is completion order; it equals call order as long as the two
+   * recorders' calls do not overlap in time.
+   *
+   * The saved entries are re-indexed copies: neither recorder's own entries
+   * are modified by saving.
+   *
+   * @param workingDir - Directory under which the VCR folder is created.
+   * @param runId - Recording identifier, used as the file name.
+   * @param secondRecorder - Optional recorder whose entries are merged in.
+   * @returns Path of the written JSON file.
+   */
+  async save(
+    workingDir: string,
+    runId: string,
+    secondRecorder?: VcrRecordingProvider
+  ): Promise<string> {
+    const merged = secondRecorder
+      ? [...this.entries, ...secondRecorder.entries].sort((a, b) => a.ts.localeCompare(b.ts))
+      : this.entries;
+    // Re-index on copies so the objects still held by each recorder keep
+    // their original per-recorder index.
+    const allEntries = merged.map((entry, i) => ({ ...entry, index: i }));
+    const recording: VcrRecording = {
+      runId,
+      recordedAt: new Date().toISOString(),
+      entryCount: allEntries.length,
+      providers: [...new Set(allEntries.map((e) => `${e.providerName}/${e.modelName}`))],
+      entries: allEntries,
+    };
+    const vcrDir = path.join(workingDir, VCR_DIR);
+    await fs.ensureDir(vcrDir);
+    const filePath = path.join(vcrDir, `${runId}.json`);
+    await fs.writeJson(filePath, recording, { spaces: 2 });
+    return filePath;
+  }
+}
+// ─── Replay Provider ──────────────────────────────────────────────────────────
+/**
+ * Implements AIProvider by replaying pre-recorded responses in sequence.
+ * Every generate() call returns the next entry of the recording — no API call,
+ * no tokens, deterministic output.
+ *
+ * Note: responses are returned in strict index order, regardless of the prompt
+ * content. This works correctly as long as the pipeline makes calls in the same
+ * structural order as the recording.
+ */
+export class VcrReplayProvider implements AIProvider {
+  private index = 0;
+  constructor(private readonly recording: VcrRecording) {}
+  get providerName() { return "vcr-replay"; }
+  get modelName()    { return this.recording.runId; }
+  /**
+   * Return the next recorded response; both arguments are ignored.
+   *
+   * @returns The `response` of the next entry in the recording.
+   * @throws Error when no entry is left. The position is not advanced in that
+   *   case, so `consumed` and `remaining` stay accurate after a failed call.
+   */
+  async generate(_prompt: string, _systemInstruction?: string): Promise<string> {
+    const entry = this.recording.entries[this.index];
+    if (!entry) {
+      throw new Error(
+        `VCR replay exhausted: all ${this.recording.entries.length} recorded ` +
+        `responses have been consumed. The pipeline made more AI calls than the recording has.`
+      );
+    }
+    // Advance only after a successful lookup; incrementing unconditionally
+    // used to drive `remaining` negative once the recording ran out.
+    this.index += 1;
+    return entry.response;
+  }
+  /** Number of recorded responses not yet returned. */
+  get remaining() { return this.recording.entries.length - this.index; }
+  /** Number of recorded responses returned so far. */
+  get consumed()  { return this.index; }
+}
+// ─── Loader helpers ───────────────────────────────────────────────────────────
+/**
+ * Read the recording stored for `runId` under the VCR folder of `workingDir`.
+ *
+ * @param workingDir - Directory that contains the VCR folder.
+ * @param runId - Recording identifier (file name without ".json").
+ * @returns The parsed file content, or null when reading or parsing fails for any reason.
+ */
+export async function loadVcrRecording(
+  workingDir: string,
+  runId: string
+): Promise<VcrRecording | null> {
+  const recordingFile = path.join(workingDir, VCR_DIR, `${runId}.json`);
+  let recording: VcrRecording | null = null;
+  try {
+    recording = await fs.readJson(recordingFile);
+  } catch {
+    // Best effort: any read or parse failure is reported as "no recording".
+  }
+  return recording;
+}
+/** Metadata of a stored recording without its entries, as returned by listVcrRecordings(). */
+export interface VcrSummary {
+  /** Identifier stored in the recording file */
+  runId: string;
+  /** Timestamp stored in the recording file */
+  recordedAt: string;
+  /** Number of calls the recording reports */
+  entryCount: number;
+  /** Provider/model strings the recording reports */
+  providers: string[];
+}
+/**
+ * Summarise every recording found in the VCR folder of `workingDir`.
+ *
+ * Only `*.json` files are considered, visited in descending file-name order.
+ * Files that cannot be read or parsed are left out of the result.
+ *
+ * @param workingDir - Directory that contains the VCR folder.
+ * @returns One summary per readable recording; empty when the folder does not exist.
+ */
+export async function listVcrRecordings(workingDir: string): Promise<VcrSummary[]> {
+  const vcrDir = path.join(workingDir, VCR_DIR);
+  const dirExists = await fs.pathExists(vcrDir);
+  if (!dirExists) return [];
+  const names = await fs.readdir(vcrDir);
+  const jsonFiles = names.filter((name) => name.endsWith(".json")).sort().reverse();
+  const summaries: VcrSummary[] = [];
+  // Files are read one at a time, so the output keeps the sorted order.
+  for (const name of jsonFiles) {
+    try {
+      const rec: VcrRecording = await fs.readJson(path.join(vcrDir, name));
+      const { runId, recordedAt, entryCount, providers } = rec;
+      summaries.push({ runId, recordedAt, entryCount, providers });
+    } catch {
+      // skip corrupt files
+    }
+  }
+  return summaries;
+}