ai-spec-dev 0.33.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/.claude/commands/add-lesson.md +34 -0
  2. package/.claude/commands/check-layers.md +65 -0
  3. package/.claude/commands/installed-deps.md +35 -0
  4. package/.claude/commands/recall-lessons.md +40 -0
  5. package/.claude/commands/scan-singletons.md +45 -0
  6. package/.claude/commands/verify-imports.md +48 -0
  7. package/.claude/settings.local.json +11 -1
  8. package/README.md +531 -213
  9. package/RELEASE_LOG.md +305 -0
  10. package/cli/commands/create.ts +1233 -0
  11. package/cli/commands/dashboard.ts +62 -0
  12. package/cli/commands/init.ts +45 -8
  13. package/cli/commands/mock.ts +175 -0
  14. package/cli/commands/scan.ts +99 -0
  15. package/cli/commands/types.ts +69 -0
  16. package/cli/commands/vcr.ts +70 -0
  17. package/cli/index.ts +34 -2517
  18. package/core/combined-generator.ts +13 -3
  19. package/core/dashboard-generator.ts +340 -0
  20. package/core/design-dialogue.ts +124 -0
  21. package/core/dsl-feedback.ts +34 -4
  22. package/core/error-feedback.ts +46 -2
  23. package/core/project-index.ts +301 -0
  24. package/core/reviewer.ts +84 -6
  25. package/core/run-logger.ts +109 -3
  26. package/core/run-trend.ts +24 -4
  27. package/core/self-evaluator.ts +39 -11
  28. package/core/spec-generator.ts +14 -8
  29. package/core/task-generator.ts +17 -0
  30. package/core/types-generator.ts +219 -0
  31. package/core/vcr.ts +210 -0
  32. package/dist/cli/index.js +7297 -5640
  33. package/dist/cli/index.js.map +1 -1
  34. package/dist/cli/index.mjs +8728 -7071
  35. package/dist/cli/index.mjs.map +1 -1
  36. package/dist/index.d.mts +19 -5
  37. package/dist/index.d.ts +19 -5
  38. package/dist/index.js +420 -224
  39. package/dist/index.js.map +1 -1
  40. package/dist/index.mjs +418 -224
  41. package/dist/index.mjs.map +1 -1
  42. package/docs-assets/purpose/architecture-overview.svg +64 -0
  43. package/docs-assets/purpose/create-pipeline.svg +113 -0
  44. package/docs-assets/purpose/task-layering.svg +74 -0
  45. package/package.json +1 -1
  46. package/prompts/codegen.prompt.ts +97 -9
  47. package/prompts/design.prompt.ts +59 -0
  48. package/prompts/spec.prompt.ts +8 -1
  49. package/prompts/tasks.prompt.ts +27 -2
  50. package/purpose.md +600 -174
@@ -1,6 +1,7 @@
1
1
  import chalk from "chalk";
2
2
  import { SpecDSL } from "./dsl-types";
3
3
  import { RunLogger } from "./run-logger";
4
+ import { extractComplianceScore } from "./reviewer";
4
5
 
5
6
  // ─── Types ────────────────────────────────────────────────────────────────────
6
7
 
@@ -11,6 +12,8 @@ export interface SelfEvalResult {
11
12
  compileScore: number;
12
13
  /** 0-10 extracted from 3-pass review text, or null when review was skipped */
13
14
  reviewScore: number | null;
15
+ /** 0-10 from Pass 0 spec compliance check, or null when skipped/unavailable */
16
+ complianceScore: number | null;
14
17
  /** 0-10 weighted overall — the "Harness Score" recorded in RunLog */
15
18
  harnessScore: number;
16
19
  /** Prompt hash at the time this run executed */
@@ -191,15 +194,35 @@ export function runSelfEval(opts: {
191
194
  // ── Review Score ──────────────────────────────────────────────────────────
192
195
  const reviewScore = reviewText ? extractReviewScore(reviewText) : null;
193
196
 
197
+ // ── Compliance Score (Pass 0) ──────────────────────────────────────────────
198
+ const rawCompliance = reviewText ? extractComplianceScore(reviewText) : 0;
199
+ const complianceScore: number | null = rawCompliance > 0 ? rawCompliance : null;
200
+
194
201
  // ── Harness Score (weighted average) ──────────────────────────────────────
195
- const harnessScore = reviewScore !== null
196
- ? Math.round((dslCoverageScore * 0.4 + compileScore * 0.3 + reviewScore * 0.3) * 10) / 10
197
- : Math.round((dslCoverageScore * 0.55 + compileScore * 0.45) * 10) / 10;
202
+ // Weights reflect importance: compliance (did we build the right thing?) > dsl > review > compile
203
+ //
204
+ // compliance + review available → 0.30 compliance + 0.25 dsl + 0.20 compile + 0.25 review
205
+ // review only → 0.40 dsl + 0.30 compile + 0.30 review (unchanged)
206
+ // compliance only → 0.35 compliance + 0.35 dsl + 0.30 compile
207
+ // neither → 0.55 dsl + 0.45 compile (unchanged)
208
+ let harnessScore: number;
209
+ if (complianceScore !== null && reviewScore !== null) {
210
+ harnessScore = Math.round(
211
+ (complianceScore * 0.30 + dslCoverageScore * 0.25 + compileScore * 0.20 + reviewScore * 0.25) * 10
212
+ ) / 10;
213
+ } else if (reviewScore !== null) {
214
+ harnessScore = Math.round((dslCoverageScore * 0.4 + compileScore * 0.3 + reviewScore * 0.3) * 10) / 10;
215
+ } else if (complianceScore !== null) {
216
+ harnessScore = Math.round((complianceScore * 0.35 + dslCoverageScore * 0.35 + compileScore * 0.30) * 10) / 10;
217
+ } else {
218
+ harnessScore = Math.round((dslCoverageScore * 0.55 + compileScore * 0.45) * 10) / 10;
219
+ }
198
220
 
199
221
  const result: SelfEvalResult = {
200
222
  dslCoverageScore,
201
223
  compileScore,
202
224
  reviewScore,
225
+ complianceScore,
203
226
  harnessScore,
204
227
  promptHash,
205
228
  detail: {
@@ -221,6 +244,7 @@ export function runSelfEval(opts: {
221
244
  dslCoverageScore,
222
245
  compileScore,
223
246
  reviewScore: reviewScore ?? undefined,
247
+ complianceScore: complianceScore ?? undefined,
224
248
  promptHash,
225
249
  modelNameCoverage: result.detail.modelNameCoverage,
226
250
  modelNameMatched: result.detail.modelNameMatched,
@@ -244,9 +268,16 @@ export function printSelfEval(result: SelfEvalResult): void {
244
268
  const compileTag = result.compileScore === 10
245
269
  ? chalk.green("pass")
246
270
  : chalk.yellow("partial");
247
- const reviewTag = result.reviewScore !== null
271
+ const reviewTag = result.reviewScore !== null
248
272
  ? `Review: ${result.reviewScore}/10`
249
273
  : chalk.gray("Review: skipped");
274
+ const complianceTag = result.complianceScore !== null
275
+ ? (result.complianceScore >= 8
276
+ ? chalk.green(`Compliance: ${result.complianceScore}/10`)
277
+ : result.complianceScore >= 6
278
+ ? chalk.yellow(`Compliance: ${result.complianceScore}/10`)
279
+ : chalk.red(`Compliance: ${result.complianceScore}/10 ⚠`))
280
+ : chalk.gray("Compliance: skipped");
250
281
 
251
282
  // Model coverage tag (only shown when there are declared models)
252
283
  let modelCoverageTag = "";
@@ -262,15 +293,12 @@ export function printSelfEval(result: SelfEvalResult): void {
262
293
 
263
294
  console.log(chalk.cyan("\n─── Harness Self-Eval ───────────────────────────"));
264
295
  console.log(` Score : ${scoreColor(`[${bar}] ${result.harnessScore}/10`)}`);
296
+ console.log(` ${complianceTag} Compile: ${compileTag} ${reviewTag}`);
265
297
  console.log(
266
- ` DSL : ${scoreColor(String(result.dslCoverageScore) + "/10")} ` +
267
- `Compile: ${compileTag} ${reviewTag}`
298
+ ` DSL : ${scoreColor(String(result.dslCoverageScore) + "/10")}` +
299
+ (modelCoverageTag ? ` ${modelCoverageTag}` : "") +
300
+ chalk.gray(` Endpoints: ${result.detail.endpointsTotal} Files: ${result.detail.filesWritten}`)
268
301
  );
269
- if (modelCoverageTag) {
270
- console.log(` Detail : ${modelCoverageTag} ` +
271
- chalk.gray(`Endpoints: ${result.detail.endpointsTotal} Files: ${result.detail.filesWritten}`)
272
- );
273
- }
274
302
  console.log(chalk.gray(` Prompt : ${result.promptHash}`));
275
303
  console.log(chalk.cyan("─".repeat(49)));
276
304
  }
@@ -145,15 +145,16 @@ export const PROVIDER_CATALOG: Record<string, ProviderMeta> = {
145
145
  },
146
146
  glm: {
147
147
  displayName: "智谱 GLM (Zhipu AI)",
148
- description: "智谱 AI — GLM-5 / GLM-4 series + Z1 reasoning",
148
+ description: "智谱 AI — GLM-5.1 / GLM-5 / GLM-4 series",
149
149
  models: [
150
- "glm-5", // GLM-5 flagship (如不可用请确认最新 model ID)
151
- "glm-5-flash",
152
- "glm-z1", // GLM-Z1 reasoning model
150
+ "glm-5.1", // GLM-5.1 — latest flagship (2026)
151
+ "glm-5", // GLM-5 — premium (Max/Pro plans)
152
+ "glm-5-turbo", // GLM-5-Turbo fast & cost-efficient
153
+ "glm-4.7", // GLM-4.7
154
+ "glm-4.6", // GLM-4.6
155
+ "glm-4.5-air", // GLM-4.5-Air — lightweight
156
+ "glm-z1", // GLM-Z1 — reasoning model
153
157
  "glm-z1-flash",
154
- "glm-4-plus",
155
- "glm-4-flash",
156
- "glm-4-long",
157
158
  ],
158
159
  envKey: "ZHIPU_API_KEY",
159
160
  baseURL: "https://open.bigmodel.cn/api/paas/v4/",
@@ -405,8 +406,13 @@ export function createProvider(
405
406
  export class SpecGenerator {
406
407
  constructor(private provider: AIProvider) {}
407
408
 
408
- async generateSpec(idea: string, context?: ProjectContext): Promise<string> {
409
+ async generateSpec(idea: string, context?: ProjectContext, architectureDecision?: string): Promise<string> {
409
410
  const parts: string[] = [idea];
411
+ if (architectureDecision) {
412
+ parts.push(
413
+ `\n=== Architecture Decision (MUST follow this approach in the spec) ===\n${architectureDecision}`
414
+ );
415
+ }
410
416
 
411
417
  if (context) {
412
418
  // Constitution is highest priority — put it first so the AI respects it
@@ -76,6 +76,15 @@ export interface SpecTask {
76
76
  layer: TaskLayer;
77
77
  filesToTouch: string[];
78
78
  acceptanceCriteria: string[];
79
+ /**
80
+ * Concrete, runnable verification steps — each entry is a specific command
81
+ * or action with an expected observable outcome.
82
+ * Examples:
83
+ * "POST /api/orders with body {...} → HTTP 201, body contains {id, status:'pending'}"
84
+ * "npm run build exits 0 with no TypeScript errors"
85
+ * "GET /api/orders/:id returns 404 when id does not exist"
86
+ */
87
+ verificationSteps: string[];
79
88
  dependencies: string[];
80
89
  priority: TaskPriority;
81
90
  /** Runtime checkpoint — set by code generator, persisted to tasks file */
@@ -148,6 +157,14 @@ export function printTasks(tasks: SpecTask[]): void {
148
157
  const badge = color(`[${task.layer}]`);
149
158
  const prio = task.priority === "high" ? chalk.red("●") : task.priority === "medium" ? chalk.yellow("●") : chalk.gray("●");
150
159
  console.log(` ${prio} ${chalk.bold(task.id)} ${badge} ${task.title}`);
160
+ if (task.verificationSteps?.length) {
161
+ for (const step of task.verificationSteps.slice(0, 2)) {
162
+ console.log(chalk.gray(` ✓ ${step}`));
163
+ }
164
+ if (task.verificationSteps.length > 2) {
165
+ console.log(chalk.gray(` + ${task.verificationSteps.length - 2} more verification step(s)`));
166
+ }
167
+ }
151
168
  }
152
169
  }
153
170
 
@@ -0,0 +1,219 @@
1
+ import * as path from "path";
2
+ import * as fs from "fs-extra";
3
+ import { SpecDSL, ModelField, ApiEndpoint } from "./dsl-types";
4
+
5
+ // ─── Type Mapping ─────────────────────────────────────────────────────────────
6
+
7
/**
 * DSL primitive type names and the TypeScript type each one is emitted as.
 * Date-like names map to `string`; JSON-like names map to an open record.
 */
const PRIMITIVE_MAP: Record<string, string> = {
  String: "string",
  string: "string",
  Int: "number",
  int: "number",
  Float: "number",
  float: "number",
  Number: "number",
  number: "number",
  Boolean: "boolean",
  boolean: "boolean",
  DateTime: "string",
  Date: "string",
  Json: "Record<string, unknown>",
  JSON: "Record<string, unknown>",
  Any: "unknown",
  any: "unknown",
};

/**
 * Converts a DSL field type into a TypeScript type expression.
 *
 * Resolution order:
 *  1. surrounding whitespace and one trailing `?` / `!` marker are dropped
 *  2. `T[]` is mapped element-wise (recursively)
 *  3. names listed in PRIMITIVE_MAP are replaced by their TypeScript type
 *  4. any other name starting with an uppercase ASCII letter is kept as-is
 *     (treated as a model reference)
 *  5. everything else falls back to `string`
 *
 * @param raw - type text as written in the DSL, e.g. `"Int"`, `"User[]"`, `"String[]?"`
 * @returns the TypeScript type text
 */
function mapFieldType(raw: string): string {
  // Strip the marker BEFORE the array check so "String[]?" is still seen as
  // an array instead of falling through to the model-reference branch.
  const base = raw.trim().replace(/[?!]$/, "");

  if (base.endsWith("[]")) {
    return `${mapFieldType(base.slice(0, -2))}[]`;
  }

  // Own-property check: a plain index lookup would also match inherited keys
  // such as "constructor" or "toString" and return a function.
  if (Object.prototype.hasOwnProperty.call(PRIMITIVE_MAP, base)) {
    const primitive = PRIMITIVE_MAP[base];
    if (primitive !== undefined) return primitive;
  }

  // PascalCase → treat as model reference (stays as-is)
  if (/^[A-Z]/.test(base)) return base;

  return "string";
}
39
+
40
+ // ─── Model → Interface ────────────────────────────────────────────────────────
41
+
42
/**
 * Renders one model as an `export interface` declaration.
 *
 * @param name - interface name, emitted verbatim
 * @param fields - model fields; a field without `required` is emitted optional
 * @param description - when present, emitted as a one-line doc comment above the interface
 * @returns the declaration text, lines joined with "\n"
 */
function renderModelInterface(
  name: string,
  fields: ModelField[],
  description?: string
): string {
  const output: string[] = description ? [`/** ${description} */`] : [];
  output.push(`export interface ${name} {`);

  fields.forEach((field) => {
    const optionalMarker = field.required ? "" : "?";
    const member = ` ${field.name}${optionalMarker}: ${mapFieldType(field.type)};`;
    if (field.description) {
      output.push(` /** ${field.description} */`);
    }
    output.push(member);
  });

  output.push("}");
  return output.join("\n");
}
59
+
60
+ // ─── Endpoint → Request/Response types ───────────────────────────────────────
61
+
62
/**
 * Turns a route path into a PascalCase identifier fragment.
 *
 * Examples:
 *   "/users/:id"              → "UsersById"
 *   "/auth/login"             → "AuthLogin"
 *   "/user-profiles/:user_id" → "UserProfilesByUserId"
 *   "/orders/{orderId}/items" → "OrdersByOrderIdItems"
 *
 * Path parameters (`:name` or `{name}`) become `By<Name>`. Every run of
 * characters that is not a letter or digit acts as a word separator, so
 * punctuation never reaches the generated identifier.
 *
 * @param str - route path
 * @returns the concatenated, capitalized segments ("" when there are none)
 */
function sanitizeName(str: string): string {
  return str
    // Isolate each parameter as its own "By" + name segments so the name is
    // capitalized like every other segment.
    .replace(
      /:([\p{L}\p{N}_]+)|\{([\p{L}\p{N}_]+)\}/gu,
      (_match: string, colonParam?: string, braceParam?: string) =>
        `/By/${colonParam ?? braceParam ?? ""}`
    )
    .split(/[^\p{L}\p{N}]+/u)
    .filter((segment) => segment.length > 0)
    .map((segment) => segment.charAt(0).toUpperCase() + segment.slice(1))
    .join("");
}
71
+
72
/**
 * Builds the base type name for an endpoint: the method with everything after
 * its first character lowercased, followed by the sanitized path.
 */
function endpointTypeName(ep: ApiEndpoint): string {
  const verb = ep.method;
  const casedVerb = `${verb.charAt(0)}${verb.slice(1).toLowerCase()}`;
  return casedVerb + sanitizeName(ep.path);
}
75
+
76
/**
 * Renders the request-side interfaces for one endpoint.
 *
 * Up to three interfaces are emitted, each only when its source map is
 * present and non-empty:
 *   `<Base>Request` from `request.body`   (members required)
 *   `<Base>Query`   from `request.query`  (members optional)
 *   `<Base>Params`  from `request.params` (members required)
 *
 * Keys that are not valid identifiers (e.g. "page-size") are emitted as quoted
 * property names so the generated file still parses.
 *
 * @param ep - endpoint to render
 * @returns a leading `// METHOD path` comment plus the interfaces, or `null`
 *          when the endpoint declares nothing to emit
 */
function renderEndpointTypes(ep: ApiEndpoint): string | null {
  const baseName = endpointTypeName(ep);
  const identifierPattern = /^[\p{L}_$][\p{L}\p{N}_$]*$/u;

  const parts: string[] = [
    `// ${ep.method} ${ep.path}${ep.description ? ` — ${ep.description}` : ""}`,
  ];

  // [interface suffix, field map, optional marker appended to each member]
  const groups = [
    ["Request", ep.request?.body, ""],
    ["Query", ep.request?.query, "?"],
    ["Params", ep.request?.params, ""],
  ] as const;

  for (const [suffix, fields, optionalMarker] of groups) {
    if (!fields || Object.keys(fields).length === 0) continue;

    parts.push(`export interface ${baseName}${suffix} {`);
    for (const [key, typeDesc] of Object.entries(fields)) {
      const propertyName = identifierPattern.test(key) ? key : JSON.stringify(key);
      parts.push(` ${propertyName}${optionalMarker}: ${mapFieldType(typeDesc)};`);
    }
    parts.push("}");
  }

  // Only the header comment was collected → nothing to emit.
  return parts.length === 1 ? null : parts.join("\n");
}
118
+
119
+ // ─── Endpoint map constant ───────────────────────────────────────────────────
120
+
121
/**
 * Renders the `API_ENDPOINTS` constant (one entry per endpoint, keyed by the
 * endpoint type name with its first character lowercased) followed by the
 * `ApiEndpointKey` type alias.
 */
function renderEndpointMap(endpoints: ApiEndpoint[]): string {
  const entryLines = endpoints.map((endpoint) => {
    const typeName = endpointTypeName(endpoint);
    const mapKey = `${typeName.charAt(0).toLowerCase()}${typeName.slice(1)}`;
    return ` ${mapKey}: { method: '${endpoint.method}', path: '${endpoint.path}', auth: ${endpoint.auth} },`;
  });

  return [
    "export const API_ENDPOINTS = {",
    ...entryLines,
    "} as const;",
    "",
    "export type ApiEndpointKey = keyof typeof API_ENDPOINTS;",
  ].join("\n");
}
134
+
135
+ // ─── Main generator ───────────────────────────────────────────────────────────
136
+
137
/** Switches controlling what `generateTypescriptTypes` emits. */
export interface TypesGeneratorOptions {
  /** Emit per-endpoint request types. Defaults to `true`. */
  includeEndpointTypes?: boolean;
  /** Emit the `API_ENDPOINTS` constant map. Defaults to `true`. */
  includeEndpointMap?: boolean;
  /** Text placed at the top of the output in place of the generated header comment. */
  header?: string;
}
145
+
146
/**
 * Produces the full text of a TypeScript declarations file for a DSL.
 *
 * Sections, in order, separated by blank lines:
 *   1. header comment (`opts.header`, or a generated one carrying the feature
 *      title and the current ISO timestamp)
 *   2. one interface per data model
 *   3. one `<Name>Props` interface per frontend component
 *   4. per-endpoint request types (unless `includeEndpointTypes` is false)
 *   5. the endpoint map constant (unless `includeEndpointMap` is false)
 *
 * @param dsl - parsed spec DSL
 * @param opts - output switches; both include flags default to `true`
 * @returns the file content, terminated by a single newline
 */
export function generateTypescriptTypes(
  dsl: SpecDSL,
  opts: TypesGeneratorOptions = {}
): string {
  const emitEndpointTypes =
    opts.includeEndpointTypes === undefined ? true : opts.includeEndpointTypes;
  const emitEndpointMap =
    opts.includeEndpointMap === undefined ? true : opts.includeEndpointMap;

  const output: string[] = [
    opts.header ??
      `// Generated by ai-spec — DO NOT EDIT\n// Feature: ${dsl.feature.title}\n// Generated at: ${new Date().toISOString()}`,
  ];

  // Data models
  if (dsl.models.length > 0) {
    output.push("// ─── Data Models " + "─".repeat(57));
    output.push(
      ...dsl.models.map((model) =>
        renderModelInterface(model.name, model.fields, model.description)
      )
    );
  }

  // Frontend components (props only)
  if (dsl.components && dsl.components.length > 0) {
    output.push("// ─── Component Props " + "─".repeat(53));
    for (const component of dsl.components) {
      const declaration: string[] = component.description
        ? [`/** ${component.description} */`]
        : [];
      declaration.push(`export interface ${component.name}Props {`);
      for (const prop of component.props) {
        if (prop.description) declaration.push(` /** ${prop.description} */`);
        declaration.push(
          ` ${prop.name}${prop.required ? "" : "?"}: ${mapFieldType(prop.type)};`
        );
      }
      declaration.push("}");
      output.push(declaration.join("\n"));
    }
  }

  // Endpoint request types — endpoints that declare nothing are left out
  if (emitEndpointTypes && dsl.endpoints.length > 0) {
    output.push("// ─── API Request Types " + "─".repeat(51));
    for (const endpoint of dsl.endpoints) {
      const block = renderEndpointTypes(endpoint);
      if (block) output.push(block);
    }
  }

  // Endpoint map
  if (emitEndpointMap && dsl.endpoints.length > 0) {
    output.push("// ─── Endpoint Map " + "─".repeat(55), renderEndpointMap(dsl.endpoints));
  }

  return `${output.join("\n\n")}\n`;
}
204
+
205
+ // ─── File save ────────────────────────────────────────────────────────────────
206
+
207
/**
 * Generates the types file for a DSL and writes it to disk.
 *
 * Without `opts.outputPath` the file goes to
 * `<projectDir>/.ai-spec/<slug>.types.ts`, where the slug is the feature title
 * lowercased with whitespace runs replaced by "-". Characters that act as path
 * separators or are not allowed in file names on common platforms
 * (`\ / : * ? " < > |`) are replaced by "-" as well, so a title such as
 * "Orders / Refunds" cannot create nested directories.
 *
 * @param dsl - parsed spec DSL
 * @param projectDir - project root used for the default output location
 * @param opts - generator options plus an optional explicit `outputPath`
 * @returns the path the file was written to
 */
export async function saveTypescriptTypes(
  dsl: SpecDSL,
  projectDir: string,
  opts: TypesGeneratorOptions & { outputPath?: string } = {}
): Promise<string> {
  const defaultFileName = `${dsl.feature.title
    .replace(/\s+/g, "-")
    .toLowerCase()
    .replace(/[\\/:*?"<>|]/g, "-")}.types.ts`;

  const outputPath = opts.outputPath ?? path.join(projectDir, ".ai-spec", defaultFileName);

  await fs.ensureDir(path.dirname(outputPath));
  const content = generateTypescriptTypes(dsl, opts);
  await fs.writeFile(outputPath, content, "utf-8");
  return outputPath;
}
package/core/vcr.ts ADDED
@@ -0,0 +1,210 @@
1
+ /**
2
+ * vcr.ts — Pipeline response recording & replay for zero-cost harness iteration.
3
+ *
4
+ * Inspired by Claude Code's VCR pattern for token counting tests.
5
+ *
6
+ * Design:
7
+ * - VcrRecordingProvider wraps any AIProvider and intercepts every generate()
8
+ * call, capturing (prompt, systemInstruction, response) in order.
9
+ * - VcrReplayProvider implements AIProvider by returning pre-recorded responses
10
+ * in sequence — zero API calls, zero tokens, deterministic output.
11
+ * - Recordings are stored in .ai-spec-vcr/{runId}.json alongside RunLogs.
12
+ *
13
+ * Use cases:
14
+ * - Iterating on harness scoring weights without burning tokens
15
+ * - Testing prompt format changes against known pipelines
16
+ * - Debugging pipeline stage logic offline
17
+ *
18
+ * CLI:
19
+ * ai-spec create --vcr-record → record this run
20
+ * ai-spec create --vcr-replay <runId> → replay with zero API calls
21
+ * ai-spec vcr list → list available recordings
22
+ * ai-spec vcr show <runId> → inspect call details
23
+ */
24
+
25
+ import { createHash } from "crypto";
26
+ import * as fs from "fs-extra";
27
+ import * as path from "path";
28
+ import { AIProvider } from "./spec-generator";
29
+
30
+ export const VCR_DIR = ".ai-spec-vcr";
31
+
32
+ // ─── Types ────────────────────────────────────────────────────────────────────
33
+
34
/** One captured AI call inside a VCR recording. */
export interface VcrEntry {
  /** Sequential position of this call within the recording. */
  index: number;
  /** Leading 200 characters of the prompt; meant for human inspection only. */
  promptPreview: string;
  /** First 8 hex characters of SHA-256 over (prompt + "\x00" + systemInstruction). */
  callHash: string;
  systemInstruction?: string;
  /** The complete AI response; this is what replay returns. */
  response: string;
  providerName: string;
  modelName: string;
  ts: string;
  durationMs: number;
}
49
+
50
/** A persisted recording: metadata plus the ordered list of captured calls. */
export interface VcrRecording {
  runId: string;
  recordedAt: string;
  /** Number of AI calls captured in `entries`. */
  entryCount: number;
  /** Distinct "provider/model" strings seen across all calls. */
  providers: string[];
  entries: VcrEntry[];
}
59
+
60
+ // ─── Recording Provider ───────────────────────────────────────────────────────
61
+
62
/**
 * AIProvider decorator that forwards every call to a real provider and keeps
 * a record of each (prompt, response) pair in the order the calls complete.
 * Call `save()` once the pipeline has finished to write the recording to disk.
 */
export class VcrRecordingProvider implements AIProvider {
  private entries: VcrEntry[] = [];

  constructor(private readonly inner: AIProvider) {}

  get providerName() {
    return this.inner.providerName;
  }

  get modelName() {
    return this.inner.modelName;
  }

  /** Number of calls recorded so far. */
  get callCount() {
    return this.entries.length;
  }

  /** Delegates to the wrapped provider, then records the call and its response. */
  async generate(prompt: string, systemInstruction?: string): Promise<string> {
    const startedAt = Date.now();
    const response = await this.inner.generate(prompt, systemInstruction);

    // Short identity of the call: first 8 hex chars of SHA-256 over both inputs.
    const hasher = createHash("sha256");
    hasher.update(prompt + "\x00" + (systemInstruction ?? ""));
    const callHash = hasher.digest("hex").slice(0, 8);

    const entry: VcrEntry = {
      index: this.entries.length,
      promptPreview: prompt.slice(0, 200).replace(/\n/g, " "),
      callHash,
      // Key is omitted entirely when no system instruction was supplied.
      ...(systemInstruction ? { systemInstruction } : {}),
      response,
      providerName: this.inner.providerName,
      modelName: this.inner.modelName,
      ts: new Date().toISOString(),
      durationMs: Date.now() - startedAt,
    };
    this.entries.push(entry);

    return response;
  }

  /**
   * Writes the recording to `<workingDir>/.ai-spec-vcr/<runId>.json`.
   *
   * When a second recorder is supplied (e.g. one wrapping a codegen provider),
   * both entry lists are combined and ordered by their `ts` timestamp before
   * being re-indexed.
   *
   * @returns the path of the written file
   */
  async save(
    workingDir: string,
    runId: string,
    secondRecorder?: VcrRecordingProvider
  ): Promise<string> {
    let merged = this.entries;
    if (secondRecorder) {
      merged = this.entries.concat(secondRecorder.entries);
      merged.sort((left, right) => left.ts.localeCompare(right.ts));
    }

    // Re-index after merge
    for (const [position, entry] of merged.entries()) {
      entry.index = position;
    }

    const providerLabels = merged.map((entry) => `${entry.providerName}/${entry.modelName}`);
    const recording: VcrRecording = {
      runId,
      recordedAt: new Date().toISOString(),
      entryCount: merged.length,
      providers: Array.from(new Set(providerLabels)),
      entries: merged,
    };

    const vcrDir = path.join(workingDir, VCR_DIR);
    await fs.ensureDir(vcrDir);
    const filePath = path.join(vcrDir, `${runId}.json`);
    await fs.writeJson(filePath, recording, { spaces: 2 });
    return filePath;
  }
}
130
+
131
+ // ─── Replay Provider ──────────────────────────────────────────────────────────
132
+
133
/**
 * AIProvider that answers from a recording instead of calling an API.
 * Each generate() call returns the next recorded response — no API call,
 * no tokens, deterministic output.
 *
 * Note: responses are handed out in strict index order and the prompt is
 * ignored. Replay is therefore only meaningful when the pipeline issues its
 * calls in the same structural order as when the recording was made.
 */
export class VcrReplayProvider implements AIProvider {
  private index = 0;

  constructor(private readonly recording: VcrRecording) {}

  get providerName() { return "vcr-replay"; }
  get modelName() { return this.recording.runId; }

  /**
   * Returns the next recorded response.
   *
   * @throws Error when every recorded response has already been consumed. The
   *         cursor is not advanced in that case, so `remaining` stays at 0 and
   *         `consumed` never exceeds the number of recorded entries.
   */
  async generate(_prompt: string, _systemInstruction?: string): Promise<string> {
    const entry = this.recording.entries[this.index];
    if (!entry) {
      throw new Error(
        `VCR replay exhausted: all ${this.recording.entries.length} recorded ` +
        `responses have been consumed. The pipeline made more AI calls than the recording has.`
      );
    }
    // Advance only after a successful lookup so a failed call is not counted.
    this.index += 1;
    return entry.response;
  }

  /** Recorded responses not yet handed out. */
  get remaining() { return this.recording.entries.length - this.index; }
  /** Recorded responses handed out so far. */
  get consumed() { return this.index; }
}
164
+
165
+ // ─── Loader helpers ───────────────────────────────────────────────────────────
166
+
167
/**
 * Loads `<workingDir>/.ai-spec-vcr/<runId>.json`.
 *
 * @returns the recording, or `null` when the file cannot be read or parsed,
 *          or when its content is not replayable (no `entries` array, or an
 *          entry without a string `response`). Rejecting such files here
 *          keeps a malformed recording from surfacing later as a TypeError
 *          in the middle of a replay.
 */
export async function loadVcrRecording(
  workingDir: string,
  runId: string
): Promise<VcrRecording | null> {
  const filePath = path.join(workingDir, VCR_DIR, `${runId}.json`);

  let parsed: unknown;
  try {
    parsed = await fs.readJson(filePath);
  } catch {
    // Missing or unparsable file — reported as "no recording".
    return null;
  }

  const entries = (parsed as { entries?: unknown } | null)?.entries;
  if (!Array.isArray(entries)) return null;

  const replayable = entries.every(
    (entry: unknown) =>
      typeof (entry as { response?: unknown } | null)?.response === "string"
  );
  return replayable ? (parsed as VcrRecording) : null;
}
178
+
179
/** Metadata of a stored recording, without its entries. */
export interface VcrSummary {
  runId: string;
  recordedAt: string;
  entryCount: number;
  providers: string[];
}
185
+
186
/**
 * Lists the recordings stored under `<workingDir>/.ai-spec-vcr`.
 *
 * Only `*.json` files are considered, visited in descending file-name order.
 * Files that cannot be read or parsed are skipped.
 *
 * @returns one summary per readable recording; an empty array when the
 *          directory does not exist
 */
export async function listVcrRecordings(workingDir: string): Promise<VcrSummary[]> {
  const vcrDir = path.join(workingDir, VCR_DIR);
  const dirExists = await fs.pathExists(vcrDir);
  if (!dirExists) {
    return [];
  }

  const dirEntries = await fs.readdir(vcrDir);
  const descendingNames = dirEntries
    .filter((fileName) => fileName.endsWith(".json"))
    .sort()
    .reverse();

  const summaries: VcrSummary[] = [];
  for (const fileName of descendingNames) {
    try {
      const { runId, recordedAt, entryCount, providers }: VcrRecording =
        await fs.readJson(path.join(vcrDir, fileName));
      summaries.push({ runId, recordedAt, entryCount, providers });
    } catch {
      // skip corrupt files
    }
  }
  return summaries;
}