ai-spec-dev 0.33.0 → 0.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/add-lesson.md +34 -0
- package/.claude/commands/check-layers.md +65 -0
- package/.claude/commands/installed-deps.md +35 -0
- package/.claude/commands/recall-lessons.md +40 -0
- package/.claude/commands/scan-singletons.md +45 -0
- package/.claude/commands/verify-imports.md +48 -0
- package/.claude/settings.local.json +11 -1
- package/README.md +531 -213
- package/RELEASE_LOG.md +305 -0
- package/cli/commands/create.ts +1233 -0
- package/cli/commands/dashboard.ts +62 -0
- package/cli/commands/init.ts +45 -8
- package/cli/commands/mock.ts +175 -0
- package/cli/commands/scan.ts +99 -0
- package/cli/commands/types.ts +69 -0
- package/cli/commands/vcr.ts +70 -0
- package/cli/index.ts +34 -2517
- package/core/combined-generator.ts +13 -3
- package/core/dashboard-generator.ts +340 -0
- package/core/design-dialogue.ts +124 -0
- package/core/dsl-feedback.ts +34 -4
- package/core/error-feedback.ts +46 -2
- package/core/project-index.ts +301 -0
- package/core/reviewer.ts +84 -6
- package/core/run-logger.ts +109 -3
- package/core/run-trend.ts +24 -4
- package/core/self-evaluator.ts +39 -11
- package/core/spec-generator.ts +14 -8
- package/core/task-generator.ts +17 -0
- package/core/types-generator.ts +219 -0
- package/core/vcr.ts +210 -0
- package/dist/cli/index.js +7297 -5640
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/index.mjs +8728 -7071
- package/dist/cli/index.mjs.map +1 -1
- package/dist/index.d.mts +19 -5
- package/dist/index.d.ts +19 -5
- package/dist/index.js +420 -224
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +418 -224
- package/dist/index.mjs.map +1 -1
- package/docs-assets/purpose/architecture-overview.svg +64 -0
- package/docs-assets/purpose/create-pipeline.svg +113 -0
- package/docs-assets/purpose/task-layering.svg +74 -0
- package/package.json +1 -1
- package/prompts/codegen.prompt.ts +97 -9
- package/prompts/design.prompt.ts +59 -0
- package/prompts/spec.prompt.ts +8 -1
- package/prompts/tasks.prompt.ts +27 -2
- package/purpose.md +600 -174
package/core/self-evaluator.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import chalk from "chalk";
|
|
2
2
|
import { SpecDSL } from "./dsl-types";
|
|
3
3
|
import { RunLogger } from "./run-logger";
|
|
4
|
+
import { extractComplianceScore } from "./reviewer";
|
|
4
5
|
|
|
5
6
|
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
6
7
|
|
|
@@ -11,6 +12,8 @@ export interface SelfEvalResult {
|
|
|
11
12
|
compileScore: number;
|
|
12
13
|
/** 0-10 extracted from 3-pass review text, or null when review was skipped */
|
|
13
14
|
reviewScore: number | null;
|
|
15
|
+
/** 0-10 from Pass 0 spec compliance check, or null when skipped/unavailable */
|
|
16
|
+
complianceScore: number | null;
|
|
14
17
|
/** 0-10 weighted overall — the "Harness Score" recorded in RunLog */
|
|
15
18
|
harnessScore: number;
|
|
16
19
|
/** Prompt hash at the time this run executed */
|
|
@@ -191,15 +194,35 @@ export function runSelfEval(opts: {
|
|
|
191
194
|
// ── Review Score ──────────────────────────────────────────────────────────
|
|
192
195
|
const reviewScore = reviewText ? extractReviewScore(reviewText) : null;
|
|
193
196
|
|
|
197
|
+
// ── Compliance Score (Pass 0) ──────────────────────────────────────────────
|
|
198
|
+
const rawCompliance = reviewText ? extractComplianceScore(reviewText) : 0;
|
|
199
|
+
const complianceScore: number | null = rawCompliance > 0 ? rawCompliance : null;
|
|
200
|
+
|
|
194
201
|
// ── Harness Score (weighted average) ──────────────────────────────────────
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
202
|
+
// Weights reflect importance: compliance (did we build the right thing?) ≥ dsl ≥ review ≥ compile (ties allowed; see the table below)
|
|
203
|
+
//
|
|
204
|
+
// compliance + review available → 0.30 compliance + 0.25 dsl + 0.20 compile + 0.25 review
|
|
205
|
+
// review only → 0.40 dsl + 0.30 compile + 0.30 review (unchanged)
|
|
206
|
+
// compliance only → 0.35 compliance + 0.35 dsl + 0.30 compile
|
|
207
|
+
// neither → 0.55 dsl + 0.45 compile (unchanged)
|
|
208
|
+
let harnessScore: number;
|
|
209
|
+
if (complianceScore !== null && reviewScore !== null) {
|
|
210
|
+
harnessScore = Math.round(
|
|
211
|
+
(complianceScore * 0.30 + dslCoverageScore * 0.25 + compileScore * 0.20 + reviewScore * 0.25) * 10
|
|
212
|
+
) / 10;
|
|
213
|
+
} else if (reviewScore !== null) {
|
|
214
|
+
harnessScore = Math.round((dslCoverageScore * 0.4 + compileScore * 0.3 + reviewScore * 0.3) * 10) / 10;
|
|
215
|
+
} else if (complianceScore !== null) {
|
|
216
|
+
harnessScore = Math.round((complianceScore * 0.35 + dslCoverageScore * 0.35 + compileScore * 0.30) * 10) / 10;
|
|
217
|
+
} else {
|
|
218
|
+
harnessScore = Math.round((dslCoverageScore * 0.55 + compileScore * 0.45) * 10) / 10;
|
|
219
|
+
}
|
|
198
220
|
|
|
199
221
|
const result: SelfEvalResult = {
|
|
200
222
|
dslCoverageScore,
|
|
201
223
|
compileScore,
|
|
202
224
|
reviewScore,
|
|
225
|
+
complianceScore,
|
|
203
226
|
harnessScore,
|
|
204
227
|
promptHash,
|
|
205
228
|
detail: {
|
|
@@ -221,6 +244,7 @@ export function runSelfEval(opts: {
|
|
|
221
244
|
dslCoverageScore,
|
|
222
245
|
compileScore,
|
|
223
246
|
reviewScore: reviewScore ?? undefined,
|
|
247
|
+
complianceScore: complianceScore ?? undefined,
|
|
224
248
|
promptHash,
|
|
225
249
|
modelNameCoverage: result.detail.modelNameCoverage,
|
|
226
250
|
modelNameMatched: result.detail.modelNameMatched,
|
|
@@ -244,9 +268,16 @@ export function printSelfEval(result: SelfEvalResult): void {
|
|
|
244
268
|
const compileTag = result.compileScore === 10
|
|
245
269
|
? chalk.green("pass")
|
|
246
270
|
: chalk.yellow("partial");
|
|
247
|
-
const reviewTag
|
|
271
|
+
const reviewTag = result.reviewScore !== null
|
|
248
272
|
? `Review: ${result.reviewScore}/10`
|
|
249
273
|
: chalk.gray("Review: skipped");
|
|
274
|
+
const complianceTag = result.complianceScore !== null
|
|
275
|
+
? (result.complianceScore >= 8
|
|
276
|
+
? chalk.green(`Compliance: ${result.complianceScore}/10`)
|
|
277
|
+
: result.complianceScore >= 6
|
|
278
|
+
? chalk.yellow(`Compliance: ${result.complianceScore}/10`)
|
|
279
|
+
: chalk.red(`Compliance: ${result.complianceScore}/10 ⚠`))
|
|
280
|
+
: chalk.gray("Compliance: skipped");
|
|
250
281
|
|
|
251
282
|
// Model coverage tag (only shown when there are declared models)
|
|
252
283
|
let modelCoverageTag = "";
|
|
@@ -262,15 +293,12 @@ export function printSelfEval(result: SelfEvalResult): void {
|
|
|
262
293
|
|
|
263
294
|
console.log(chalk.cyan("\n─── Harness Self-Eval ───────────────────────────"));
|
|
264
295
|
console.log(` Score : ${scoreColor(`[${bar}] ${result.harnessScore}/10`)}`);
|
|
296
|
+
console.log(` ${complianceTag} Compile: ${compileTag} ${reviewTag}`);
|
|
265
297
|
console.log(
|
|
266
|
-
` DSL : ${scoreColor(String(result.dslCoverageScore) + "/10")}
|
|
267
|
-
`
|
|
298
|
+
` DSL : ${scoreColor(String(result.dslCoverageScore) + "/10")}` +
|
|
299
|
+
(modelCoverageTag ? ` ${modelCoverageTag}` : "") +
|
|
300
|
+
chalk.gray(` Endpoints: ${result.detail.endpointsTotal} Files: ${result.detail.filesWritten}`)
|
|
268
301
|
);
|
|
269
|
-
if (modelCoverageTag) {
|
|
270
|
-
console.log(` Detail : ${modelCoverageTag} ` +
|
|
271
|
-
chalk.gray(`Endpoints: ${result.detail.endpointsTotal} Files: ${result.detail.filesWritten}`)
|
|
272
|
-
);
|
|
273
|
-
}
|
|
274
302
|
console.log(chalk.gray(` Prompt : ${result.promptHash}`));
|
|
275
303
|
console.log(chalk.cyan("─".repeat(49)));
|
|
276
304
|
}
|
package/core/spec-generator.ts
CHANGED
|
@@ -145,15 +145,16 @@ export const PROVIDER_CATALOG: Record<string, ProviderMeta> = {
|
|
|
145
145
|
},
|
|
146
146
|
glm: {
|
|
147
147
|
displayName: "智谱 GLM (Zhipu AI)",
|
|
148
|
-
description: "智谱 AI — GLM-5 / GLM-4 series
|
|
148
|
+
description: "智谱 AI — GLM-5.1 / GLM-5 / GLM-4 series",
|
|
149
149
|
models: [
|
|
150
|
-
"glm-5",
|
|
151
|
-
"glm-5
|
|
152
|
-
"glm-
|
|
150
|
+
"glm-5.1", // GLM-5.1 — latest flagship (2026)
|
|
151
|
+
"glm-5", // GLM-5 — premium (Max/Pro plans)
|
|
152
|
+
"glm-5-turbo", // GLM-5-Turbo — fast & cost-efficient
|
|
153
|
+
"glm-4.7", // GLM-4.7
|
|
154
|
+
"glm-4.6", // GLM-4.6
|
|
155
|
+
"glm-4.5-air", // GLM-4.5-Air — lightweight
|
|
156
|
+
"glm-z1", // GLM-Z1 — reasoning model
|
|
153
157
|
"glm-z1-flash",
|
|
154
|
-
"glm-4-plus",
|
|
155
|
-
"glm-4-flash",
|
|
156
|
-
"glm-4-long",
|
|
157
158
|
],
|
|
158
159
|
envKey: "ZHIPU_API_KEY",
|
|
159
160
|
baseURL: "https://open.bigmodel.cn/api/paas/v4/",
|
|
@@ -405,8 +406,13 @@ export function createProvider(
|
|
|
405
406
|
export class SpecGenerator {
|
|
406
407
|
constructor(private provider: AIProvider) {}
|
|
407
408
|
|
|
408
|
-
async generateSpec(idea: string, context?: ProjectContext): Promise<string> {
|
|
409
|
+
async generateSpec(idea: string, context?: ProjectContext, architectureDecision?: string): Promise<string> {
|
|
409
410
|
const parts: string[] = [idea];
|
|
411
|
+
if (architectureDecision) {
|
|
412
|
+
parts.push(
|
|
413
|
+
`\n=== Architecture Decision (MUST follow this approach in the spec) ===\n${architectureDecision}`
|
|
414
|
+
);
|
|
415
|
+
}
|
|
410
416
|
|
|
411
417
|
if (context) {
|
|
412
418
|
// Constitution is highest priority — put it first so the AI respects it
|
package/core/task-generator.ts
CHANGED
|
@@ -76,6 +76,15 @@ export interface SpecTask {
|
|
|
76
76
|
layer: TaskLayer;
|
|
77
77
|
filesToTouch: string[];
|
|
78
78
|
acceptanceCriteria: string[];
|
|
79
|
+
/**
|
|
80
|
+
* Concrete, runnable verification steps — each entry is a specific command
|
|
81
|
+
* or action with an expected observable outcome.
|
|
82
|
+
* Examples:
|
|
83
|
+
* "POST /api/orders with body {...} → HTTP 201, body contains {id, status:'pending'}"
|
|
84
|
+
* "npm run build exits 0 with no TypeScript errors"
|
|
85
|
+
* "GET /api/orders/:id returns 404 when id does not exist"
|
|
86
|
+
*/
|
|
87
|
+
verificationSteps: string[];
|
|
79
88
|
dependencies: string[];
|
|
80
89
|
priority: TaskPriority;
|
|
81
90
|
/** Runtime checkpoint — set by code generator, persisted to tasks file */
|
|
@@ -148,6 +157,14 @@ export function printTasks(tasks: SpecTask[]): void {
|
|
|
148
157
|
const badge = color(`[${task.layer}]`);
|
|
149
158
|
const prio = task.priority === "high" ? chalk.red("●") : task.priority === "medium" ? chalk.yellow("●") : chalk.gray("●");
|
|
150
159
|
console.log(` ${prio} ${chalk.bold(task.id)} ${badge} ${task.title}`);
|
|
160
|
+
if (task.verificationSteps?.length) {
|
|
161
|
+
for (const step of task.verificationSteps.slice(0, 2)) {
|
|
162
|
+
console.log(chalk.gray(` ✓ ${step}`));
|
|
163
|
+
}
|
|
164
|
+
if (task.verificationSteps.length > 2) {
|
|
165
|
+
console.log(chalk.gray(` + ${task.verificationSteps.length - 2} more verification step(s)`));
|
|
166
|
+
}
|
|
167
|
+
}
|
|
151
168
|
}
|
|
152
169
|
}
|
|
153
170
|
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
import * as path from "path";
|
|
2
|
+
import * as fs from "fs-extra";
|
|
3
|
+
import { SpecDSL, ModelField, ApiEndpoint } from "./dsl-types";
|
|
4
|
+
|
|
5
|
+
// ─── Type Mapping ─────────────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
/**
 * DSL scalar type names (in both capitalised and lower-case spellings) mapped
 * to the TypeScript type emitted for them. Date-like types map to `string`.
 */
const PRIMITIVE_MAP: Record<string, string> = {
  String: "string",
  string: "string",
  Int: "number",
  int: "number",
  Float: "number",
  float: "number",
  Number: "number",
  number: "number",
  Boolean: "boolean",
  boolean: "boolean",
  DateTime: "string",
  Date: "string",
  Json: "Record<string, unknown>",
  JSON: "Record<string, unknown>",
  Any: "unknown",
  any: "unknown",
};

/**
 * Translates a DSL type expression into a TypeScript type expression.
 *
 * Resolution order:
 *  1. trailing `?` / `!` markers are dropped (so `String[]?` is still an array);
 *  2. a `[]` suffix maps the element type recursively and re-appends `[]`;
 *  3. names listed in PRIMITIVE_MAP are replaced by their TypeScript type;
 *  4. any other name starting with an upper-case ASCII letter is kept as a
 *     model reference;
 *  5. everything else falls back to `string`.
 *
 * @param raw DSL type text, e.g. "String", "Int[]", "User?"
 * @returns the TypeScript type text
 */
function mapFieldType(raw: string): string {
  // Strip nullable / optional markers before looking at the array suffix.
  const base = raw.trim().replace(/[?!]+$/, "");

  // Array types: "String[]" or "User[]"
  if (base.endsWith("[]")) {
    return `${mapFieldType(base.slice(0, -2))}[]`;
  }

  // Own-property check: a plain lookup would also hit inherited members such
  // as "constructor" or "toString" and return a function instead of a string.
  const primitive = Object.prototype.hasOwnProperty.call(PRIMITIVE_MAP, base)
    ? PRIMITIVE_MAP[base]
    : undefined;
  if (primitive) return primitive;

  // PascalCase → treat as model reference (stays as-is)
  if (/^[A-Z]/.test(base)) return base;
  return "string";
}
|
|
39
|
+
|
|
40
|
+
// ─── Model → Interface ────────────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
/**
 * Renders one `export interface` declaration for a DSL model.
 *
 * Fields that are not `required` get a `?` marker, each field type goes
 * through mapFieldType, and model / field descriptions (when present) are
 * emitted as single-line doc comments above the declaration they describe.
 *
 * @param name        interface name to emit
 * @param fields      fields of the model, emitted in the given order
 * @param description optional text for the doc comment above the interface
 * @returns the interface source, lines joined with "\n"
 */
function renderModelInterface(
  name: string,
  fields: ModelField[],
  description?: string
): string {
  const members = fields.flatMap((field) => {
    const marker = field.required ? "" : "?";
    const member = ` ${field.name}${marker}: ${mapFieldType(field.type)};`;
    return field.description ? [` /** ${field.description} */`, member] : [member];
  });
  const lead = description ? [`/** ${description} */`] : [];
  return [...lead, `export interface ${name} {`, ...members, "}"].join("\n");
}
|
|
59
|
+
|
|
60
|
+
// ─── Endpoint → Request/Response types ───────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
/**
 * Turns a route path into a PascalCase identifier fragment.
 *
 *   "/users/:id"            → "UsersById"
 *   "/users/{userId}"       → "UsersByUserId"
 *   "/order-items/:item_id" → "OrderItemsByItemId"
 *
 * Path parameters (`:name` or `{name}`) become `By<Name>`. The text is then
 * split on every run of characters that is neither a letter nor a digit, empty
 * pieces are dropped, and each piece is capitalised, so separators such as
 * "/", "-", "_" and "." never reach the generated identifier.
 *
 * @param str route path
 * @returns PascalCase fragment; empty string when the path has no letters or digits
 */
function sanitizeName(str: string): string {
  const capitalize = (s: string): string => s.charAt(0).toUpperCase() + s.slice(1);
  return str
    // Capitalise the parameter name here so ":id" yields "ById", not "Byid".
    .replace(
      /:([A-Za-z_]\w*)|\{([A-Za-z_]\w*)\}/g,
      (_match: string, colonName?: string, braceName?: string) =>
        `By${capitalize(colonName ?? braceName ?? "")}`
    )
    .split(/[^\p{L}\p{N}]+/u)
    .filter((segment) => segment.length > 0)
    .map(capitalize)
    .join("");
}
|
|
71
|
+
|
|
72
|
+
/**
 * Builds the base type name for an endpoint: the HTTP method with only its
 * first letter upper-cased, followed by the sanitized path
 * (e.g. method "POST" + path "/auth/login" → "PostAuthLogin").
 *
 * The first letter is upper-cased explicitly so that a method given in lower
 * case still produces a PascalCase name.
 *
 * @param ep endpoint whose `method` and `path` are read
 * @returns PascalCase base name used for the generated types
 */
function endpointTypeName(ep: ApiEndpoint): string {
  const verb = ep.method.charAt(0).toUpperCase() + ep.method.slice(1).toLowerCase();
  return verb + sanitizeName(ep.path);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Renders the request-side interfaces for one endpoint.
 *
 * Up to three interfaces are emitted, each only when the matching record on
 * `ep.request` exists and has at least one key:
 *   - `<Base>Request` from `request.body`   (members required)
 *   - `<Base>Query`   from `request.query`  (members optional)
 *   - `<Base>Params`  from `request.params` (members required)
 * A `// METHOD path — description` comment line precedes them.
 *
 * @param ep endpoint to render
 * @returns the rendered source, or null when the endpoint declares none of
 *          the three records (so no lone comment line is emitted)
 */
function renderEndpointTypes(ep: ApiEndpoint): string | null {
  const baseName = endpointTypeName(ep);
  const banner = `// ${ep.method} ${ep.path}${ep.description ? ` — ${ep.description}` : ""}`;

  // One row per interface; emission order is body, query, path params.
  const groups = [
    { suffix: "Request", shape: ep.request?.body, optional: false },
    { suffix: "Query", shape: ep.request?.query, optional: true },
    { suffix: "Params", shape: ep.request?.params, optional: false },
  ];

  const declarations: string[] = [];
  for (const { suffix, shape, optional } of groups) {
    if (!shape || Object.keys(shape).length === 0) continue;
    declarations.push(`export interface ${baseName}${suffix} {`);
    for (const [key, typeDesc] of Object.entries(shape)) {
      declarations.push(` ${key}${optional ? "?" : ""}: ${mapFieldType(typeDesc)};`);
    }
    declarations.push("}");
  }

  if (declarations.length === 0) return null; // only comment, no types to emit
  return [banner, ...declarations].join("\n");
}
|
|
118
|
+
|
|
119
|
+
// ─── Endpoint map constant ───────────────────────────────────────────────────
|
|
120
|
+
|
|
121
|
+
/**
 * Renders the `API_ENDPOINTS` constant (one entry per endpoint, declared
 * `as const`) followed by the `ApiEndpointKey` key-union type.
 *
 * Each entry key is the endpoint's type name with its first letter
 * lower-cased; the value records the endpoint's method, path and auth value.
 *
 * @param endpoints endpoints to list, emitted in the given order
 * @returns the generated source, lines joined with "\n"
 */
function renderEndpointMap(endpoints: ApiEndpoint[]): string {
  const rows = endpoints.map((endpoint) => {
    const typeName = endpointTypeName(endpoint);
    const property = typeName.charAt(0).toLowerCase() + typeName.slice(1);
    return ` ${property}: { method: '${endpoint.method}', path: '${endpoint.path}', auth: ${endpoint.auth} },`;
  });

  return [
    "export const API_ENDPOINTS = {",
    ...rows,
    "} as const;",
    "",
    "export type ApiEndpointKey = keyof typeof API_ENDPOINTS;",
  ].join("\n");
}
|
|
134
|
+
|
|
135
|
+
// ─── Main generator ───────────────────────────────────────────────────────────
|
|
136
|
+
|
|
137
|
+
/** Switches controlling which sections generateTypescriptTypes emits. */
export interface TypesGeneratorOptions {
  /**
   * Emit the per-endpoint request interfaces (body / query / path params).
   * Treated as true when omitted.
   */
  includeEndpointTypes?: boolean;
  /**
   * Emit the API_ENDPOINTS constant and the ApiEndpointKey type.
   * Treated as true when omitted.
   */
  includeEndpointMap?: boolean;
  /**
   * Text placed at the very top of the output. When omitted, a generated
   * header naming the feature and the generation time is used instead.
   */
  header?: string;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Produces the full text of a TypeScript declarations file for a spec DSL.
 *
 * Sections, in order and separated by a blank line:
 *   1. header (caller-supplied, or a generated one naming the feature and the
 *      current time);
 *   2. one interface per data model;
 *   3. one `<Name>Props` interface per frontend component, when the DSL has any;
 *   4. per-endpoint request interfaces (unless `includeEndpointTypes` is false);
 *   5. the API_ENDPOINTS map (unless `includeEndpointMap` is false).
 * Sections 2–5 are skipped when their source list is empty.
 *
 * @param dsl  parsed spec DSL
 * @param opts section switches and optional header override
 * @returns the generated source, ending with a single trailing newline
 */
export function generateTypescriptTypes(
  dsl: SpecDSL,
  opts: TypesGeneratorOptions = {}
): string {
  const {
    includeEndpointTypes: withEndpointTypes = true,
    includeEndpointMap: withEndpointMap = true,
  } = opts;

  // Header always comes first.
  const out: string[] = [
    opts.header ?? `// Generated by ai-spec — DO NOT EDIT\n// Feature: ${dsl.feature.title}\n// Generated at: ${new Date().toISOString()}`,
  ];

  // Data models
  if (dsl.models.length > 0) {
    out.push("// ─── Data Models " + "─".repeat(57));
    out.push(...dsl.models.map((m) => renderModelInterface(m.name, m.fields, m.description)));
  }

  // Frontend components — only their props are typed.
  if (dsl.components && dsl.components.length > 0) {
    out.push("// ─── Component Props " + "─".repeat(53));
    for (const component of dsl.components) {
      const members = component.props.flatMap((prop) => {
        const marker = prop.required ? "" : "?";
        const member = ` ${prop.name}${marker}: ${mapFieldType(prop.type)};`;
        return prop.description ? [` /** ${prop.description} */`, member] : [member];
      });
      const lead = component.description ? [`/** ${component.description} */`] : [];
      out.push([...lead, `export interface ${component.name}Props {`, ...members, "}"].join("\n"));
    }
  }

  // Endpoint request types; endpoints with nothing to emit are skipped.
  if (withEndpointTypes && dsl.endpoints.length > 0) {
    out.push("// ─── API Request Types " + "─".repeat(51));
    for (const endpoint of dsl.endpoints) {
      const rendered = renderEndpointTypes(endpoint);
      if (rendered) out.push(rendered);
    }
  }

  // Endpoint map
  if (withEndpointMap && dsl.endpoints.length > 0) {
    out.push("// ─── Endpoint Map " + "─".repeat(55));
    out.push(renderEndpointMap(dsl.endpoints));
  }

  return `${out.join("\n\n")}\n`;
}
|
|
204
|
+
|
|
205
|
+
// ─── File save ────────────────────────────────────────────────────────────────
|
|
206
|
+
|
|
207
|
+
/**
 * Generates the TypeScript declarations for `dsl` and writes them to disk.
 *
 * When `opts.outputPath` is not given, the file is written to
 * `<projectDir>/.ai-spec/<slug>.types.ts`, where the slug is the lower-cased
 * feature title with whitespace, path separators and other characters that are
 * unsafe in file names collapsed to "-". Leading and trailing "-" / "." are
 * trimmed so a title can neither add directory levels nor climb out of
 * `.ai-spec`. An empty slug falls back to "feature".
 *
 * @param dsl        parsed spec DSL
 * @param projectDir project root used for the default output location
 * @param opts       generator options plus an optional explicit `outputPath`
 * @returns the path of the written file
 */
export async function saveTypescriptTypes(
  dsl: SpecDSL,
  projectDir: string,
  opts: TypesGeneratorOptions & { outputPath?: string } = {}
): Promise<string> {
  let outputPath = opts.outputPath;
  if (outputPath == null) {
    const slug = dsl.feature.title
      .trim()
      .toLowerCase()
      // whitespace, path separators, reserved file-name characters, control chars
      .replace(/[\s\\/:*?"<>|\u0000-\u001f]+/g, "-")
      .replace(/^[-.]+|[-.]+$/g, "");
    outputPath = path.join(projectDir, ".ai-spec", `${slug || "feature"}.types.ts`);
  }

  // Render first so a generation failure does not leave an empty directory behind.
  const content = generateTypescriptTypes(dsl, opts);
  await fs.ensureDir(path.dirname(outputPath));
  await fs.writeFile(outputPath, content, "utf-8");
  return outputPath;
}
|
package/core/vcr.ts
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* vcr.ts — Pipeline response recording & replay for zero-cost harness iteration.
|
|
3
|
+
*
|
|
4
|
+
* Inspired by Claude Code's VCR pattern for token counting tests.
|
|
5
|
+
*
|
|
6
|
+
* Design:
|
|
7
|
+
* - VcrRecordingProvider wraps any AIProvider and intercepts every generate()
|
|
8
|
+
* call, capturing (prompt, systemInstruction, response) in order.
|
|
9
|
+
* - VcrReplayProvider implements AIProvider by returning pre-recorded responses
|
|
10
|
+
* in sequence — zero API calls, zero tokens, deterministic output.
|
|
11
|
+
* - Recordings are stored in .ai-spec-vcr/{runId}.json alongside RunLogs.
|
|
12
|
+
*
|
|
13
|
+
* Use cases:
|
|
14
|
+
* - Iterating on harness scoring weights without burning tokens
|
|
15
|
+
* - Testing prompt format changes against known pipelines
|
|
16
|
+
* - Debugging pipeline stage logic offline
|
|
17
|
+
*
|
|
18
|
+
* CLI:
|
|
19
|
+
* ai-spec create --vcr-record → record this run
|
|
20
|
+
* ai-spec create --vcr-replay <runId> → replay with zero API calls
|
|
21
|
+
* ai-spec vcr list → list available recordings
|
|
22
|
+
* ai-spec vcr show <runId> → inspect call details
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { createHash } from "crypto";
|
|
26
|
+
import * as fs from "fs-extra";
|
|
27
|
+
import * as path from "path";
|
|
28
|
+
import { AIProvider } from "./spec-generator";
|
|
29
|
+
|
|
30
|
+
/** Directory, relative to the working directory, that holds one `{runId}.json` file per recording. */
export const VCR_DIR = ".ai-spec-vcr";
|
|
31
|
+
|
|
32
|
+
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
/** One recorded AI call: what was asked, what came back, and when. */
export interface VcrEntry {
  /** Sequential call index within this recording */
  index: number;
  /** First 200 chars of prompt with newlines replaced by spaces — for human inspection only */
  promptPreview: string;
  /** SHA-256[:8] of (prompt + "\x00" + systemInstruction) — stable identity */
  callHash: string;
  /** System instruction passed with the call; absent when none was given */
  systemInstruction?: string;
  /** Complete AI response — what replay will return */
  response: string;
  /** `providerName` of the provider that answered the call */
  providerName: string;
  /** `modelName` of the provider that answered the call */
  modelName: string;
  /** ISO-8601 timestamp taken when the entry was recorded (after the response arrived) */
  ts: string;
  /** Milliseconds between the start of the call and the entry being recorded */
  durationMs: number;
}
|
|
49
|
+
|
|
50
|
+
/** On-disk shape of a recording file: summary fields plus every captured call. */
export interface VcrRecording {
  /** Run identifier; also the base name of the recording file */
  runId: string;
  /** ISO-8601 timestamp taken when the recording was saved */
  recordedAt: string;
  /** Total number of AI calls captured */
  entryCount: number;
  /** Unique provider/model strings seen across all calls */
  providers: string[];
  /** Captured calls, in the order replay returns them */
  entries: VcrEntry[];
}
|
|
59
|
+
|
|
60
|
+
// ─── Recording Provider ───────────────────────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
/**
 * Wraps a real AIProvider, transparently passing through all calls while
 * recording each (prompt, response) pair.
 * After the pipeline completes, call `save()` to persist the recording.
 *
 * Ordering: replay hands out responses in the order generate() is *called*.
 * Entries are therefore ordered by a sequence number taken when each call
 * starts, shared by every recorder in the process. Ordering by completion
 * time would mis-order overlapping calls and same-millisecond ties.
 */
export class VcrRecordingProvider implements AIProvider {
  /** Next call-start sequence number; shared by all recorder instances. */
  private static nextCallSeq = 0;
  /** Call-start sequence number of every recorded entry, across all recorders. */
  private static readonly callSeq = new WeakMap<VcrEntry, number>();

  /** Entries captured by this recorder, in completion order. */
  private entries: VcrEntry[] = [];

  constructor(private readonly inner: AIProvider) {}

  get providerName() { return this.inner.providerName; }
  get modelName() { return this.inner.modelName; }

  /**
   * Forwards the call to the wrapped provider and records the result.
   * A call whose inner provider rejects is not recorded; the rejection propagates.
   *
   * @param prompt            prompt text forwarded unchanged
   * @param systemInstruction optional system instruction forwarded unchanged
   * @returns the wrapped provider's response
   */
  async generate(prompt: string, systemInstruction?: string): Promise<string> {
    // Reserve the position now, before awaiting, so it reflects call order.
    const seq = VcrRecordingProvider.nextCallSeq++;
    const start = Date.now();
    const response = await this.inner.generate(prompt, systemInstruction);
    const callHash = createHash("sha256")
      .update(prompt + "\x00" + (systemInstruction ?? ""))
      .digest("hex")
      .slice(0, 8);
    const entry: VcrEntry = {
      index: this.entries.length,
      promptPreview: prompt.slice(0, 200).replace(/\n/g, " "),
      callHash,
      ...(systemInstruction ? { systemInstruction } : {}),
      response,
      providerName: this.inner.providerName,
      modelName: this.inner.modelName,
      ts: new Date().toISOString(),
      durationMs: Date.now() - start,
    };
    VcrRecordingProvider.callSeq.set(entry, seq);
    this.entries.push(entry);
    return response;
  }

  /** Number of calls recorded so far by this recorder. */
  get callCount() { return this.entries.length; }

  /**
   * Persist the recording to .ai-spec-vcr/{runId}.json.
   * Merges entries from an optional second recorder (e.g. codegenProvider),
   * ordered by call start so replay order matches real execution order.
   * The saved entries are re-indexed copies; recorder state is not modified.
   *
   * @param workingDir     directory that contains (or will contain) the VCR directory
   * @param runId          run identifier, used as the file's base name
   * @param secondRecorder optional second recorder whose entries are merged in
   * @returns path of the written recording file
   */
  async save(
    workingDir: string,
    runId: string,
    secondRecorder?: VcrRecordingProvider
  ): Promise<string> {
    const seqOf = (e: VcrEntry): number => VcrRecordingProvider.callSeq.get(e) ?? 0;

    const allEntries = [...this.entries, ...(secondRecorder ? secondRecorder.entries : [])]
      .sort((a, b) => seqOf(a) - seqOf(b))
      // Re-index after ordering / merge
      .map((e, i) => ({ ...e, index: i }));

    const recording: VcrRecording = {
      runId,
      recordedAt: new Date().toISOString(),
      entryCount: allEntries.length,
      providers: [...new Set(allEntries.map((e) => `${e.providerName}/${e.modelName}`))],
      entries: allEntries,
    };

    const vcrDir = path.join(workingDir, VCR_DIR);
    await fs.ensureDir(vcrDir);
    const filePath = path.join(vcrDir, `${runId}.json`);
    await fs.writeJson(filePath, recording, { spaces: 2 });
    return filePath;
  }
}
|
|
130
|
+
|
|
131
|
+
// ─── Replay Provider ──────────────────────────────────────────────────────────
|
|
132
|
+
|
|
133
|
+
/**
 * Implements AIProvider by replaying pre-recorded responses in sequence.
 * Every generate() call returns the next entry from the recording — no API call,
 * no tokens, deterministic output.
 *
 * Note: responses are returned in strict array order, regardless of the prompt
 * content. This works correctly as long as the pipeline makes calls in the same
 * structural order as the recording.
 */
export class VcrReplayProvider implements AIProvider {
  /** Position of the next entry to return; never exceeds the entry count. */
  private index = 0;

  constructor(private readonly recording: VcrRecording) {}

  get providerName() { return "vcr-replay"; }
  get modelName() { return this.recording.runId; }

  /**
   * Returns the next recorded response. Both arguments are ignored.
   *
   * @throws Error when every recorded response has already been returned. The
   *         position is left untouched in that case, so `remaining` stays at 0
   *         and `consumed` keeps counting only responses actually served.
   */
  async generate(_prompt: string, _systemInstruction?: string): Promise<string> {
    const entry = this.recording.entries[this.index];
    if (!entry) {
      throw new Error(
        `VCR replay exhausted: all ${this.recording.entries.length} recorded ` +
        `responses have been consumed. The pipeline made more AI calls than the recording has.`
      );
    }
    // Advance only after a response was found.
    this.index += 1;
    return entry.response;
  }

  /** Number of recorded responses not yet returned. */
  get remaining() { return this.recording.entries.length - this.index; }
  /** Number of recorded responses returned so far. */
  get consumed() { return this.index; }
}
|
|
164
|
+
|
|
165
|
+
// ─── Loader helpers ───────────────────────────────────────────────────────────
|
|
166
|
+
|
|
167
|
+
/**
 * Loads the recording stored at `<workingDir>/.ai-spec-vcr/<runId>.json`.
 *
 * @param workingDir directory that contains the VCR directory
 * @param runId      run identifier (base name of the recording file)
 * @returns the recording, or null when the file cannot be read or parsed, or
 *          when its content is not an object with an `entries` array (such a
 *          file could not be replayed anyway)
 */
export async function loadVcrRecording(
  workingDir: string,
  runId: string
): Promise<VcrRecording | null> {
  const filePath = path.join(workingDir, VCR_DIR, `${runId}.json`);

  let parsed: unknown;
  try {
    parsed = await fs.readJson(filePath);
  } catch {
    // Missing or unparsable file — reported to the caller as "no recording".
    return null;
  }

  // Minimal shape check: replay indexes into `entries`, so it must be an array.
  if (typeof parsed !== "object" || parsed === null) return null;
  if (!Array.isArray((parsed as Partial<VcrRecording>).entries)) return null;
  return parsed as VcrRecording;
}
|
|
178
|
+
|
|
179
|
+
/** Summary fields of a recording (everything except the entries), as returned by listVcrRecordings. */
export interface VcrSummary {
  /** Run identifier stored in the recording */
  runId: string;
  /** Save timestamp stored in the recording */
  recordedAt: string;
  /** Number of calls stored in the recording */
  entryCount: number;
  /** Provider/model strings stored in the recording */
  providers: string[];
}
|
|
185
|
+
|
|
186
|
+
/**
 * Lists the recordings found in `<workingDir>/.ai-spec-vcr`.
 *
 * Only `*.json` files are considered; they are visited in reverse file-name
 * order and read one at a time. Files that cannot be read or parsed, or whose
 * content cannot be summarised, are silently skipped.
 *
 * @param workingDir directory that contains the VCR directory
 * @returns one summary per readable recording; an empty array when the VCR
 *          directory does not exist
 */
export async function listVcrRecordings(workingDir: string): Promise<VcrSummary[]> {
  const vcrDir = path.join(workingDir, VCR_DIR);
  const dirExists = await fs.pathExists(vcrDir);
  if (!dirExists) return [];

  const names = await fs.readdir(vcrDir);
  const jsonFiles = names
    .filter((name) => name.endsWith(".json"))
    .sort()
    .reverse();

  const summaries: VcrSummary[] = [];
  for (const name of jsonFiles) {
    try {
      const rec: VcrRecording = await fs.readJson(path.join(vcrDir, name));
      // Destructure inside the try so a non-object payload is skipped as well.
      const { runId, recordedAt, entryCount, providers } = rec;
      summaries.push({ runId, recordedAt, entryCount, providers });
    } catch {
      // skip corrupt files
    }
  }
  return summaries;
}
|