ai-spec-dev 0.31.0 → 0.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/add-lesson.md +34 -0
- package/.claude/commands/check-layers.md +65 -0
- package/.claude/commands/installed-deps.md +35 -0
- package/.claude/commands/recall-lessons.md +40 -0
- package/.claude/commands/scan-singletons.md +45 -0
- package/.claude/commands/verify-imports.md +48 -0
- package/.claude/settings.local.json +15 -1
- package/README.md +531 -213
- package/RELEASE_LOG.md +460 -0
- package/cli/commands/config.ts +93 -0
- package/cli/commands/create.ts +1233 -0
- package/cli/commands/dashboard.ts +62 -0
- package/cli/commands/export.ts +66 -0
- package/cli/commands/init.ts +190 -0
- package/cli/commands/learn.ts +30 -0
- package/cli/commands/logs.ts +106 -0
- package/cli/commands/mock.ts +175 -0
- package/cli/commands/model.ts +156 -0
- package/cli/commands/restore.ts +22 -0
- package/cli/commands/review.ts +63 -0
- package/cli/commands/scan.ts +99 -0
- package/cli/commands/trend.ts +36 -0
- package/cli/commands/types.ts +69 -0
- package/cli/commands/update.ts +178 -0
- package/cli/commands/vcr.ts +70 -0
- package/cli/commands/workspace.ts +219 -0
- package/cli/index.ts +34 -2240
- package/cli/utils.ts +83 -0
- package/core/combined-generator.ts +13 -3
- package/core/dashboard-generator.ts +340 -0
- package/core/design-dialogue.ts +124 -0
- package/core/dsl-feedback.ts +285 -0
- package/core/error-feedback.ts +46 -2
- package/core/project-index.ts +301 -0
- package/core/reviewer.ts +84 -6
- package/core/run-logger.ts +109 -3
- package/core/run-trend.ts +261 -0
- package/core/self-evaluator.ts +139 -7
- package/core/spec-generator.ts +14 -8
- package/core/task-generator.ts +17 -0
- package/core/types-generator.ts +219 -0
- package/core/vcr.ts +210 -0
- package/dist/cli/index.js +6692 -4512
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/index.mjs +6692 -4512
- package/dist/cli/index.mjs.map +1 -1
- package/dist/index.d.mts +19 -5
- package/dist/index.d.ts +19 -5
- package/dist/index.js +420 -224
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +418 -224
- package/dist/index.mjs.map +1 -1
- package/docs-assets/purpose/architecture-overview.svg +64 -0
- package/docs-assets/purpose/create-pipeline.svg +113 -0
- package/docs-assets/purpose/task-layering.svg +74 -0
- package/package.json +6 -3
- package/prompts/codegen.prompt.ts +97 -9
- package/prompts/design.prompt.ts +59 -0
- package/prompts/spec.prompt.ts +8 -1
- package/prompts/tasks.prompt.ts +27 -2
- package/purpose.md +600 -174
- package/tests/dsl-extractor.test.ts +264 -0
- package/tests/dsl-feedback.test.ts +266 -0
- package/tests/dsl-validator.test.ts +283 -0
- package/tests/error-feedback.test.ts +292 -0
- package/tests/provider-utils.test.ts +173 -0
- package/tests/run-trend.test.ts +186 -0
- package/tests/self-evaluator.test.ts +339 -0
- package/tests/spec-assessor.test.ts +142 -0
- package/tests/task-generator.test.ts +230 -0
package/core/reviewer.ts
CHANGED
|
@@ -4,10 +4,37 @@ import * as path from "path";
|
|
|
4
4
|
import * as fs from "fs-extra";
|
|
5
5
|
import { AIProvider } from "./spec-generator";
|
|
6
6
|
import {
|
|
7
|
+
specComplianceSystemPrompt,
|
|
7
8
|
reviewArchitectureSystemPrompt,
|
|
8
9
|
reviewImplementationSystemPrompt,
|
|
9
10
|
reviewImpactComplexitySystemPrompt,
|
|
10
11
|
} from "../prompts/codegen.prompt";
|
|
12
|
+
import { CONSTITUTION_FILE } from "./constitution-generator";
|
|
13
|
+
|
|
14
|
+
// ─── Constitution Lessons Helper ──────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/**
 * Read the project's constitution file and return its "§9 accumulated lessons" section.
 *
 * The section starts at the `## 9. 积累教训` heading and runs up to (but not including)
 * the next line that begins with `## ` followed by a digit, or to the end of the file
 * when no such heading follows.
 *
 * @param projectRoot Directory that contains the constitution file.
 * @returns The section text including its heading, or null when the file cannot be
 *          read or the heading is not present.
 */
async function loadAccumulatedLessons(projectRoot: string): Promise<string | null> {
  const LESSONS_HEADING = "## 9. 积累教训";

  let constitutionText: string;
  try {
    constitutionText = await fs.readFile(path.join(projectRoot, CONSTITUTION_FILE), "utf-8");
  } catch {
    // Missing or unreadable constitution: there are simply no lessons to report.
    return null;
  }

  const headingStart = constitutionText.indexOf(LESSONS_HEADING);
  if (headingStart < 0) return null;

  const fromHeading = constitutionText.slice(headingStart);
  // Look for the next numbered top-level heading only in the text after our own heading.
  const afterHeading = fromHeading.slice(LESSONS_HEADING.length);
  const nextHeadingOffset = afterHeading.search(/\n## \d/);
  if (nextHeadingOffset === -1) return fromHeading;

  return fromHeading.slice(0, LESSONS_HEADING.length + nextHeadingOffset);
}
|
|
11
38
|
|
|
12
39
|
// ─── Review History ────────────────────────────────────────────────────────────
|
|
13
40
|
|
|
@@ -15,6 +42,7 @@ interface ReviewHistoryEntry {
|
|
|
15
42
|
date: string;
|
|
16
43
|
specFile: string;
|
|
17
44
|
score: number;
|
|
45
|
+
complianceScore?: number;
|
|
18
46
|
topIssues: string[];
|
|
19
47
|
impactLevel?: "低" | "中" | "高";
|
|
20
48
|
complexityLevel?: "低" | "中" | "高";
|
|
@@ -55,6 +83,18 @@ function extractScore(reviewText: string): number {
|
|
|
55
83
|
return match ? parseFloat(match[1]) : 0;
|
|
56
84
|
}
|
|
57
85
|
|
|
86
|
+
/**
 * Pull the compliance score out of the Pass 0 output.
 *
 * Looks for the first occurrence of "ComplianceScore: X/10" (case-insensitive,
 * optional whitespace around the slash, X may have a decimal part).
 *
 * @param complianceText Raw text returned by the spec-compliance pass.
 * @returns The parsed score X, or 0 when no such marker is found.
 */
export function extractComplianceScore(complianceText: string): number {
  const found = /ComplianceScore:\s*(\d+(?:\.\d+)?)\s*\/\s*10/i.exec(complianceText);
  if (found === null) {
    return 0;
  }
  return parseFloat(found[1]);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Read the number of missing requirements from the Pass 0 output.
 *
 * Looks for the first "Missing: N" marker (case-insensitive) and parses N as a
 * base-10 integer.
 *
 * @param complianceText Raw text returned by the spec-compliance pass.
 * @returns N, or 0 when the marker is not present.
 */
export function extractMissingCount(complianceText: string): number {
  const hit = /Missing:\s*(\d+)/i.exec(complianceText);
  if (!hit) {
    return 0;
  }
  return Number.parseInt(hit[1], 10);
}
|
|
97
|
+
|
|
58
98
|
/** Extract impact level from Pass 3 review ("影响等级:低/中/高") */
|
|
59
99
|
function extractImpactLevel(reviewText: string): "低" | "中" | "高" | undefined {
|
|
60
100
|
const match = reviewText.match(/影响等级[::]\s*(低|中|高)/);
|
|
@@ -126,8 +166,9 @@ export class CodeReviewer {
|
|
|
126
166
|
}
|
|
127
167
|
|
|
128
168
|
/**
|
|
129
|
-
*
|
|
130
|
-
* Pass
|
|
169
|
+
* Four-pass review:
|
|
170
|
+
* Pass 0 — spec compliance (exhaustive requirement coverage audit)
|
|
171
|
+
* Pass 1 — architecture (layer separation, contract design, auth posture)
|
|
131
172
|
* Pass 2 — implementation details (validation, error handling, edge cases)
|
|
132
173
|
* + historical issue recurrence check
|
|
133
174
|
* Pass 3 — impact assessment + code complexity
|
|
@@ -137,11 +178,43 @@ export class CodeReviewer {
|
|
|
137
178
|
codeContext: string,
|
|
138
179
|
specFile?: string
|
|
139
180
|
): Promise<string> {
|
|
140
|
-
|
|
181
|
+
// ── Pass 0: Spec Compliance (skip if no spec provided) ───────────────────
|
|
182
|
+
let complianceReview = "";
|
|
183
|
+
if (specContent && specContent.trim() && specContent !== "(No spec — review for general code quality)") {
|
|
184
|
+
console.log(chalk.gray(" Pass 0/3: Spec compliance check..."));
|
|
185
|
+
const compliancePrompt = `Check whether the implementation covers every requirement in the spec.
|
|
141
186
|
|
|
142
|
-
|
|
143
|
-
|
|
187
|
+
=== Feature Spec ===
|
|
188
|
+
${specContent}
|
|
189
|
+
|
|
190
|
+
=== Code ===
|
|
191
|
+
${codeContext}`;
|
|
192
|
+
complianceReview = await this.provider.generate(compliancePrompt, specComplianceSystemPrompt);
|
|
193
|
+
|
|
194
|
+
// Surface compliance score immediately
|
|
195
|
+
const complianceScore = extractComplianceScore(complianceReview);
|
|
196
|
+
const missingCount = extractMissingCount(complianceReview);
|
|
197
|
+
if (complianceScore > 0) {
|
|
198
|
+
const scoreColor = complianceScore >= 8 ? chalk.green : complianceScore >= 6 ? chalk.yellow : chalk.red;
|
|
199
|
+
console.log(
|
|
200
|
+
chalk.gray(" Pass 0 result: ") +
|
|
201
|
+
scoreColor(`ComplianceScore ${complianceScore}/10`) +
|
|
202
|
+
(missingCount > 0 ? chalk.red(` · ${missingCount} missing requirement(s)`) : chalk.green(" · all requirements covered"))
|
|
203
|
+
);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
144
206
|
|
|
207
|
+
console.log(chalk.gray(` Pass 1/3: Architecture review...`));
|
|
208
|
+
|
|
209
|
+
// ── Pass 1: Architecture (+ §9 lessons cross-check) ──────────────────────
|
|
210
|
+
const accumulatedLessons = await loadAccumulatedLessons(this.projectRoot);
|
|
211
|
+
const archPrompt = `Review the architecture of this change.
|
|
212
|
+
${complianceReview
|
|
213
|
+
? `\n=== Spec Compliance Report (Pass 0 — already audited, do NOT re-audit missing requirements) ===\n${complianceReview}\n`
|
|
214
|
+
: ""}
|
|
215
|
+
${accumulatedLessons
|
|
216
|
+
? `\n=== §9 历史积累教训 (Accumulated Lessons — check if any are repeated in this code) ===\n${accumulatedLessons}\n`
|
|
217
|
+
: ""}
|
|
145
218
|
=== Feature Spec ===
|
|
146
219
|
${specContent || "(No spec — review for general code quality)"}
|
|
147
220
|
|
|
@@ -189,10 +262,14 @@ ${implReview}`;
|
|
|
189
262
|
|
|
190
263
|
// ── Combine ───────────────────────────────────────────────────────────────
|
|
191
264
|
const sep = "─".repeat(52);
|
|
192
|
-
const
|
|
265
|
+
const parts = complianceReview
|
|
266
|
+
? [complianceReview, archReview, implReview, impactReview]
|
|
267
|
+
: [archReview, implReview, impactReview];
|
|
268
|
+
const combined = parts.join(`\n\n${sep}\n\n`);
|
|
193
269
|
|
|
194
270
|
// ── Persist history ───────────────────────────────────────────────────────
|
|
195
271
|
const score = extractScore(implReview) || extractScore(archReview);
|
|
272
|
+
const complianceScore = extractComplianceScore(complianceReview);
|
|
196
273
|
const topIssues = extractTopIssues(implReview);
|
|
197
274
|
const impactLevel = extractImpactLevel(impactReview);
|
|
198
275
|
const complexityLevel = extractComplexityLevel(impactReview);
|
|
@@ -201,6 +278,7 @@ ${implReview}`;
|
|
|
201
278
|
date: new Date().toISOString().slice(0, 10),
|
|
202
279
|
specFile: path.relative(this.projectRoot, specFile),
|
|
203
280
|
score,
|
|
281
|
+
...(complianceScore > 0 ? { complianceScore } : {}),
|
|
204
282
|
topIssues,
|
|
205
283
|
...(impactLevel ? { impactLevel } : {}),
|
|
206
284
|
...(complexityLevel ? { complexityLevel } : {}),
|
package/core/run-logger.ts
CHANGED
|
@@ -4,6 +4,86 @@ import chalk from "chalk";
|
|
|
4
4
|
|
|
5
5
|
const LOG_DIR = ".ai-spec-logs";
|
|
6
6
|
|
|
7
|
+
// ─── JSONL helpers ────────────────────────────────────────────────────────────
|
|
8
|
+
// Each event is synchronously appended as one JSON line to a `.jsonl` shadow
|
|
9
|
+
// file alongside the full `.json`. If the process crashes mid-run the `.json`
|
|
10
|
+
// may be empty or stale, but every line written to the `.jsonl` is durable.
|
|
11
|
+
// `loadRunLogs` (run-trend.ts) can reconstruct a RunLog from orphan `.jsonl`
|
|
12
|
+
// files for crash recovery.
|
|
13
|
+
|
|
14
|
+
/**
 * Append one record to a JSON Lines file as a single serialized line.
 *
 * The write is synchronous. Any failure, whether while serializing the record or
 * while writing to disk, is swallowed on purpose.
 *
 * @param filePath Path of the `.jsonl` file to append to.
 * @param record   Object serialized with `JSON.stringify` and terminated with "\n".
 */
function appendJsonlLine(filePath: string, record: Record<string, unknown>): void {
  try {
    const serialized = `${JSON.stringify(record)}\n`;
    fs.appendFileSync(filePath, serialized);
  } catch {
    // Intentionally ignored: a failed JSONL write must never crash the pipeline.
  }
}
|
|
21
|
+
|
|
22
|
+
/**
 * Reconstruct a RunLog from a `.jsonl` file (crash recovery path).
 *
 * Every non-empty line is parsed as one JSON record and dispatched on its `type`
 * field: "header", "meta", "entry", "file", "error" or "footer". Lines that fail to
 * parse (or otherwise throw while being processed) are skipped, so a truncated last
 * line does not discard the records before it. Records with an unknown `type` are
 * ignored.
 *
 * @param jsonlPath Path of the `.jsonl` file to read.
 * @returns The rebuilt log, or null when the file cannot be read or no header
 *          supplying both `runId` and `startedAt` was found.
 */
export function reconstructRunLogFromJsonl(jsonlPath: string): RunLog | null {
  let raw: string;
  try {
    raw = fs.readFileSync(jsonlPath, "utf-8");
  } catch {
    return null;
  }

  const log: Partial<RunLog> & { entries: LogEntry[]; filesWritten: string[]; errors: string[] } = {
    entries: [],
    filesWritten: [],
    errors: [],
    runId: "",
    startedAt: "",
    workingDir: "",
  };

  for (const line of raw.split("\n")) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    try {
      const rec = JSON.parse(trimmed) as Record<string, unknown>;
      switch (rec["type"]) {
        case "header":
          log.runId = rec["runId"] as string;
          log.startedAt = rec["startedAt"] as string;
          log.workingDir = rec["workingDir"] as string;
          if (rec["provider"]) log.provider = rec["provider"] as string;
          if (rec["model"]) log.model = rec["model"] as string;
          if (rec["specPath"]) log.specPath = rec["specPath"] as string;
          break;
        case "meta":
          if (rec["key"] === "promptHash") log.promptHash = rec["value"] as string;
          if (rec["key"] === "harnessScore") log.harnessScore = rec["value"] as number;
          break;
        case "entry":
          log.entries.push({
            ts: rec["ts"] as string,
            event: rec["event"] as string,
            ...(rec["durationMs"] !== undefined ? { durationMs: rec["durationMs"] as number } : {}),
            ...(rec["data"] ? { data: rec["data"] as Record<string, unknown> } : {}),
          });
          break;
        case "file":
          if (rec["path"]) log.filesWritten.push(rec["path"] as string);
          break;
        case "error":
          if (rec["message"]) log.errors.push(rec["message"] as string);
          break;
        case "footer": {
          const totalDurationMs = rec["totalDurationMs"];
          const harnessScore = rec["harnessScore"];
          if (rec["endedAt"]) log.endedAt = rec["endedAt"] as string;
          // Test the type rather than truthiness so a legitimate value of 0 is kept.
          if (typeof totalDurationMs === "number") log.totalDurationMs = totalDurationMs;
          if (typeof harnessScore === "number") log.harnessScore = harnessScore;
          break;
        }
      }
    } catch {
      // corrupt line — skip
    }
  }

  // Without a header there is nothing to identify the run by.
  if (!log.runId || !log.startedAt) return null;
  return log as RunLog;
}
|
|
86
|
+
|
|
7
87
|
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
8
88
|
|
|
9
89
|
export interface LogEntry {
|
|
@@ -42,6 +122,7 @@ export class RunLogger {
|
|
|
42
122
|
private log: RunLog;
|
|
43
123
|
private readonly startMs: number;
|
|
44
124
|
private readonly logPath: string;
|
|
125
|
+
private readonly jsonlPath: string;
|
|
45
126
|
private readonly stageStartMs = new Map<string, number>();
|
|
46
127
|
|
|
47
128
|
constructor(
|
|
@@ -50,7 +131,8 @@ export class RunLogger {
|
|
|
50
131
|
meta?: { provider?: string; model?: string; specPath?: string }
|
|
51
132
|
) {
|
|
52
133
|
this.startMs = Date.now();
|
|
53
|
-
this.logPath
|
|
134
|
+
this.logPath = path.join(workingDir, LOG_DIR, `${runId}.json`);
|
|
135
|
+
this.jsonlPath = path.join(workingDir, LOG_DIR, `${runId}.jsonl`);
|
|
54
136
|
this.log = {
|
|
55
137
|
runId,
|
|
56
138
|
startedAt: new Date().toISOString(),
|
|
@@ -60,6 +142,16 @@ export class RunLogger {
|
|
|
60
142
|
filesWritten: [],
|
|
61
143
|
errors: [],
|
|
62
144
|
};
|
|
145
|
+
// Write JSONL header immediately — ensures the file exists even on early crash
|
|
146
|
+
fs.ensureDir(path.dirname(this.jsonlPath)).then(() => {
|
|
147
|
+
appendJsonlLine(this.jsonlPath, {
|
|
148
|
+
type: "header",
|
|
149
|
+
runId,
|
|
150
|
+
startedAt: this.log.startedAt,
|
|
151
|
+
workingDir,
|
|
152
|
+
...meta,
|
|
153
|
+
});
|
|
154
|
+
}).catch(() => {});
|
|
63
155
|
this.flush();
|
|
64
156
|
}
|
|
65
157
|
|
|
@@ -78,25 +170,30 @@ export class RunLogger {
|
|
|
78
170
|
const start = this.stageStartMs.get(event);
|
|
79
171
|
const durationMs = start !== undefined ? Date.now() - start : undefined;
|
|
80
172
|
this.push(`${event}:failed`, { ...data, error, durationMs });
|
|
81
|
-
|
|
173
|
+
const errorMsg = `[${event}] ${error}`;
|
|
174
|
+
this.log.errors.push(errorMsg);
|
|
175
|
+
appendJsonlLine(this.jsonlPath, { type: "error", message: errorMsg });
|
|
82
176
|
this.flush();
|
|
83
177
|
}
|
|
84
178
|
|
|
85
179
|
/** Record the prompt hash for this run (call once at run start). */
|
|
86
180
|
setPromptHash(hash: string): void {
|
|
87
181
|
this.log.promptHash = hash;
|
|
182
|
+
appendJsonlLine(this.jsonlPath, { type: "meta", key: "promptHash", value: hash });
|
|
88
183
|
this.flush();
|
|
89
184
|
}
|
|
90
185
|
|
|
91
186
|
/** Record the harness self-eval score (call once at run end). */
|
|
92
187
|
setHarnessScore(score: number): void {
|
|
93
188
|
this.log.harnessScore = score;
|
|
189
|
+
appendJsonlLine(this.jsonlPath, { type: "meta", key: "harnessScore", value: score });
|
|
94
190
|
this.flush();
|
|
95
191
|
}
|
|
96
192
|
|
|
97
193
|
fileWritten(filePath: string): void {
|
|
98
194
|
if (!this.log.filesWritten.includes(filePath)) {
|
|
99
195
|
this.log.filesWritten.push(filePath);
|
|
196
|
+
appendJsonlLine(this.jsonlPath, { type: "file", path: filePath });
|
|
100
197
|
this.flush();
|
|
101
198
|
}
|
|
102
199
|
}
|
|
@@ -104,6 +201,12 @@ export class RunLogger {
|
|
|
104
201
|
finish(): void {
|
|
105
202
|
this.log.endedAt = new Date().toISOString();
|
|
106
203
|
this.log.totalDurationMs = Date.now() - this.startMs;
|
|
204
|
+
appendJsonlLine(this.jsonlPath, {
|
|
205
|
+
type: "footer",
|
|
206
|
+
endedAt: this.log.endedAt,
|
|
207
|
+
totalDurationMs: this.log.totalDurationMs,
|
|
208
|
+
harnessScore: this.log.harnessScore,
|
|
209
|
+
});
|
|
107
210
|
this.flush();
|
|
108
211
|
}
|
|
109
212
|
|
|
@@ -123,7 +226,10 @@ export class RunLogger {
|
|
|
123
226
|
}
|
|
124
227
|
|
|
125
228
|
private push(event: string, data?: Record<string, unknown>): void {
|
|
126
|
-
|
|
229
|
+
const entry: LogEntry = { ts: new Date().toISOString(), event, ...(data ? { data } : {}) };
|
|
230
|
+
this.log.entries.push(entry);
|
|
231
|
+
// Append to JSONL synchronously — durable even on crash
|
|
232
|
+
appendJsonlLine(this.jsonlPath, { type: "entry", ...entry });
|
|
127
233
|
this.flush();
|
|
128
234
|
}
|
|
129
235
|
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
import * as fs from "fs-extra";
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import { RunLog, reconstructRunLogFromJsonl } from "./run-logger";
|
|
5
|
+
|
|
6
|
+
const LOG_DIR = ".ai-spec-logs";
|
|
7
|
+
|
|
8
|
+
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
/**
 * Flattened summary of a single run log, as produced by `buildTrendReport`.
 * Optional fields of the underlying log are normalized to `null` when absent.
 */
export interface TrendEntry {
  /** Run identifier copied from the log. */
  runId: string;
  /** Start timestamp copied from the log; displayed via its first 10 characters (YYYY-MM-DD). */
  startedAt: string;
  /** Prompt hash recorded for the run, or null when the log has none. */
  promptHash: string | null;
  /** Harness self-eval score, or null when the log has none (such entries are filtered out of reports). */
  harnessScore: number | null;
  /** Spec path recorded for the run, or null when the log has none. */
  specPath: string | null;
  /** Provider recorded for the run, or null when the log has none. */
  provider: string | null;
  /** Model recorded for the run, or null when the log has none. */
  model: string | null;
  /** Number of files written during the run (length of the log's `filesWritten` list, 0 if absent). */
  filesWritten: number;
  /** Total run duration in milliseconds, or null when the log has none. */
  totalDurationMs: number | null;
  /** Number of errors recorded during the run (length of the log's `errors` list, 0 if absent). */
  errors: number;
}
|
|
22
|
+
|
|
23
|
+
/** Aggregate harness-score statistics for all report entries that share one prompt hash. */
export interface PromptGroupSummary {
  /** The shared prompt hash; entries without a hash are grouped under "(none)". */
  promptHash: string;
  /** Number of entries in the group. */
  runs: number;
  /** Mean harness score of the group, rounded to one decimal place. */
  avg: number;
  /** Highest harness score in the group. */
  best: number;
  /** Lowest harness score in the group. */
  worst: number;
  /** `startedAt` of the group's last entry (the oldest run when entries are ordered newest-first). */
  firstSeen: string;
  /** `startedAt` of the group's first entry (the newest run when entries are ordered newest-first). */
  lastSeen: string;
  /** true if this is the most recently used prompt hash */
  isCurrent: boolean;
}
|
|
34
|
+
|
|
35
|
+
/** Result of `buildTrendReport`: the selected runs plus per-prompt-hash aggregates. */
export interface TrendReport {
  /** Scored runs that remain after the optional prompt-hash filter and "last N" limit. */
  entries: TrendEntry[];
  /** One summary per prompt hash found in `entries`, most recently seen first. */
  promptGroups: PromptGroupSummary[];
  /** Number of items in `entries`. */
  totalRuns: number;
}
|
|
40
|
+
|
|
41
|
+
// ─── Loader ──────────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
/**
 * Load every run log found in `<workingDir>/.ai-spec-logs/`, sorted newest-first
 * by `startedAt`.
 *
 * Two sources are combined:
 *  1. `.json` files, which are the preferred, complete form of a run log.
 *  2. `.jsonl` shadow files, used as a crash-recovery fallback. A `.jsonl` is
 *     reconstructed only when its run was NOT successfully loaded from a `.json`,
 *     so a run whose `.json` is missing, empty or corrupt is still recovered.
 *
 * Unreadable or corrupt files are skipped silently.
 *
 * @param workingDir Project directory that contains the log directory.
 * @returns The loaded logs (empty when the log directory does not exist).
 */
export async function loadRunLogs(workingDir: string): Promise<RunLog[]> {
  const logDir = path.join(workingDir, LOG_DIR);
  if (!(await fs.pathExists(logDir))) return [];

  const files = await fs.readdir(logDir);
  const jsonFiles = files.filter((f) => f.endsWith(".json")).sort().reverse();
  const jsonlFiles = files.filter((f) => f.endsWith(".jsonl")).sort().reverse();

  const logs: RunLog[] = [];
  const seenRunIds = new Set<string>();
  // File stems (name without extension) whose .json was read successfully.
  const loadedJsonStems = new Set<string>();

  // Primary path: read complete .json files (newest-first)
  for (const file of jsonFiles) {
    try {
      const log: RunLog = await fs.readJson(path.join(logDir, file));
      if (log.runId && log.startedAt) {
        logs.push(log);
        seenRunIds.add(log.runId);
        loadedJsonStems.add(file.slice(0, -".json".length));
      }
    } catch {
      // corrupt file — skip silently; its .jsonl (if any) is tried below
    }
  }

  // Crash-recovery path: reconstruct runs that could not be loaded from a .json
  for (const file of jsonlFiles) {
    const stem = file.replace(/\.jsonl$/, "");
    // A usable .json always wins over its .jsonl shadow.
    if (loadedJsonStems.has(stem) || seenRunIds.has(stem)) continue;
    const log = reconstructRunLogFromJsonl(path.join(logDir, file));
    if (log && !seenRunIds.has(log.runId)) {
      logs.push(log);
      seenRunIds.add(log.runId);
    }
  }

  // Sort newest-first by startedAt
  logs.sort((a, b) => b.startedAt.localeCompare(a.startedAt));
  return logs;
}
|
|
88
|
+
|
|
89
|
+
// ─── Aggregation ─────────────────────────────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
/**
 * Turn raw run logs into a trend report.
 *
 * Steps: flatten each log into a `TrendEntry`, keep only runs that have a harness
 * score, optionally keep only runs whose prompt hash starts with `opts.promptFilter`,
 * optionally keep only the first `opts.last` runs, then aggregate the survivors per
 * prompt hash.
 *
 * The input order is preserved, and "last N", `firstSeen`/`lastSeen` and the
 * "current" prompt hash are all read positionally, so callers are expected to pass
 * logs ordered newest-first.
 *
 * @param logs Run logs to summarize.
 * @param opts Optional `last` (maximum number of runs) and `promptFilter` (hash prefix).
 * @returns The selected entries, per-hash summaries (most recently seen first) and run count.
 */
export function buildTrendReport(
  logs: RunLog[],
  opts: { last?: number; promptFilter?: string } = {}
): TrendReport {
  const { last, promptFilter } = opts;

  // Flatten, dropping runs without a harness score (only create runs carry one).
  let rows: TrendEntry[] = [];
  for (const log of logs) {
    const harnessScore = log.harnessScore ?? null;
    if (harnessScore === null) continue;
    rows.push({
      runId: log.runId,
      startedAt: log.startedAt,
      promptHash: log.promptHash ?? null,
      harnessScore,
      specPath: log.specPath ?? null,
      provider: log.provider ?? null,
      model: log.model ?? null,
      filesWritten: log.filesWritten?.length ?? 0,
      totalDurationMs: log.totalDurationMs ?? null,
      errors: log.errors?.length ?? 0,
    });
  }

  // Optional prefix filter on the prompt hash; runs without a hash never match.
  if (promptFilter) {
    rows = rows.filter((row) => row.promptHash !== null && row.promptHash.startsWith(promptFilter));
  }

  // Optional cap on the number of runs.
  if (last && last > 0) {
    rows = rows.slice(0, last);
  }

  // Bucket the remaining rows by prompt hash.
  const byPrompt = new Map<string, TrendEntry[]>();
  for (const row of rows) {
    const key = row.promptHash ?? "(none)";
    const bucket = byPrompt.get(key);
    if (bucket) {
      bucket.push(row);
    } else {
      byPrompt.set(key, [row]);
    }
  }

  // The "current" hash is the one used by the first (most recent) row.
  const currentHash = rows.length > 0 ? rows[0].promptHash : null;

  const promptGroups: PromptGroupSummary[] = Array.from(byPrompt, ([hash, group]) => {
    let total = 0;
    let best = -Infinity;
    let worst = Infinity;
    for (const member of group) {
      const value = member.harnessScore as number;
      total += value;
      best = Math.max(best, value);
      worst = Math.min(worst, value);
    }
    return {
      promptHash: hash,
      runs: group.length,
      avg: Math.round((total / group.length) * 10) / 10,
      best,
      worst,
      firstSeen: group[group.length - 1].startedAt,
      lastSeen: group[0].startedAt,
      isCurrent: hash === currentHash,
    };
  });

  // Most recently used prompt hash first.
  promptGroups.sort((a, b) => b.lastSeen.localeCompare(a.lastSeen));

  return { entries: rows, promptGroups, totalRuns: rows.length };
}
|
|
154
|
+
|
|
155
|
+
// ─── Display ─────────────────────────────────────────────────────────────────
|
|
156
|
+
|
|
157
|
+
/**
 * Render a score as a fixed-width, 10-cell text bar of filled ("█") and empty ("░") cells.
 *
 * The score is rounded to the nearest integer and clamped to the range 0–10, so
 * out-of-range values produce a full or empty bar instead of making
 * `String.prototype.repeat` throw a RangeError on a negative or oversized count.
 * A NaN score renders as an empty bar.
 *
 * @param score Score to visualize.
 * @returns A string that is always exactly 10 characters long.
 */
function scoreBar(score: number): string {
  const BAR_WIDTH = 10;
  const rounded = Math.round(score);
  const filled = Number.isNaN(rounded) ? 0 : Math.min(BAR_WIDTH, Math.max(0, rounded));
  return "█".repeat(filled) + "░".repeat(BAR_WIDTH - filled);
}
|
|
161
|
+
|
|
162
|
+
/**
 * Colorize `text` according to `score`: green for 8 and above, yellow for 6 and
 * above, red for anything else.
 *
 * @param score Score that selects the color.
 * @param text  Text to wrap in the chosen color.
 * @returns The colorized text.
 */
function scoreColor(score: number, text: string): string {
  return score >= 8
    ? chalk.green(text)
    : score >= 6
      ? chalk.yellow(text)
      : chalk.red(text);
}
|
|
167
|
+
|
|
168
|
+
/**
 * Shorten a timestamp string to its first 10 characters, which is the
 * YYYY-MM-DD date part when the input is an ISO-8601 timestamp.
 *
 * @param iso Timestamp string.
 * @returns The leading 10 characters (or the whole string if it is shorter).
 */
function formatDate(iso: string): string {
  const DATE_PART_LENGTH = 10; // length of "YYYY-MM-DD"
  return iso.substring(0, DATE_PART_LENGTH);
}
|
|
171
|
+
|
|
172
|
+
/**
 * Format a millisecond duration for display.
 *
 * The value is rounded to whole seconds; durations under a minute are shown as
 * "<s>s" and longer ones as "<m>m<s>s". A null duration is shown as a dash placeholder.
 *
 * @param ms Duration in milliseconds, or null when unknown.
 * @returns The formatted duration.
 */
function formatDuration(ms: number | null): string {
  if (ms === null) return " — ";

  const totalSeconds = Math.round(ms / 1000);
  if (totalSeconds < 60) return `${totalSeconds}s`;

  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}m${seconds}s`;
}
|
|
178
|
+
|
|
179
|
+
/**
 * Produce a short display label for a spec path.
 *
 * @param specPath Spec path, or null when the run has none.
 * @returns The file name (last path segment) of `specPath`, or a gray dash
 *          when the path is null or empty.
 */
function shortSpec(specPath: string | null): string {
  return specPath ? path.basename(specPath) : chalk.gray("—");
}
|
|
183
|
+
|
|
184
|
+
/**
 * Print a trend report to the console.
 *
 * Output consists of a banner, a "Prompt Versions" table (one row per prompt-hash
 * group), a "Run History" list (one line per entry, in the order given by the
 * report) and a footer with the number of runs shown and the log directory.
 * When the report has no entries, only the banner and a hint are printed.
 *
 * @param report     Report to print, as built by `buildTrendReport`.
 * @param workingDir Project directory; used only to show the log directory as a relative path.
 */
export function printTrendReport(report: TrendReport, workingDir: string): void {
  const { entries, promptGroups } = report;

  console.log(chalk.cyan("\n─── Harness Trend ───────────────────────────────────────────"));

  // Nothing to show: print a hint plus the closing rule and stop.
  if (entries.length === 0) {
    console.log(chalk.gray(" No scored runs found. Run `ai-spec create` to start tracking."));
    console.log(chalk.cyan("─".repeat(63)));
    return;
  }

  // ── Prompt Version Summary ────────────────────────────────────────
  if (promptGroups.length > 0) {
    console.log(chalk.bold("\n Prompt Versions:\n"));

    // Column widths for the table. Only `hash` and `runs` are referenced below;
    // the score columns are padded with a literal width of 5.
    const colWidths = {
      hash: 10,
      runs: 5,
      avg: 5,
      best: 5,
      worst: 5,
    };

    // header
    console.log(
      chalk.gray(
        " " +
        "Hash ".padEnd(colWidths.hash) + " " +
        "Runs ".padStart(colWidths.runs) + " " +
        " Avg" + " " +
        " Best" + " " +
        "Worst" + " " +
        "Last seen"
      )
    );
    console.log(chalk.gray(" " + "─".repeat(55)));

    for (const g of promptGroups) {
      // Flag the group whose hash belongs to the most recent run.
      const currentMark = g.isCurrent ? chalk.cyan(" ◀ current") : "";
      const avgStr = scoreColor(g.avg, g.avg.toFixed(1).padStart(5));
      const bestStr = chalk.green(g.best.toFixed(1).padStart(5));
      // Worst score is red below 6, yellow otherwise (never green).
      const worstStr = g.worst < 6 ? chalk.red(g.worst.toFixed(1).padStart(5)) : chalk.yellow(g.worst.toFixed(1).padStart(5));

      console.log(
        " " +
        chalk.white(g.promptHash.padEnd(colWidths.hash)) + " " +
        chalk.gray(String(g.runs).padStart(colWidths.runs)) + " " +
        avgStr + " " +
        bestStr + " " +
        worstStr + " " +
        chalk.gray(formatDate(g.lastSeen)) +
        currentMark
      );
    }
  }

  // ── Run History ───────────────────────────────────────────────────
  console.log(chalk.bold("\n Run History:\n"));

  for (const e of entries) {
    // Report entries are expected to carry a score (buildTrendReport drops unscored runs).
    const score = e.harnessScore as number;
    const bar = scoreColor(score, `[${scoreBar(score)}]`);
    const scoreStr = scoreColor(score, score.toFixed(1).padStart(4));
    const hash = e.promptHash ? chalk.gray(e.promptHash) : chalk.gray("(no hash)");
    const dur = chalk.gray(formatDuration(e.totalDurationMs));
    // Error count is shown only when the run recorded at least one error.
    const errMark = e.errors > 0 ? chalk.yellow(` ⚠${e.errors}err`) : "";
    const spec = chalk.gray(shortSpec(e.specPath));

    console.log(
      ` ${chalk.gray(formatDate(e.startedAt))} ${bar}${scoreStr} ${hash} ${dur}${errMark} ${spec}`
    );
  }

  // ── Footer ────────────────────────────────────────────────────────
  const logRelDir = path.relative(workingDir, path.join(workingDir, LOG_DIR));
  console.log(chalk.gray(`\n ${entries.length} run(s) shown · logs: ${logRelDir}/`));
  console.log(chalk.cyan("─".repeat(63)));
}
|