ai-spec-dev 0.31.0 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +5 -1
- package/RELEASE_LOG.md +155 -0
- package/cli/commands/config.ts +93 -0
- package/cli/commands/export.ts +66 -0
- package/cli/commands/init.ts +153 -0
- package/cli/commands/learn.ts +30 -0
- package/cli/commands/logs.ts +106 -0
- package/cli/commands/model.ts +156 -0
- package/cli/commands/restore.ts +22 -0
- package/cli/commands/review.ts +63 -0
- package/cli/commands/trend.ts +36 -0
- package/cli/commands/update.ts +178 -0
- package/cli/commands/workspace.ts +219 -0
- package/cli/index.ts +277 -0
- package/cli/utils.ts +83 -0
- package/core/dsl-feedback.ts +255 -0
- package/core/run-trend.ts +241 -0
- package/core/self-evaluator.ts +106 -2
- package/dist/cli/index.js +972 -449
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/index.mjs +972 -449
- package/dist/cli/index.mjs.map +1 -1
- package/package.json +6 -3
- package/tests/dsl-extractor.test.ts +264 -0
- package/tests/dsl-feedback.test.ts +266 -0
- package/tests/dsl-validator.test.ts +283 -0
- package/tests/error-feedback.test.ts +292 -0
- package/tests/provider-utils.test.ts +173 -0
- package/tests/run-trend.test.ts +186 -0
- package/tests/self-evaluator.test.ts +339 -0
- package/tests/spec-assessor.test.ts +142 -0
- package/tests/task-generator.test.ts +230 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import * as fs from "fs-extra";
|
|
2
|
+
import * as path from "path";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import { RunLog } from "./run-logger";
|
|
5
|
+
|
|
6
|
+
// Directory (relative to the working dir) that run-log JSON files are read from.
const LOG_DIR = ".ai-spec-logs";
|
|
7
|
+
|
|
8
|
+
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
/** One scored run, flattened from a RunLog for display and aggregation. */
export interface TrendEntry {
  /** Unique id of the run (copied from RunLog.runId). */
  runId: string;
  /** ISO-8601 timestamp of when the run started. */
  startedAt: string;
  /** Hash identifying the prompt version used, or null when not recorded. */
  promptHash: string | null;
  /** Harness self-eval score; null means the run was never scored. */
  harnessScore: number | null;
  /** Path of the spec file the run was created from, if any. */
  specPath: string | null;
  /** Provider name used for the run, if recorded. */
  provider: string | null;
  /** Model name used for the run, if recorded. */
  model: string | null;
  /** Number of files the run wrote (0 when the log has no list). */
  filesWritten: number;
  /** Wall-clock duration of the run in milliseconds, if recorded. */
  totalDurationMs: number | null;
  /** Number of errors logged for the run (0 when the log has no list). */
  errors: number;
}
|
|
22
|
+
|
|
23
|
+
/** Aggregate statistics for all runs sharing one prompt hash. */
export interface PromptGroupSummary {
  /** The prompt hash this group aggregates (or "(none)" for unhashed runs). */
  promptHash: string;
  /** Number of runs in the group. */
  runs: number;
  /** Mean harness score, rounded to one decimal place. */
  avg: number;
  /** Highest harness score in the group. */
  best: number;
  /** Lowest harness score in the group. */
  worst: number;
  /** startedAt of the oldest run in the group. */
  firstSeen: string;
  /** startedAt of the newest run in the group. */
  lastSeen: string;
  /** true if this is the most recently used prompt hash */
  isCurrent: boolean;
}
|
|
34
|
+
|
|
35
|
+
/** Full trend report: individual runs plus per-prompt-hash summaries. */
export interface TrendReport {
  /** Filtered run entries (order as produced by the loader, newest-first). */
  entries: TrendEntry[];
  /** Per-prompt summaries, sorted most recently used first. */
  promptGroups: PromptGroupSummary[];
  /** Number of entries after filtering (equals entries.length). */
  totalRuns: number;
}
|
|
40
|
+
|
|
41
|
+
// ─── Loader ──────────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Read all RunLog JSON files from `.ai-spec-logs/`, sorted newest-first.
|
|
45
|
+
* Silently skips unreadable / corrupt files.
|
|
46
|
+
*/
|
|
47
|
+
export async function loadRunLogs(workingDir: string): Promise<RunLog[]> {
|
|
48
|
+
const logDir = path.join(workingDir, LOG_DIR);
|
|
49
|
+
if (!(await fs.pathExists(logDir))) return [];
|
|
50
|
+
|
|
51
|
+
const files = await fs.readdir(logDir);
|
|
52
|
+
const jsonFiles = files.filter((f) => f.endsWith(".json")).sort().reverse();
|
|
53
|
+
|
|
54
|
+
const logs: RunLog[] = [];
|
|
55
|
+
for (const file of jsonFiles) {
|
|
56
|
+
try {
|
|
57
|
+
const log: RunLog = await fs.readJson(path.join(logDir, file));
|
|
58
|
+
// only include runs that have a startedAt (minimal validity check)
|
|
59
|
+
if (log.runId && log.startedAt) {
|
|
60
|
+
logs.push(log);
|
|
61
|
+
}
|
|
62
|
+
} catch {
|
|
63
|
+
// corrupt file — skip silently
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
return logs;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// ─── Aggregation ─────────────────────────────────────────────────────────────
|
|
70
|
+
|
|
71
|
+
export function buildTrendReport(
|
|
72
|
+
logs: RunLog[],
|
|
73
|
+
opts: { last?: number; promptFilter?: string } = {}
|
|
74
|
+
): TrendReport {
|
|
75
|
+
let entries: TrendEntry[] = logs.map((log) => ({
|
|
76
|
+
runId: log.runId,
|
|
77
|
+
startedAt: log.startedAt,
|
|
78
|
+
promptHash: log.promptHash ?? null,
|
|
79
|
+
harnessScore: log.harnessScore ?? null,
|
|
80
|
+
specPath: log.specPath ?? null,
|
|
81
|
+
provider: log.provider ?? null,
|
|
82
|
+
model: log.model ?? null,
|
|
83
|
+
filesWritten: log.filesWritten?.length ?? 0,
|
|
84
|
+
totalDurationMs: log.totalDurationMs ?? null,
|
|
85
|
+
errors: log.errors?.length ?? 0,
|
|
86
|
+
}));
|
|
87
|
+
|
|
88
|
+
// filter: only runs with a harnessScore (create runs)
|
|
89
|
+
entries = entries.filter((e) => e.harnessScore !== null);
|
|
90
|
+
|
|
91
|
+
// filter by prompt hash if requested
|
|
92
|
+
if (opts.promptFilter) {
|
|
93
|
+
entries = entries.filter((e) =>
|
|
94
|
+
e.promptHash?.startsWith(opts.promptFilter!)
|
|
95
|
+
);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// limit to last N
|
|
99
|
+
if (opts.last && opts.last > 0) {
|
|
100
|
+
entries = entries.slice(0, opts.last);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// build prompt group summaries (only from filtered entries)
|
|
104
|
+
const groupMap = new Map<string, TrendEntry[]>();
|
|
105
|
+
for (const e of entries) {
|
|
106
|
+
const key = e.promptHash ?? "(none)";
|
|
107
|
+
if (!groupMap.has(key)) groupMap.set(key, []);
|
|
108
|
+
groupMap.get(key)!.push(e);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// determine "current" = the prompt hash of the most recent run
|
|
112
|
+
const currentHash = entries[0]?.promptHash ?? null;
|
|
113
|
+
|
|
114
|
+
const promptGroups: PromptGroupSummary[] = [];
|
|
115
|
+
for (const [hash, group] of groupMap.entries()) {
|
|
116
|
+
const scores = group.map((e) => e.harnessScore as number);
|
|
117
|
+
promptGroups.push({
|
|
118
|
+
promptHash: hash,
|
|
119
|
+
runs: group.length,
|
|
120
|
+
avg: Math.round((scores.reduce((a, b) => a + b, 0) / scores.length) * 10) / 10,
|
|
121
|
+
best: Math.max(...scores),
|
|
122
|
+
worst: Math.min(...scores),
|
|
123
|
+
firstSeen: group[group.length - 1].startedAt,
|
|
124
|
+
lastSeen: group[0].startedAt,
|
|
125
|
+
isCurrent: hash === currentHash,
|
|
126
|
+
});
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// sort groups: most recently used first
|
|
130
|
+
promptGroups.sort((a, b) => b.lastSeen.localeCompare(a.lastSeen));
|
|
131
|
+
|
|
132
|
+
return { entries, promptGroups, totalRuns: entries.length };
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// ─── Display ─────────────────────────────────────────────────────────────────
|
|
136
|
+
|
|
137
|
+
function scoreBar(score: number): string {
|
|
138
|
+
const filled = Math.round(score);
|
|
139
|
+
return "█".repeat(filled) + "░".repeat(10 - filled);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
function scoreColor(score: number, text: string): string {
|
|
143
|
+
if (score >= 8) return chalk.green(text);
|
|
144
|
+
if (score >= 6) return chalk.yellow(text);
|
|
145
|
+
return chalk.red(text);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
function formatDate(iso: string): string {
|
|
149
|
+
return iso.slice(0, 10); // YYYY-MM-DD
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
function formatDuration(ms: number | null): string {
|
|
153
|
+
if (ms === null) return " — ";
|
|
154
|
+
const s = Math.round(ms / 1000);
|
|
155
|
+
if (s < 60) return `${s}s`;
|
|
156
|
+
return `${Math.floor(s / 60)}m${s % 60}s`;
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
function shortSpec(specPath: string | null): string {
|
|
160
|
+
if (!specPath) return chalk.gray("—");
|
|
161
|
+
return path.basename(specPath);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
/**
 * Pretty-print a TrendReport to the console: a banner, a per-prompt-version
 * summary table, a run-history list, and a footer pointing at the log dir.
 * Prints a hint and returns early when there are no scored runs.
 */
export function printTrendReport(report: TrendReport, workingDir: string): void {
  const { entries, promptGroups } = report;

  console.log(chalk.cyan("\n─── Harness Trend ───────────────────────────────────────────"));

  // No scored runs at all — print a hint instead of empty tables.
  if (entries.length === 0) {
    console.log(chalk.gray(" No scored runs found. Run `ai-spec create` to start tracking."));
    console.log(chalk.cyan("─".repeat(63)));
    return;
  }

  // ── Prompt Version Summary ────────────────────────────────────────
  if (promptGroups.length > 0) {
    console.log(chalk.bold("\n Prompt Versions:\n"));

    // NOTE(review): only `hash` and `runs` are referenced below; the
    // avg/best/worst widths are unused — the header uses fixed literals
    // and the rows pad with hard-coded padStart(5).
    const colWidths = {
      hash: 10,
      runs: 5,
      avg: 5,
      best: 5,
      worst: 5,
    };

    // header
    console.log(
      chalk.gray(
        " " +
        "Hash ".padEnd(colWidths.hash) + " " +
        "Runs ".padStart(colWidths.runs) + " " +
        " Avg" + " " +
        " Best" + " " +
        "Worst" + " " +
        "Last seen"
      )
    );
    console.log(chalk.gray(" " + "─".repeat(55)));

    for (const g of promptGroups) {
      // Mark the prompt hash used by the most recent run.
      const currentMark = g.isCurrent ? chalk.cyan(" ◀ current") : "";
      const avgStr = scoreColor(g.avg, g.avg.toFixed(1).padStart(5));
      const bestStr = chalk.green(g.best.toFixed(1).padStart(5));
      // Worst score: red when below the "acceptable" threshold of 6.
      const worstStr = g.worst < 6 ? chalk.red(g.worst.toFixed(1).padStart(5)) : chalk.yellow(g.worst.toFixed(1).padStart(5));

      console.log(
        " " +
        chalk.white(g.promptHash.padEnd(colWidths.hash)) + " " +
        chalk.gray(String(g.runs).padStart(colWidths.runs)) + " " +
        avgStr + " " +
        bestStr + " " +
        worstStr + " " +
        chalk.gray(formatDate(g.lastSeen)) +
        currentMark
      );
    }
  }

  // ── Run History ───────────────────────────────────────────────────
  console.log(chalk.bold("\n Run History:\n"));

  for (const e of entries) {
    // Safe cast: buildTrendReport filters out entries with a null score.
    const score = e.harnessScore as number;
    const bar = scoreColor(score, `[${scoreBar(score)}]`);
    const scoreStr = scoreColor(score, score.toFixed(1).padStart(4));
    const hash = e.promptHash ? chalk.gray(e.promptHash) : chalk.gray("(no hash)");
    const dur = chalk.gray(formatDuration(e.totalDurationMs));
    const errMark = e.errors > 0 ? chalk.yellow(` ⚠${e.errors}err`) : "";
    const spec = chalk.gray(shortSpec(e.specPath));

    console.log(
      ` ${chalk.gray(formatDate(e.startedAt))} ${bar}${scoreStr} ${hash} ${dur}${errMark} ${spec}`
    );
  }

  // ── Footer ────────────────────────────────────────────────────────
  const logRelDir = path.relative(workingDir, path.join(workingDir, LOG_DIR));
  console.log(chalk.gray(`\n ${entries.length} run(s) shown · logs: ${logRelDir}/`));
  console.log(chalk.cyan("─".repeat(63)));
}
|
package/core/self-evaluator.ts
CHANGED
|
@@ -18,8 +18,14 @@ export interface SelfEvalResult {
|
|
|
18
18
|
detail: {
|
|
19
19
|
endpointsTotal: number;
|
|
20
20
|
endpointLayerCovered: boolean;
|
|
21
|
+
/** Number of endpoint-layer files generated */
|
|
22
|
+
endpointLayerFiles: number;
|
|
21
23
|
modelsTotal: number;
|
|
22
24
|
modelLayerCovered: boolean;
|
|
25
|
+
/** 0-1: fraction of DSL model names found in generated file paths */
|
|
26
|
+
modelNameCoverage: number;
|
|
27
|
+
/** Number of DSL model names actually matched in file paths */
|
|
28
|
+
modelNameMatched: number;
|
|
23
29
|
filesWritten: number;
|
|
24
30
|
};
|
|
25
31
|
}
|
|
@@ -57,6 +63,32 @@ function extractReviewScore(reviewText: string): number | null {
|
|
|
57
63
|
|
|
58
64
|
// ─── Main ─────────────────────────────────────────────────────────────────────
|
|
59
65
|
|
|
66
|
+
/**
|
|
67
|
+
* Normalize a PascalCase or camelCase model name to a set of search tokens
|
|
68
|
+
* that would appear in file paths.
|
|
69
|
+
*
|
|
70
|
+
* "OrderItem" → ["orderitem", "order-item", "order_item"]
|
|
71
|
+
* "User" → ["user"]
|
|
72
|
+
*/
|
|
73
|
+
export function modelNameTokens(name: string): string[] {
|
|
74
|
+
const lower = name.toLowerCase();
|
|
75
|
+
// split on uppercase boundaries: "OrderItem" → ["order", "item"]
|
|
76
|
+
const parts = name
|
|
77
|
+
.replace(/([A-Z])/g, "-$1")
|
|
78
|
+
.toLowerCase()
|
|
79
|
+
.replace(/^-/, "")
|
|
80
|
+
.split("-")
|
|
81
|
+
.filter(Boolean);
|
|
82
|
+
|
|
83
|
+
const tokens = new Set<string>();
|
|
84
|
+
tokens.add(lower);
|
|
85
|
+
if (parts.length > 1) {
|
|
86
|
+
tokens.add(parts.join("-"));
|
|
87
|
+
tokens.add(parts.join("_"));
|
|
88
|
+
}
|
|
89
|
+
return [...tokens];
|
|
90
|
+
}
|
|
91
|
+
|
|
60
92
|
/**
|
|
61
93
|
* Run a lightweight self-evaluation at the end of `ai-spec create`.
|
|
62
94
|
*
|
|
@@ -71,6 +103,18 @@ function extractReviewScore(reviewText: string): number | null {
|
|
|
71
103
|
* | DSL Coverage | 40 % | 55 % |
|
|
72
104
|
* | Compile/Error | 30 % | 45 % |
|
|
73
105
|
* | Review Score | 30 % | — |
|
|
106
|
+
*
|
|
107
|
+
* DSL Coverage Score breakdown (0-10):
|
|
108
|
+
* Tier 1 — Layer existence (same as before):
|
|
109
|
+
* - No files generated → 0 (early exit)
|
|
110
|
+
* - Endpoints declared but no endpoint layer → -4
|
|
111
|
+
* - Models declared but no model layer → -3
|
|
112
|
+
* Tier 2 — Model name coverage (new):
|
|
113
|
+
* - coverage < 50 % → -2
|
|
114
|
+
* - coverage 50–79 % → -1
|
|
115
|
+
* - coverage ≥ 80 % → 0
|
|
116
|
+
* Tier 3 — Endpoint file adequacy (new):
|
|
117
|
+
* - ≥5 endpoints declared but only 1 endpoint-layer file → -1
|
|
74
118
|
*/
|
|
75
119
|
export function runSelfEval(opts: {
|
|
76
120
|
dsl: SpecDSL | null;
|
|
@@ -91,18 +135,55 @@ export function runSelfEval(opts: {
|
|
|
91
135
|
const endpointLayerCovered = generatedFiles.some((f) =>
|
|
92
136
|
ENDPOINT_LAYER_PATTERNS.some((p) => p.test(f))
|
|
93
137
|
);
|
|
138
|
+
const endpointLayerFiles = generatedFiles.filter((f) =>
|
|
139
|
+
ENDPOINT_LAYER_PATTERNS.some((p) => p.test(f))
|
|
140
|
+
).length;
|
|
94
141
|
const modelLayerCovered = generatedFiles.some((f) =>
|
|
95
142
|
MODEL_LAYER_PATTERNS.some((p) => p.test(f))
|
|
96
143
|
);
|
|
97
144
|
|
|
145
|
+
// ── Tier 2: Model name coverage ───────────────────────────────────────────
|
|
146
|
+
// For each DSL model, check if its name (lowercased/tokenized) appears
|
|
147
|
+
// in any generated file path. This catches "User model was declared but
|
|
148
|
+
// no user.ts / user.model.ts was generated".
|
|
149
|
+
let modelNameMatched = 0;
|
|
150
|
+
if (modelsTotal > 0 && dsl?.models) {
|
|
151
|
+
for (const model of dsl.models) {
|
|
152
|
+
const tokens = modelNameTokens(model.name);
|
|
153
|
+
const found = generatedFiles.some((f) => {
|
|
154
|
+
const lf = f.toLowerCase();
|
|
155
|
+
return tokens.some((t) => lf.includes(t));
|
|
156
|
+
});
|
|
157
|
+
if (found) modelNameMatched++;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
const modelNameCoverage = modelsTotal > 0 ? modelNameMatched / modelsTotal : 1;
|
|
161
|
+
|
|
162
|
+
// ── Compute DSL Coverage Score ────────────────────────────────────────────
|
|
98
163
|
let dslCoverageScore = 10;
|
|
164
|
+
|
|
99
165
|
if (generatedFiles.length === 0) {
|
|
100
166
|
dslCoverageScore = 0;
|
|
101
167
|
} else {
|
|
168
|
+
// Tier 1: layer existence
|
|
102
169
|
if (endpointsTotal > 0 && !endpointLayerCovered) dslCoverageScore -= 4;
|
|
103
170
|
if (modelsTotal > 0 && !modelLayerCovered) dslCoverageScore -= 3;
|
|
171
|
+
|
|
172
|
+
// Tier 2: model name coverage (only meaningful when model layer exists)
|
|
173
|
+
if (modelsTotal > 0 && modelLayerCovered) {
|
|
174
|
+
if (modelNameCoverage < 0.5) dslCoverageScore -= 2;
|
|
175
|
+
else if (modelNameCoverage < 0.8) dslCoverageScore -= 1;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// Tier 3: endpoint file adequacy (many endpoints, very few files)
|
|
179
|
+
if (endpointsTotal >= 5 && endpointLayerCovered && endpointLayerFiles < 2) {
|
|
180
|
+
dslCoverageScore -= 1;
|
|
181
|
+
}
|
|
104
182
|
}
|
|
105
183
|
|
|
184
|
+
// clamp to [0, 10]
|
|
185
|
+
dslCoverageScore = Math.max(0, Math.min(10, dslCoverageScore));
|
|
186
|
+
|
|
106
187
|
// ── Compile Score ─────────────────────────────────────────────────────────
|
|
107
188
|
// 10 = clean pass, 5 = error feedback ran but didn't fully clear / was skipped
|
|
108
189
|
const compileScore = compilePassed ? 10 : 5;
|
|
@@ -124,8 +205,11 @@ export function runSelfEval(opts: {
|
|
|
124
205
|
detail: {
|
|
125
206
|
endpointsTotal,
|
|
126
207
|
endpointLayerCovered,
|
|
208
|
+
endpointLayerFiles,
|
|
127
209
|
modelsTotal,
|
|
128
210
|
modelLayerCovered,
|
|
211
|
+
modelNameCoverage: Math.round(modelNameCoverage * 100) / 100,
|
|
212
|
+
modelNameMatched,
|
|
129
213
|
filesWritten: generatedFiles.length,
|
|
130
214
|
},
|
|
131
215
|
};
|
|
@@ -138,6 +222,9 @@ export function runSelfEval(opts: {
|
|
|
138
222
|
compileScore,
|
|
139
223
|
reviewScore: reviewScore ?? undefined,
|
|
140
224
|
promptHash,
|
|
225
|
+
modelNameCoverage: result.detail.modelNameCoverage,
|
|
226
|
+
modelNameMatched: result.detail.modelNameMatched,
|
|
227
|
+
endpointLayerFiles: result.detail.endpointLayerFiles,
|
|
141
228
|
});
|
|
142
229
|
|
|
143
230
|
return result;
|
|
@@ -161,12 +248,29 @@ export function printSelfEval(result: SelfEvalResult): void {
|
|
|
161
248
|
? `Review: ${result.reviewScore}/10`
|
|
162
249
|
: chalk.gray("Review: skipped");
|
|
163
250
|
|
|
251
|
+
// Model coverage tag (only shown when there are declared models)
|
|
252
|
+
let modelCoverageTag = "";
|
|
253
|
+
if (result.detail.modelsTotal > 0) {
|
|
254
|
+
const pct = Math.round(result.detail.modelNameCoverage * 100);
|
|
255
|
+
const tag = `Models: ${result.detail.modelNameMatched}/${result.detail.modelsTotal} (${pct}%)`;
|
|
256
|
+
modelCoverageTag = pct >= 80
|
|
257
|
+
? chalk.green(tag)
|
|
258
|
+
: pct >= 50
|
|
259
|
+
? chalk.yellow(tag)
|
|
260
|
+
: chalk.red(tag);
|
|
261
|
+
}
|
|
262
|
+
|
|
164
263
|
console.log(chalk.cyan("\n─── Harness Self-Eval ───────────────────────────"));
|
|
165
264
|
console.log(` Score : ${scoreColor(`[${bar}] ${result.harnessScore}/10`)}`);
|
|
166
265
|
console.log(
|
|
167
|
-
` DSL : ${scoreColor(result.dslCoverageScore + "/10")} ` +
|
|
266
|
+
` DSL : ${scoreColor(String(result.dslCoverageScore) + "/10")} ` +
|
|
168
267
|
`Compile: ${compileTag} ${reviewTag}`
|
|
169
268
|
);
|
|
269
|
+
if (modelCoverageTag) {
|
|
270
|
+
console.log(` Detail : ${modelCoverageTag} ` +
|
|
271
|
+
chalk.gray(`Endpoints: ${result.detail.endpointsTotal} Files: ${result.detail.filesWritten}`)
|
|
272
|
+
);
|
|
273
|
+
}
|
|
170
274
|
console.log(chalk.gray(` Prompt : ${result.promptHash}`));
|
|
171
|
-
console.log(chalk.
|
|
275
|
+
console.log(chalk.cyan("─".repeat(49)));
|
|
172
276
|
}
|