ai-spec-dev 0.30.1 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,255 @@
1
+ /**
2
+ * dsl-feedback.ts — Two pipeline feedback loops for ai-spec create
3
+ *
4
+ * Loop 1 (DSL → Spec): after DSL extraction, detect sparse/incomplete DSL
5
+ * and offer a targeted spec refinement pass before codegen starts.
6
+ *
7
+ * Loop 2 (Review → DSL): after 3-pass review, detect design-level findings
8
+ * (as opposed to implementation issues) and offer to amend the spec + DSL
9
+ * so the next update/regen starts from a corrected contract.
10
+ *
11
+ * Design constraints:
12
+ * - Both loops are SKIPPED in --auto / --fast / --skip-dsl modes.
13
+ * - Zero extra AI calls until the user explicitly opts in.
14
+ * - Non-blocking: user can always skip.
15
+ */
16
+
17
+ import chalk from "chalk";
18
+ import { SpecDSL } from "./dsl-types";
19
+
20
+ // ─── Loop 1 Types ─────────────────────────────────────────────────────────────
21
+
22
/**
 * A single completeness problem detected in an extracted DSL.
 * Produced by `assessDslRichness`; consumed by the refinement prompt builder.
 */
export interface DslGap {
  /** Short machine key for RunLog serialisation */
  code: "sparse_model" | "missing_errors" | "generic_endpoint_desc" | "no_models_no_endpoints";
  /** Human-readable message shown to the user */
  message: string;
  /** Concrete suggestion injected into the refinement prompt */
  hint: string;
}
30
+
31
+ // ─── Loop 1: DSL Richness Assessment ─────────────────────────────────────────
32
+
33
+ /**
34
+ * Inspect a freshly-extracted DSL for common completeness gaps.
35
+ * Returns a list of DslGap objects (empty = DSL looks adequate).
36
+ *
37
+ * All checks are pure heuristics — zero AI calls.
38
+ */
39
+ export function assessDslRichness(dsl: SpecDSL): DslGap[] {
40
+ const gaps: DslGap[] = [];
41
+
42
+ // ── No endpoints AND no models ────────────────────────────────────────────
43
+ if (dsl.endpoints.length === 0 && dsl.models.length === 0) {
44
+ gaps.push({
45
+ code: "no_models_no_endpoints",
46
+ message: "DSL has no endpoints and no models — spec may be too abstract for structured extraction",
47
+ hint: "Please add explicit API endpoint definitions (method, path, request/response) and any data models that this feature requires.",
48
+ });
49
+ return gaps; // no point checking the rest
50
+ }
51
+
52
+ // ── Endpoints with very generic / short descriptions ─────────────────────
53
+ const GENERIC_DESC_KEYWORDS = ["handles", "processes", "manages", "操作", "处理", "管理"];
54
+ const GENERIC_DESC_MIN_LEN = 15;
55
+
56
+ for (const ep of dsl.endpoints) {
57
+ const desc = (ep.description ?? "").trim();
58
+ const isGeneric =
59
+ desc.length < GENERIC_DESC_MIN_LEN ||
60
+ GENERIC_DESC_KEYWORDS.some((kw) => desc.toLowerCase().startsWith(kw));
61
+
62
+ if (isGeneric) {
63
+ gaps.push({
64
+ code: "generic_endpoint_desc",
65
+ message: `Endpoint ${ep.method} ${ep.path} has a vague description: "${desc}"`,
66
+ hint: `Clarify what ${ep.method} ${ep.path} does: what inputs are required, what the success response contains, and what business rule it enforces.`,
67
+ });
68
+ }
69
+ }
70
+
71
+ // ── Endpoints with no error definitions (but spec text likely mentions them) ──
72
+ const endpointsWithoutErrors = dsl.endpoints.filter(
73
+ (ep) => !ep.errors || ep.errors.length === 0
74
+ );
75
+ if (endpointsWithoutErrors.length > 0 && dsl.endpoints.length >= 2) {
76
+ gaps.push({
77
+ code: "missing_errors",
78
+ message: `${endpointsWithoutErrors.length}/${dsl.endpoints.length} endpoints have no error definitions`,
79
+ hint: `For each endpoint, specify at least the main error cases: e.g. 400 validation errors, 401 auth failures, 404 not found, 409 conflict. Include an error code (e.g. INVALID_INPUT) and description for each.`,
80
+ });
81
+ }
82
+
83
+ // ── Models with fewer than 2 fields ──────────────────────────────────────
84
+ for (const model of dsl.models) {
85
+ if (!model.fields || model.fields.length < 2) {
86
+ gaps.push({
87
+ code: "sparse_model",
88
+ message: `Model "${model.name}" has only ${model.fields?.length ?? 0} field(s) — likely incomplete`,
89
+ hint: `List all fields for "${model.name}" with their types and whether they are required. Include at minimum an id, created_at, and the core domain fields this model needs.`,
90
+ });
91
+ }
92
+ }
93
+
94
+ return gaps;
95
+ }
96
+
97
+ // ─── Loop 1: Targeted Spec Refinement Prompt ─────────────────────────────────
98
+
99
+ /**
100
+ * Build a targeted AI refinement prompt that focuses the LLM on filling
101
+ * only the specific gaps detected by `assessDslRichness`.
102
+ */
103
+ export function buildDslGapRefinementPrompt(spec: string, gaps: DslGap[]): string {
104
+ const gapList = gaps
105
+ .map((g, i) => `${i + 1}. [${g.code}] ${g.message}\n → ${g.hint}`)
106
+ .join("\n\n");
107
+
108
+ return `The following feature spec has been structurally analysed. The DSL extracted from it was found to be incomplete in these specific areas:
109
+
110
+ ${gapList}
111
+
112
+ Your task: revise the spec below to address ONLY the gaps listed above.
113
+ - Do NOT change the overall feature scope or business logic.
114
+ - Do NOT rewrite sections that are already complete.
115
+ - Add missing error cases, clarify vague endpoint descriptions, complete sparse model field lists.
116
+ - Output ONLY the complete revised Markdown spec. No preamble, no explanation.
117
+
118
+ === Current Spec ===
119
+ ${spec}`;
120
+ }
121
+
122
+ // ─── Loop 2 Types ─────────────────────────────────────────────────────────────
123
+
124
/**
 * A design-level (Spec/DSL contract) issue surfaced by the review pass,
 * as opposed to an implementation-level finding.
 */
export interface StructuralFinding {
  /** Short label for display + RunLog */
  category: "auth_design" | "model_design" | "api_contract" | "layer_violation" | "other_design";
  /** Human-readable summary — ideally the matching sentence from the review text. */
  description: string;
}
129
+
130
+ // ─── Loop 2: Review Structural Issue Classifier ───────────────────────────────
131
+
132
+ /**
133
+ * Parse a 3-pass review text to extract Pass 1 (architecture) findings
134
+ * that indicate design-level issues in the Spec/DSL — as opposed to
135
+ * implementation-level issues that belong in §9 knowledge.
136
+ *
137
+ * Returns an empty array if no structural issues are found or if the
138
+ * review score for Pass 1 is high (≥ 8), indicating overall approval.
139
+ */
140
+ export function extractStructuralFindings(reviewText: string): StructuralFinding[] {
141
+ // Split by the separator used between passes ("─────...")
142
+ const parts = reviewText.split(/─{20,}/);
143
+ // Pass 1 is always the first section
144
+ const pass1Text = parts[0] ?? "";
145
+
146
+ // If Pass 1 scored well, treat as no structural issues
147
+ const pass1Score = extractPassScore(pass1Text);
148
+ if (pass1Score !== null && pass1Score >= 8) return [];
149
+
150
+ const findings: StructuralFinding[] = [];
151
+
152
+ // ── Auth / 认证 design issues ──────────────────────────────────────────
153
+ if (
154
+ /缺少认证|missing auth|auth.*false|未加认证|鉴权.*缺|endpoint.*public.*should/i.test(pass1Text)
155
+ ) {
156
+ const match = pass1Text.match(/[^。\n]*(?:缺少认证|missing auth|auth.*false|未加认证|鉴权.*缺|endpoint.*public.*should)[^。\n]*/i);
157
+ findings.push({
158
+ category: "auth_design",
159
+ description: match ? match[0].trim() : "One or more endpoints may have incorrect authentication requirements",
160
+ });
161
+ }
162
+
163
+ // ── API contract / 接口设计 issues ────────────────────────────────────
164
+ if (
165
+ /接口设计.*问题|接口.*不合理|API design|response.*missing|request.*missing|接口.*缺少/i.test(pass1Text)
166
+ ) {
167
+ const match = pass1Text.match(/[^。\n]*(?:接口设计.*问题|接口.*不合理|API design|response.*missing|接口.*缺少)[^。\n]*/i);
168
+ findings.push({
169
+ category: "api_contract",
170
+ description: match ? match[0].trim() : "API contract design may have issues",
171
+ });
172
+ }
173
+
174
+ // ── Model / 数据模型 design issues ────────────────────────────────────
175
+ if (
176
+ /模型.*缺少字段|model.*missing field|数据结构.*问题|schema.*incomplete|字段.*missing/i.test(pass1Text)
177
+ ) {
178
+ const match = pass1Text.match(/[^。\n]*(?:模型.*缺少字段|model.*missing field|数据结构.*问题|schema.*incomplete)[^。\n]*/i);
179
+ findings.push({
180
+ category: "model_design",
181
+ description: match ? match[0].trim() : "Data model design may be incomplete",
182
+ });
183
+ }
184
+
185
+ // ── Layer separation / 层级分离 violations ────────────────────────────
186
+ if (
187
+ /层级.*违反|layer.*violation|business logic.*controller|controller.*service.*混|分层.*问题/i.test(pass1Text)
188
+ ) {
189
+ const match = pass1Text.match(/[^。\n]*(?:层级.*违反|layer.*violation|business logic.*controller|分层.*问题)[^。\n]*/i);
190
+ findings.push({
191
+ category: "layer_violation",
192
+ description: match ? match[0].trim() : "Layer separation may be violated in the generated code",
193
+ });
194
+ }
195
+
196
+ return findings;
197
+ }
198
+
199
+ /** Extract the numeric score from a single pass section. */
200
+ function extractPassScore(text: string): number | null {
201
+ const m = text.match(/Score:\s*(\d+(?:\.\d+)?)\s*\/\s*10/i);
202
+ return m ? parseFloat(m[1]) : null;
203
+ }
204
+
205
+ // ─── Loop 2: Spec Amendment Prompt ────────────────────────────────────────────
206
+
207
+ /**
208
+ * Build a prompt asking the AI to produce a minimal spec amendment
209
+ * that addresses the structural findings from the review.
210
+ *
211
+ * The amendment is a targeted addition/correction — NOT a full rewrite.
212
+ */
213
+ export function buildStructuralAmendmentPrompt(
214
+ spec: string,
215
+ findings: StructuralFinding[]
216
+ ): string {
217
+ const findingList = findings
218
+ .map((f, i) => `${i + 1}. [${f.category}] ${f.description}`)
219
+ .join("\n");
220
+
221
+ return `A code review of the feature built from this spec found the following DESIGN-LEVEL issues.
222
+ These are problems in the spec/contract itself, not in the implementation.
223
+
224
+ === Structural Findings ===
225
+ ${findingList}
226
+
227
+ Your task:
228
+ - Revise the spec below to correct the design issues listed above.
229
+ - Do NOT change the feature scope, business logic, or sections unrelated to these findings.
230
+ - Be minimal: only change what is necessary to fix the design issues.
231
+ - Output ONLY the complete revised Markdown spec. No preamble, no explanation.
232
+
233
+ === Current Spec ===
234
+ ${spec}`;
235
+ }
236
+
237
+ // ─── Display Helpers ──────────────────────────────────────────────────────────
238
+
239
+ export function printDslGaps(gaps: DslGap[]): void {
240
+ console.log(chalk.yellow("\n ⚠ DSL Completeness Check — gaps detected:"));
241
+ for (const gap of gaps) {
242
+ console.log(chalk.yellow(` · ${gap.message}`));
243
+ }
244
+ console.log(chalk.gray(" → A targeted spec refinement can fill these gaps before codegen."));
245
+ }
246
+
247
+ export function printStructuralFindings(findings: StructuralFinding[]): void {
248
+ console.log(chalk.yellow("\n ⚠ Review — structural (design-level) issues found:"));
249
+ for (const f of findings) {
250
+ const label = chalk.gray(`[${f.category}]`);
251
+ console.log(` ${label} ${f.description}`);
252
+ }
253
+ console.log(chalk.gray(" → These are contract issues in the Spec/DSL, not just implementation problems."));
254
+ console.log(chalk.gray(" → Fixing the spec now means the next run generates correct code from the start."));
255
+ }
@@ -0,0 +1,42 @@
1
+ import { createHash } from "crypto";
2
+
3
+ import { codeGenSystemPrompt } from "../prompts/codegen.prompt";
4
+ import {
5
+ reviewArchitectureSystemPrompt,
6
+ reviewImplementationSystemPrompt,
7
+ reviewImpactComplexitySystemPrompt,
8
+ } from "../prompts/codegen.prompt";
9
+ import { dslSystemPrompt } from "../prompts/dsl.prompt";
10
+ import { specPrompt } from "../prompts/spec.prompt";
11
+
12
+ /**
13
+ * Compute a short deterministic hash of the key prompt strings used in a run.
14
+ *
15
+ * Why this matters (Harness Engineering):
16
+ * When you change a prompt and re-run `ai-spec create`, the resulting RunLog
17
+ * will have a different promptHash. Cross-referencing RunLogs by promptHash
18
+ * lets you quantify whether a prompt change improved or degraded harnessScore
19
+ * without keeping a separate changelog.
20
+ *
21
+ * Coverage: codegen system prompt (TS), DSL extractor, spec generator, and all
22
+ * three review-pass prompts — these drive the vast majority of token spend and
23
+ * output variance.
24
+ *
25
+ * Returns: 8-char lowercase hex (e.g. "a3f2c1d8"). Collision probability for
26
+ * practical prompt-tweak scenarios is negligible.
27
+ */
28
+ export function computePromptHash(): string {
29
+ const segments = [
30
+ codeGenSystemPrompt,
31
+ dslSystemPrompt,
32
+ specPrompt,
33
+ reviewArchitectureSystemPrompt,
34
+ reviewImplementationSystemPrompt,
35
+ reviewImpactComplexitySystemPrompt,
36
+ ];
37
+
38
+ return createHash("sha256")
39
+ .update(segments.join("\x00")) // \x00 separator prevents segment-boundary collisions
40
+ .digest("hex")
41
+ .slice(0, 8);
42
+ }
@@ -20,6 +20,15 @@ export interface RunLog {
20
20
  provider?: string;
21
21
  model?: string;
22
22
  specPath?: string;
23
+ /**
24
+ * 8-char hex hash of the key prompt strings used in this run.
25
+ * Changes whenever any of: codegen, DSL, spec, or review prompts are edited.
26
+ * Use this to correlate RunLogs across runs and measure whether a prompt
27
+ * change improved or degraded harnessScore (Harness Engineering observability).
28
+ */
29
+ promptHash?: string;
30
+ /** Harness self-evaluation score recorded at end of `create` (0-10). */
31
+ harnessScore?: number;
23
32
  entries: LogEntry[];
24
33
  filesWritten: string[];
25
34
  errors: string[];
@@ -73,6 +82,18 @@ export class RunLogger {
73
82
  this.flush();
74
83
  }
75
84
 
85
  /**
   * Record the prompt hash for this run (call once at run start).
   * @param hash 8-char hex from `computePromptHash`.
   */
  setPromptHash(hash: string): void {
    this.log.promptHash = hash;
    this.flush(); // persist immediately so the hash survives a crashed run
  }
90
+
91
  /**
   * Record the harness self-eval score (call once at run end).
   * @param score Self-evaluation score, 0-10.
   */
  setHarnessScore(score: number): void {
    this.log.harnessScore = score;
    this.flush(); // persist immediately so the score is never lost
  }
96
+
76
97
  fileWritten(filePath: string): void {
77
98
  if (!this.log.filesWritten.includes(filePath)) {
78
99
  this.log.filesWritten.push(filePath);
@@ -0,0 +1,241 @@
1
+ import * as fs from "fs-extra";
2
+ import * as path from "path";
3
+ import chalk from "chalk";
4
+ import { RunLog } from "./run-logger";
5
+
6
+ const LOG_DIR = ".ai-spec-logs";
7
+
8
+ // ─── Types ────────────────────────────────────────────────────────────────────
9
+
10
/** Flattened, display-oriented view of a single RunLog. */
export interface TrendEntry {
  runId: string;
  /** ISO timestamp of run start. */
  startedAt: string;
  /** 8-char prompt-version hash; null for runs logged before hashing existed. */
  promptHash: string | null;
  /** Harness self-eval score (0-10); null when the run recorded none. */
  harnessScore: number | null;
  specPath: string | null;
  provider: string | null;
  model: string | null;
  /** Count of files written (not the paths themselves). */
  filesWritten: number;
  totalDurationMs: number | null;
  /** Count of errors logged during the run. */
  errors: number;
}
22
+
23
/** Aggregate harness-score statistics for all runs sharing one prompt hash. */
export interface PromptGroupSummary {
  promptHash: string;
  /** Number of scored runs in this group. */
  runs: number;
  /** Mean harnessScore, rounded to one decimal. */
  avg: number;
  best: number;
  worst: number;
  /** startedAt of the oldest run in the group. */
  firstSeen: string;
  /** startedAt of the newest run in the group. */
  lastSeen: string;
  /** true if this is the most recently used prompt hash */
  isCurrent: boolean;
}
34
+
35
/** Full trend report: per-run entries plus per-prompt-version aggregates. */
export interface TrendReport {
  /** Scored runs, newest first (after filtering/limiting). */
  entries: TrendEntry[];
  promptGroups: PromptGroupSummary[];
  /** Number of entries after filters are applied. */
  totalRuns: number;
}
40
+
41
+ // ─── Loader ──────────────────────────────────────────────────────────────────
42
+
43
+ /**
44
+ * Read all RunLog JSON files from `.ai-spec-logs/`, sorted newest-first.
45
+ * Silently skips unreadable / corrupt files.
46
+ */
47
+ export async function loadRunLogs(workingDir: string): Promise<RunLog[]> {
48
+ const logDir = path.join(workingDir, LOG_DIR);
49
+ if (!(await fs.pathExists(logDir))) return [];
50
+
51
+ const files = await fs.readdir(logDir);
52
+ const jsonFiles = files.filter((f) => f.endsWith(".json")).sort().reverse();
53
+
54
+ const logs: RunLog[] = [];
55
+ for (const file of jsonFiles) {
56
+ try {
57
+ const log: RunLog = await fs.readJson(path.join(logDir, file));
58
+ // only include runs that have a startedAt (minimal validity check)
59
+ if (log.runId && log.startedAt) {
60
+ logs.push(log);
61
+ }
62
+ } catch {
63
+ // corrupt file — skip silently
64
+ }
65
+ }
66
+ return logs;
67
+ }
68
+
69
+ // ─── Aggregation ─────────────────────────────────────────────────────────────
70
+
71
+ export function buildTrendReport(
72
+ logs: RunLog[],
73
+ opts: { last?: number; promptFilter?: string } = {}
74
+ ): TrendReport {
75
+ let entries: TrendEntry[] = logs.map((log) => ({
76
+ runId: log.runId,
77
+ startedAt: log.startedAt,
78
+ promptHash: log.promptHash ?? null,
79
+ harnessScore: log.harnessScore ?? null,
80
+ specPath: log.specPath ?? null,
81
+ provider: log.provider ?? null,
82
+ model: log.model ?? null,
83
+ filesWritten: log.filesWritten?.length ?? 0,
84
+ totalDurationMs: log.totalDurationMs ?? null,
85
+ errors: log.errors?.length ?? 0,
86
+ }));
87
+
88
+ // filter: only runs with a harnessScore (create runs)
89
+ entries = entries.filter((e) => e.harnessScore !== null);
90
+
91
+ // filter by prompt hash if requested
92
+ if (opts.promptFilter) {
93
+ entries = entries.filter((e) =>
94
+ e.promptHash?.startsWith(opts.promptFilter!)
95
+ );
96
+ }
97
+
98
+ // limit to last N
99
+ if (opts.last && opts.last > 0) {
100
+ entries = entries.slice(0, opts.last);
101
+ }
102
+
103
+ // build prompt group summaries (only from filtered entries)
104
+ const groupMap = new Map<string, TrendEntry[]>();
105
+ for (const e of entries) {
106
+ const key = e.promptHash ?? "(none)";
107
+ if (!groupMap.has(key)) groupMap.set(key, []);
108
+ groupMap.get(key)!.push(e);
109
+ }
110
+
111
+ // determine "current" = the prompt hash of the most recent run
112
+ const currentHash = entries[0]?.promptHash ?? null;
113
+
114
+ const promptGroups: PromptGroupSummary[] = [];
115
+ for (const [hash, group] of groupMap.entries()) {
116
+ const scores = group.map((e) => e.harnessScore as number);
117
+ promptGroups.push({
118
+ promptHash: hash,
119
+ runs: group.length,
120
+ avg: Math.round((scores.reduce((a, b) => a + b, 0) / scores.length) * 10) / 10,
121
+ best: Math.max(...scores),
122
+ worst: Math.min(...scores),
123
+ firstSeen: group[group.length - 1].startedAt,
124
+ lastSeen: group[0].startedAt,
125
+ isCurrent: hash === currentHash,
126
+ });
127
+ }
128
+
129
+ // sort groups: most recently used first
130
+ promptGroups.sort((a, b) => b.lastSeen.localeCompare(a.lastSeen));
131
+
132
+ return { entries, promptGroups, totalRuns: entries.length };
133
+ }
134
+
135
+ // ─── Display ─────────────────────────────────────────────────────────────────
136
+
137
+ function scoreBar(score: number): string {
138
+ const filled = Math.round(score);
139
+ return "█".repeat(filled) + "░".repeat(10 - filled);
140
+ }
141
+
142
+ function scoreColor(score: number, text: string): string {
143
+ if (score >= 8) return chalk.green(text);
144
+ if (score >= 6) return chalk.yellow(text);
145
+ return chalk.red(text);
146
+ }
147
+
148
+ function formatDate(iso: string): string {
149
+ return iso.slice(0, 10); // YYYY-MM-DD
150
+ }
151
+
152
+ function formatDuration(ms: number | null): string {
153
+ if (ms === null) return " — ";
154
+ const s = Math.round(ms / 1000);
155
+ if (s < 60) return `${s}s`;
156
+ return `${Math.floor(s / 60)}m${s % 60}s`;
157
+ }
158
+
159
+ function shortSpec(specPath: string | null): string {
160
+ if (!specPath) return chalk.gray("—");
161
+ return path.basename(specPath);
162
+ }
163
+
164
/**
 * Render a TrendReport to the console: a per-prompt-version summary table
 * followed by a newest-first run history with score bars.
 *
 * Output is purely informational; nothing is returned or written to disk.
 *
 * @param report Aggregated report from `buildTrendReport`.
 * @param workingDir Project root, used only to print the relative log path.
 */
export function printTrendReport(report: TrendReport, workingDir: string): void {
  const { entries, promptGroups } = report;

  console.log(chalk.cyan("\n─── Harness Trend ───────────────────────────────────────────"));

  // Nothing scored yet — print a hint and bail out early.
  if (entries.length === 0) {
    console.log(chalk.gray(" No scored runs found. Run `ai-spec create` to start tracking."));
    console.log(chalk.cyan("─".repeat(63)));
    return;
  }

  // ── Prompt Version Summary ────────────────────────────────────────
  if (promptGroups.length > 0) {
    console.log(chalk.bold("\n Prompt Versions:\n"));

    // Fixed column widths keep the table aligned across rows.
    const colWidths = {
      hash: 10,
      runs: 5,
      avg: 5,
      best: 5,
      worst: 5,
    };

    // header
    console.log(
      chalk.gray(
        " " +
        "Hash ".padEnd(colWidths.hash) + " " +
        "Runs ".padStart(colWidths.runs) + " " +
        " Avg" + " " +
        " Best" + " " +
        "Worst" + " " +
        "Last seen"
      )
    );
    console.log(chalk.gray(" " + "─".repeat(55)));

    for (const g of promptGroups) {
      // Mark the prompt hash of the most recent run.
      const currentMark = g.isCurrent ? chalk.cyan(" ◀ current") : "";
      const avgStr = scoreColor(g.avg, g.avg.toFixed(1).padStart(5));
      const bestStr = chalk.green(g.best.toFixed(1).padStart(5));
      // Worst score gets red below the 6.0 threshold, yellow otherwise.
      const worstStr = g.worst < 6 ? chalk.red(g.worst.toFixed(1).padStart(5)) : chalk.yellow(g.worst.toFixed(1).padStart(5));

      console.log(
        " " +
        chalk.white(g.promptHash.padEnd(colWidths.hash)) + " " +
        chalk.gray(String(g.runs).padStart(colWidths.runs)) + " " +
        avgStr + " " +
        bestStr + " " +
        worstStr + " " +
        chalk.gray(formatDate(g.lastSeen)) +
        currentMark
      );
    }
  }

  // ── Run History ───────────────────────────────────────────────────
  console.log(chalk.bold("\n Run History:\n"));

  for (const e of entries) {
    // entries are pre-filtered to harnessScore !== null, so the cast is safe
    const score = e.harnessScore as number;
    const bar = scoreColor(score, `[${scoreBar(score)}]`);
    const scoreStr = scoreColor(score, score.toFixed(1).padStart(4));
    const hash = e.promptHash ? chalk.gray(e.promptHash) : chalk.gray("(no hash)");
    const dur = chalk.gray(formatDuration(e.totalDurationMs));
    const errMark = e.errors > 0 ? chalk.yellow(` ⚠${e.errors}err`) : "";
    const spec = chalk.gray(shortSpec(e.specPath));

    console.log(
      ` ${chalk.gray(formatDate(e.startedAt))} ${bar}${scoreStr} ${hash} ${dur}${errMark} ${spec}`
    );
  }

  // ── Footer ────────────────────────────────────────────────────────
  const logRelDir = path.relative(workingDir, path.join(workingDir, LOG_DIR));
  console.log(chalk.gray(`\n ${entries.length} run(s) shown · logs: ${logRelDir}/`));
  console.log(chalk.cyan("─".repeat(63)));
}
+ }