ai-spec-dev 0.31.0 → 0.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/add-lesson.md +34 -0
- package/.claude/commands/check-layers.md +65 -0
- package/.claude/commands/installed-deps.md +35 -0
- package/.claude/commands/recall-lessons.md +40 -0
- package/.claude/commands/scan-singletons.md +45 -0
- package/.claude/commands/verify-imports.md +48 -0
- package/.claude/settings.local.json +15 -1
- package/README.md +531 -213
- package/RELEASE_LOG.md +460 -0
- package/cli/commands/config.ts +93 -0
- package/cli/commands/create.ts +1233 -0
- package/cli/commands/dashboard.ts +62 -0
- package/cli/commands/export.ts +66 -0
- package/cli/commands/init.ts +190 -0
- package/cli/commands/learn.ts +30 -0
- package/cli/commands/logs.ts +106 -0
- package/cli/commands/mock.ts +175 -0
- package/cli/commands/model.ts +156 -0
- package/cli/commands/restore.ts +22 -0
- package/cli/commands/review.ts +63 -0
- package/cli/commands/scan.ts +99 -0
- package/cli/commands/trend.ts +36 -0
- package/cli/commands/types.ts +69 -0
- package/cli/commands/update.ts +178 -0
- package/cli/commands/vcr.ts +70 -0
- package/cli/commands/workspace.ts +219 -0
- package/cli/index.ts +34 -2240
- package/cli/utils.ts +83 -0
- package/core/combined-generator.ts +13 -3
- package/core/dashboard-generator.ts +340 -0
- package/core/design-dialogue.ts +124 -0
- package/core/dsl-feedback.ts +285 -0
- package/core/error-feedback.ts +46 -2
- package/core/project-index.ts +301 -0
- package/core/reviewer.ts +84 -6
- package/core/run-logger.ts +109 -3
- package/core/run-trend.ts +261 -0
- package/core/self-evaluator.ts +139 -7
- package/core/spec-generator.ts +14 -8
- package/core/task-generator.ts +17 -0
- package/core/types-generator.ts +219 -0
- package/core/vcr.ts +210 -0
- package/dist/cli/index.js +6692 -4512
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/index.mjs +6692 -4512
- package/dist/cli/index.mjs.map +1 -1
- package/dist/index.d.mts +19 -5
- package/dist/index.d.ts +19 -5
- package/dist/index.js +420 -224
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +418 -224
- package/dist/index.mjs.map +1 -1
- package/docs-assets/purpose/architecture-overview.svg +64 -0
- package/docs-assets/purpose/create-pipeline.svg +113 -0
- package/docs-assets/purpose/task-layering.svg +74 -0
- package/package.json +6 -3
- package/prompts/codegen.prompt.ts +97 -9
- package/prompts/design.prompt.ts +59 -0
- package/prompts/spec.prompt.ts +8 -1
- package/prompts/tasks.prompt.ts +27 -2
- package/purpose.md +600 -174
- package/tests/dsl-extractor.test.ts +264 -0
- package/tests/dsl-feedback.test.ts +266 -0
- package/tests/dsl-validator.test.ts +283 -0
- package/tests/error-feedback.test.ts +292 -0
- package/tests/provider-utils.test.ts +173 -0
- package/tests/run-trend.test.ts +186 -0
- package/tests/self-evaluator.test.ts +339 -0
- package/tests/spec-assessor.test.ts +142 -0
- package/tests/task-generator.test.ts +230 -0
package/cli/utils.ts
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import * as path from "path";
|
|
2
|
+
import * as fs from "fs-extra";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import { input, select } from "@inquirer/prompts";
|
|
5
|
+
import { CodeGenMode } from "../core/code-generator";
|
|
6
|
+
import { ENV_KEY_MAP } from "../core/spec-generator";
|
|
7
|
+
import { getSavedKey, saveKey, KEY_STORE_FILE } from "../core/key-store";
|
|
8
|
+
|
|
9
|
+
// ─── Config ───────────────────────────────────────────────────────────────────
|
|
10
|
+
|
|
11
|
+
/**
 * Shape of the JSON document stored in the project config file (`CONFIG_FILE`).
 *
 * Every field is optional; `loadConfig` returns an empty object when the file
 * does not exist, so consumers must supply their own defaults.
 */
export interface AiSpecConfig {
  /** AI provider name. NOTE(review): presumably the spec-generation provider, given the separate `codegenProvider` — confirm. */
  provider?: string;

  /** Model name to use with `provider`. */
  model?: string;

  /** Code generation mode (see `CodeGenMode` in core/code-generator). */
  codegen?: CodeGenMode;

  /** Provider name used for code generation. */
  codegenProvider?: string;

  /** Model name used for code generation. */
  codegenModel?: string;

  /** Minimum overall spec score (1-10) required to pass Approval Gate. 0 = disabled (default). */
  minSpecScore?: number;
}
|
|
20
|
+
|
|
21
|
+
/** File name of the per-project configuration, resolved relative to the project directory. */
export const CONFIG_FILE = ".ai-spec.json";

/**
 * Loads the project configuration from `<dir>/.ai-spec.json`.
 *
 * @param dir Directory expected to contain the config file.
 * @returns The parsed JSON content, or an empty object when the file is absent.
 *   The parsed value is returned as-is; it is not validated against `AiSpecConfig`.
 */
export async function loadConfig(dir: string): Promise<AiSpecConfig> {
  const configPath = path.join(dir, CONFIG_FILE);
  const exists = await fs.pathExists(configPath);
  if (!exists) {
    return {};
  }
  return fs.readJson(configPath);
}
|
|
30
|
+
|
|
31
|
+
// ─── API Key Resolution ───────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
/**
 * Resolves the API key for a provider, trying sources in priority order:
 *
 * 1. the key passed on the command line (`cliKey`),
 * 2. the environment variable mapped to the provider in `ENV_KEY_MAP`,
 * 3. a previously saved key (the user is asked whether to reuse it),
 * 4. an interactive prompt; the entered key is saved for next time.
 *
 * @param providerName Provider identifier used to look up the env var and the saved key.
 * @param cliKey Key supplied explicitly by the caller; wins over every other source.
 * @returns The resolved API key.
 */
export async function resolveApiKey(
  providerName: string,
  cliKey?: string
): Promise<string> {
  if (cliKey) return cliKey;

  const envVar = ENV_KEY_MAP[providerName];
  const envKey = envVar ? process.env[envVar] : undefined;
  if (envKey) return envKey;

  const savedKey = await getSavedKey(providerName);
  if (savedKey) {
    // Show a 6-char prefix and 4-char suffix only when more of the key stays
    // hidden than revealed. A short key would otherwise be printed almost (or
    // entirely) in clear text, because the two slices overlap.
    const masked =
      savedKey.length > 20
        ? `${savedKey.slice(0, 6)}...${savedKey.slice(-4)}`
        : "********";
    const choice = await select({
      message: `${providerName} API key (saved: ${masked}):`,
      choices: [
        { name: "Use saved key", value: "reuse" },
        { name: "Enter a new key", value: "new" },
      ],
    });
    if (choice === "reuse") return savedKey;
  }

  const entered = await input({
    message: `Enter your ${providerName} API key${envVar ? ` (or set ${envVar} env var)` : ""}:`,
    validate: (v) => v.trim().length > 0 || "API key cannot be empty",
  });
  // Trim once so the stored key and the returned key are guaranteed identical.
  const newKey = entered.trim();
  await saveKey(providerName, newKey);
  console.log(chalk.gray(` Key saved to ${KEY_STORE_FILE}`));
  return newKey;
}
|
|
63
|
+
|
|
64
|
+
// ─── Banner ───────────────────────────────────────────────────────────────────
|
|
65
|
+
|
|
66
|
+
/**
 * Prints the start-up banner: a title framed by horizontal rules, followed by
 * the provider/model used for spec generation and for code generation.
 *
 * @param opts Values to display; they are printed verbatim.
 */
export function printBanner(opts: {
  specProvider: string;
  specModel: string;
  codegenMode: string;
  codegenProvider: string;
  codegenModel: string;
}) {
  const rule = "─".repeat(52);
  const { specProvider, specModel, codegenMode, codegenProvider, codegenModel } = opts;

  const bannerLines = [
    chalk.blue("\n" + rule),
    chalk.bold(" ai-spec — AI-driven Development Orchestrator"),
    chalk.blue(rule),
    chalk.gray(` Spec : ${specProvider} / ${specModel}`),
    chalk.gray(` Codegen : ${codegenMode} (${codegenProvider} / ${codegenModel})`),
    chalk.blue(rule + "\n"),
  ];

  // One console.log call per line, exactly as many writes as before.
  for (const line of bannerLines) {
    console.log(line);
  }
}
|
|
@@ -20,18 +20,28 @@ const tasksInstruction = `
|
|
|
20
20
|
After outputting the complete spec above, append EXACTLY this line on its own (no extra text before or after it):
|
|
21
21
|
${TASKS_SEPARATOR}
|
|
22
22
|
Then output a valid JSON array of implementation tasks. Each element must have these exact fields:
|
|
23
|
-
{"id":"TASK-001","title":"...","description":"1-2 sentences, specific","layer":"data|infra|service|api|test","filesToTouch":["src/..."],"acceptanceCriteria":["
|
|
23
|
+
{"id":"TASK-001","title":"...","description":"1-2 sentences, specific","layer":"data|infra|service|api|test","filesToTouch":["src/..."],"acceptanceCriteria":["behavioral condition"],"verificationSteps":["concrete runnable check → expected result"],"dependencies":[],"priority":"high|medium|low"}
|
|
24
|
+
verificationSteps rules: each step is a specific command or action with observable expected output (e.g. "POST /api/orders → 201 {id, status:'pending'}"). At least 2 per task, max 5. Never vague.
|
|
24
25
|
Layer order: data → infra → service → api → test. 4-10 tasks total. filesToTouch must use real paths from the project context.`;
|
|
25
26
|
|
|
26
27
|
export async function generateSpecWithTasks(
|
|
27
28
|
provider: AIProvider,
|
|
28
29
|
idea: string,
|
|
29
|
-
context?: ProjectContext
|
|
30
|
+
context?: ProjectContext,
|
|
31
|
+
architectureDecision?: string
|
|
30
32
|
): Promise<{ spec: string; tasks: SpecTask[] }> {
|
|
31
33
|
// Use buildTaskPrompt to get the full verified-inventory context,
|
|
32
34
|
// then prepend the idea so the spec generator also sees it.
|
|
33
35
|
const contextBlock = buildTaskPrompt("", context).trim();
|
|
34
|
-
|
|
36
|
+
|
|
37
|
+
const parts: string[] = [idea];
|
|
38
|
+
if (architectureDecision) {
|
|
39
|
+
parts.push(
|
|
40
|
+
`\n=== Architecture Decision (MUST follow this approach in the spec) ===\n${architectureDecision}`
|
|
41
|
+
);
|
|
42
|
+
}
|
|
43
|
+
if (contextBlock) parts.push(contextBlock);
|
|
44
|
+
const fullPrompt = parts.join("\n\n");
|
|
35
45
|
|
|
36
46
|
const combinedSystemPrompt = specPrompt + tasksInstruction;
|
|
37
47
|
const raw = await provider.generate(fullPrompt, combinedSystemPrompt);
|
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
import { RunLog } from "./run-logger";
|
|
2
|
+
import { buildTrendReport } from "./run-trend";
|
|
3
|
+
import { computePromptHash } from "./prompt-hasher";
|
|
4
|
+
|
|
5
|
+
// ─── SVG chart helpers ────────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
/**
 * Maps a score to a traffic-light hex colour.
 *
 * @param score Score to classify.
 * @returns Green for scores of 8 and above, amber for 6 and above, red otherwise
 *   (including `NaN`, which fails both comparisons).
 */
function scoreColor(score: number): string {
  const green = "#22c55e";
  const amber = "#f59e0b";
  const red = "#ef4444";
  return score >= 8 ? green : score >= 6 ? amber : red;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Renders a list of scores as an inline SVG sparkline.
 *
 * The vertical axis always spans at least 0..10 and widens if a score falls
 * outside that range. The final point is highlighted with a dot, and both the
 * line and the dot are coloured according to the final score.
 *
 * @param scores Scores in drawing order (left to right).
 * @param width  Total SVG width in pixels.
 * @param height Total SVG height in pixels.
 * @returns SVG markup; an empty `<svg>` of the requested size when fewer than
 *   two scores are supplied.
 */
function renderSparkline(scores: number[], width = 400, height = 60): string {
  const count = scores.length;
  if (count < 2) {
    return `<svg width="${width}" height="${height}"></svg>`;
  }

  const margin = 6;
  const plotWidth = width - margin * 2;
  const plotHeight = height - margin * 2;
  const ceiling = Math.max(...scores, 10);
  const floor = Math.min(...scores, 0);
  const span = ceiling - floor || 1;

  const toPoint = (score: number, index: number): string => {
    const px = margin + (index / (count - 1)) * plotWidth;
    const py = margin + plotHeight - ((score - floor) / span) * plotHeight;
    return `${px.toFixed(1)},${py.toFixed(1)}`;
  };
  const coords = scores.map(toPoint);

  // The dot reuses the rounded coordinates of the final vertex so that it sits
  // exactly on the end of the line.
  const [tipXText, tipYText] = coords[count - 1].split(",");
  const tipX = parseFloat(tipXText);
  const tipY = parseFloat(tipYText);
  const tipColor = scoreColor(scores[count - 1]);

  return `<svg width="${width}" height="${height}" viewBox="0 0 ${width} ${height}" xmlns="http://www.w3.org/2000/svg">
<polyline points="${coords.join(" ")}" fill="none" stroke="${tipColor}" stroke-width="2" stroke-linejoin="round"/>
<circle cx="${tipX}" cy="${tipY}" r="3" fill="${tipColor}"/>
</svg>`;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Renders a horizontal bar chart as HTML `<div>` rows.
 *
 * Bar widths are scaled so that the largest value (or 1, whichever is greater)
 * occupies `maxWidth` pixels. Each row shows the label, the bar, and the value
 * with one decimal place.
 *
 * @param entries  Rows to draw; `color` falls back to indigo (`#6366f1`).
 * @param maxWidth Width in pixels of the longest bar.
 * @returns HTML markup for the chart; an empty wrapper `<div>` when `entries` is empty.
 */
function renderBarChart(
  entries: { label: string; value: number; color?: string }[],
  maxWidth = 280
): string {
  // Labels are data-derived and end up in both an attribute and element text,
  // so they must be escaped for both contexts.
  const escapeHtml = (text: string): string =>
    text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");

  const maxVal = Math.max(...entries.map((e) => e.value), 1);
  const rows = entries
    .map((e) => {
      // A negative value would otherwise emit an invalid negative CSS width.
      const pct = Math.max(0, (e.value / maxVal) * maxWidth);
      const color = e.color ?? "#6366f1";
      const label = escapeHtml(e.label);
      return `
<div style="display:flex;align-items:center;gap:8px;margin:4px 0">
<div style="width:110px;font-size:11px;color:#94a3b8;text-align:right;flex-shrink:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${label}">${label}</div>
<div style="height:14px;width:${pct.toFixed(0)}px;background:${color};border-radius:2px;min-width:2px"></div>
<div style="font-size:11px;color:#e2e8f0">${e.value.toFixed(1)}</div>
</div>`;
    })
    .join("");
  return `<div style="margin:0">${rows}</div>`;
}
|
|
57
|
+
|
|
58
|
+
// ─── Data preparation ─────────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
/** View-model produced by `prepareDashboardData` and consumed by `renderHtml`. */
interface DashboardData {
  /** Number of run logs that were analysed. */
  totalRuns: number;

  /** Number of those runs that carry a harness score. */
  scoredRuns: number;

  /** Mean harness score over the scored runs; `null` when none are scored. */
  avgScore: number | null;

  /** Fraction (0..1) of runs whose error-feedback stage finished as `:done`; `null` when no run recorded that stage. */
  compilePassRate: number | null;

  /** Hash of the prompts currently in use. */
  currentPromptHash: string;

  /** Scored runs feeding the sparkline and the "Recent Runs" table. */
  recentScores: Array<{
    runId: string;
    date: string;
    score: number;
    hash: string;
  }>;

  /** Per-prompt-hash score statistics. */
  promptGroups: Array<{
    hash: string;
    runs: number;
    avg: number;
    best: number;
    worst: number;
    isCurrent: boolean;
  }>;

  /** Average duration of each pipeline stage, in milliseconds. */
  stageDurations: Array<{ stage: string; avgMs: number }>;

  /** Most frequent error messages and how often each occurred. */
  topErrors: Array<{ message: string; count: number }>;

  /** Start time of the first log in the input; `null` when there are no logs. */
  lastRunAt: string | null;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Aggregates raw run logs into the view-model rendered by the dashboard.
 *
 * NOTE(review): `logs` is presumably ordered newest-first — `lastRunAt` is read
 * from `logs[0]` and the "recent" windows take leading slices. Confirm against
 * the caller.
 *
 * @param logs Run logs to analyse.
 * @returns Totals, score statistics, per-stage average durations (first 20
 *   logs), the five most frequent errors (all logs) and prompt-hash groups
 *   taken from the trend report.
 */
function prepareDashboardData(logs: RunLog[]): DashboardData {
  const currentPromptHash = computePromptHash();
  const trend = buildTrendReport(logs, { last: 50 });

  // Runs that carry a harness score, in input order.
  const scored = logs.filter((run) => run.harnessScore !== undefined);

  // First 30 scored runs, flipped so the sparkline is drawn in reverse input order.
  const recentScores = scored
    .slice(0, 30)
    .reverse()
    .map((run) => ({
      runId: run.runId,
      date: run.startedAt.slice(0, 10),
      score: run.harnessScore!,
      hash: run.promptHash ?? "(no hash)",
    }));

  // Stage durations: within a run the last terminal event of a stage wins;
  // the per-run values are then averaged across the first 20 logs.
  const stageTotals: Record<string, { total: number; count: number }> = {};
  for (const run of logs.slice(0, 20)) {
    const lastDurationByStage: Record<string, number> = {};
    for (const entry of run.entries ?? []) {
      const isTerminal = entry.event.endsWith(":done") || entry.event.endsWith(":failed");
      if (!isTerminal) continue;
      const durationMs = entry.data?.durationMs;
      if (typeof durationMs !== "number") continue;
      lastDurationByStage[entry.event.replace(/:done$|:failed$/, "")] = durationMs;
    }
    for (const stage of Object.keys(lastDurationByStage)) {
      const bucket = stageTotals[stage] ?? (stageTotals[stage] = { total: 0, count: 0 });
      bucket.total += lastDurationByStage[stage];
      bucket.count += 1;
    }
  }
  const stageDurations = Object.entries(stageTotals)
    .map(([stage, { total, count }]) => ({ stage, avgMs: total / count }))
    .sort((a, b) => b.avgMs - a.avgMs)
    .slice(0, 8);

  // Error frequency across every log, keyed by the first 80 characters.
  const errorTally: Record<string, number> = {};
  for (const message of logs.flatMap((run) => run.errors ?? [])) {
    const key = message.slice(0, 80);
    errorTally[key] = (errorTally[key] ?? 0) + 1;
  }
  const topErrors = Object.entries(errorTally)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5)
    .map(([message, count]) => ({ message, count }));

  // Compile pass rate: the first error_feedback terminal event of each run decides.
  let compileTotal = 0;
  let compilePassed = 0;
  for (const run of logs) {
    const outcome = run.entries?.find(
      (e) => e.event === "error_feedback:done" || e.event === "error_feedback:failed"
    );
    if (!outcome) continue;
    compileTotal += 1;
    if (outcome.event.endsWith(":done")) compilePassed += 1;
  }

  let scoreSum = 0;
  for (const run of scored) {
    scoreSum += run.harnessScore!;
  }
  const avgScore = scored.length > 0 ? scoreSum / scored.length : null;

  return {
    totalRuns: logs.length,
    scoredRuns: scored.length,
    avgScore,
    compilePassRate: compileTotal > 0 ? compilePassed / compileTotal : null,
    currentPromptHash,
    recentScores,
    promptGroups: trend.promptGroups.map((group) => ({
      hash: group.promptHash,
      runs: group.runs,
      avg: group.avg,
      best: group.best,
      worst: group.worst,
      isCurrent: group.isCurrent,
    })),
    stageDurations,
    topErrors,
    lastRunAt: logs[0]?.startedAt ?? null,
  };
}
|
|
159
|
+
|
|
160
|
+
// ─── HTML renderer ────────────────────────────────────────────────────────────
|
|
161
|
+
|
|
162
|
+
/**
 * Renders the complete, self-contained dashboard HTML document.
 *
 * @param data              Aggregated view-model to display.
 * @param generatedAt       Human-readable generation timestamp shown in the header.
 * @param totalLogsAnalyzed Number of run logs the data was computed from; shown
 *   in the "Top Errors" heading.
 * @returns A full `<!DOCTYPE html>` document as a string.
 */
function renderHtml(data: DashboardData, generatedAt: string, totalLogsAnalyzed: number): string {
  // Run ids, hashes and error messages come from log files, so every
  // data-derived string is escaped before being placed into markup.
  const esc = (text: string): string =>
    text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");

  const sparkline = renderSparkline(data.recentScores.map((r) => r.score));

  const stageBar = renderBarChart(
    data.stageDurations.map((s) => ({
      label: s.stage.replace(/_/g, " "),
      value: Math.round(s.avgMs / 100) / 10, // seconds
      color: "#6366f1",
    })),
    240
  );

  const promptRows = data.promptGroups
    .map((g) => {
      const isCurrent = g.isCurrent;
      const badge = isCurrent ? `<span style="background:#4f46e5;color:#fff;font-size:9px;padding:1px 6px;border-radius:9px;margin-left:6px">current</span>` : "";
      const avgColor = scoreColor(g.avg);
      return `<tr style="${isCurrent ? "background:#1e1b4b" : ""}">
<td style="font-family:monospace;font-size:12px;color:#a5b4fc">${esc(g.hash)}${badge}</td>
<td style="text-align:center;color:#94a3b8">${g.runs}</td>
<td style="text-align:center;color:${avgColor};font-weight:600">${g.avg.toFixed(1)}</td>
<td style="text-align:center;color:#22c55e">${g.best.toFixed(1)}</td>
<td style="text-align:center;color:#ef4444">${g.worst.toFixed(1)}</td>
</tr>`;
    })
    .join("");

  // `recentScores` runs oldest → newest (sparkline order). Take the last ten
  // entries first, then flip them, so the table lists the ten most recent runs
  // newest-first. Reversing before `slice(-10)` selected the ten oldest.
  const recentRows = data.recentScores
    .slice(-10)
    .reverse()
    .map((r) => {
      const color = scoreColor(r.score);
      // Clamp to the 10-cell gauge; String.prototype.repeat throws on a
      // negative count, which an out-of-range score would produce.
      const filled = Number.isFinite(r.score)
        ? Math.min(10, Math.max(0, Math.round(r.score)))
        : 0;
      const bar = "█".repeat(filled) + "░".repeat(10 - filled);
      return `<tr>
<td style="font-family:monospace;font-size:11px;color:#64748b">${esc(r.date)}</td>
<td style="font-family:monospace;font-size:11px;color:#94a3b8">${esc(r.runId.slice(-12))}</td>
<td style="color:${color};font-size:12px;font-family:monospace">${bar}</td>
<td style="text-align:right;color:${color};font-weight:600">${r.score.toFixed(1)}</td>
</tr>`;
    })
    .join("");

  const avgDisplay = data.avgScore !== null
    ? `<span style="color:${scoreColor(data.avgScore)}">${data.avgScore.toFixed(1)}</span>`
    : `<span style="color:#475569">—</span>`;

  const compileDisplay = data.compilePassRate !== null
    ? `<span style="color:${data.compilePassRate >= 0.8 ? "#22c55e" : "#f59e0b"}">${Math.round(data.compilePassRate * 100)}%</span>`
    : `<span style="color:#475569">—</span>`;

  const errorRows = data.topErrors.length > 0
    ? data.topErrors.map((e) =>
      `<div style="display:flex;gap:8px;align-items:flex-start;margin:4px 0">
<span style="color:#ef4444;font-weight:600;flex-shrink:0">${e.count}×</span>
<span style="color:#94a3b8;font-size:11px;font-family:monospace;word-break:break-all">${esc(e.message)}</span>
</div>`
    ).join("")
    : `<div style="color:#475569;font-size:12px">No errors recorded</div>`;

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>ai-spec Harness Dashboard</title>
<style>
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
body { background: #0f172a; color: #e2e8f0; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; font-size: 14px; line-height: 1.5; }
.header { background: #1e293b; border-bottom: 1px solid #334155; padding: 16px 24px; display: flex; align-items: center; justify-content: space-between; }
.header h1 { font-size: 16px; font-weight: 600; color: #f1f5f9; }
.header .meta { font-size: 11px; color: #475569; }
.grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 16px; padding: 20px 24px; }
.card { background: #1e293b; border: 1px solid #334155; border-radius: 8px; padding: 16px; }
.card h2 { font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.08em; color: #64748b; margin-bottom: 12px; }
.stat-row { display: flex; gap: 24px; flex-wrap: wrap; }
.stat { text-align: center; }
.stat .value { font-size: 28px; font-weight: 700; line-height: 1; }
.stat .label { font-size: 11px; color: #64748b; margin-top: 4px; }
table { width: 100%; border-collapse: collapse; }
th { font-size: 10px; text-transform: uppercase; letter-spacing: 0.05em; color: #475569; padding: 4px 8px; text-align: left; border-bottom: 1px solid #334155; }
td { padding: 6px 8px; border-bottom: 1px solid #1e293b; }
tr:last-child td { border-bottom: none; }
.sparkline-wrap { margin: 8px 0; }
.full { grid-column: 1 / -1; }
</style>
</head>
<body>
<div class="header">
<h1>ai-spec · Harness Dashboard</h1>
<div class="meta">Generated ${esc(generatedAt)} · Current prompt: <code style="color:#a5b4fc">${esc(data.currentPromptHash)}</code></div>
</div>

<div class="grid">

<!-- Overview stats -->
<div class="card">
<h2>Overview</h2>
<div class="stat-row">
<div class="stat">
<div class="value" style="color:#e2e8f0">${data.totalRuns}</div>
<div class="label">Total Runs</div>
</div>
<div class="stat">
<div class="value">${avgDisplay}</div>
<div class="label">Avg Score</div>
</div>
<div class="stat">
<div class="value">${compileDisplay}</div>
<div class="label">Compile Pass</div>
</div>
<div class="stat">
<div class="value" style="color:#e2e8f0">${data.scoredRuns}</div>
<div class="label">Scored Runs</div>
</div>
</div>
</div>

<!-- Score trend sparkline -->
<div class="card">
<h2>Score Trend (last ${data.recentScores.length} runs)</h2>
<div class="sparkline-wrap">${sparkline}</div>
${data.recentScores.length === 0 ? '<div style="color:#475569;font-size:12px">No scored runs yet</div>' : ""}
</div>

<!-- Prompt version comparison -->
<div class="card full">
<h2>Prompt Version Performance</h2>
${data.promptGroups.length === 0
    ? '<div style="color:#475569;font-size:12px">No runs with prompt hash yet</div>'
    : `<table>
<thead><tr>
<th>Prompt Hash</th>
<th style="text-align:center">Runs</th>
<th style="text-align:center">Avg</th>
<th style="text-align:center">Best</th>
<th style="text-align:center">Worst</th>
</tr></thead>
<tbody>${promptRows}</tbody>
</table>`}
</div>

<!-- Recent run history -->
<div class="card">
<h2>Recent Runs</h2>
${data.recentScores.length === 0
    ? '<div style="color:#475569;font-size:12px">No scored runs yet</div>'
    : `<table>
<thead><tr><th>Date</th><th>Run ID</th><th>Score</th><th style="text-align:right">/10</th></tr></thead>
<tbody>${recentRows}</tbody>
</table>`}
</div>

<!-- Stage durations -->
<div class="card">
<h2>Avg Stage Duration (seconds)</h2>
${data.stageDurations.length === 0
    ? '<div style="color:#475569;font-size:12px">No stage data yet</div>'
    : stageBar}
</div>

<!-- Top errors -->
<div class="card">
<h2>Top Errors (across ${totalLogsAnalyzed} runs)</h2>
${errorRows}
</div>

</div>
</body>
</html>`;
}
|
|
333
|
+
|
|
334
|
+
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
335
|
+
|
|
336
|
+
/**
 * Builds the harness dashboard as a standalone HTML document.
 *
 * @param logs Run logs to summarise.
 * @returns The rendered HTML, stamped with the current time in UTC
 *   (`YYYY-MM-DD HH:MM:SS UTC`).
 */
export function generateDashboard(logs: RunLog[]): string {
  const viewModel = prepareDashboardData(logs);
  // Keep the date and time down to whole seconds, then swap the ISO "T"
  // separator for a space.
  const isoSeconds = new Date().toISOString().slice(0, 19);
  const stamp = `${isoSeconds.replace("T", " ")} UTC`;
  return renderHtml(viewModel, stamp, logs.length);
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* design-dialogue.ts — Pre-spec architectural option proposal.
|
|
3
|
+
*
|
|
4
|
+
* Inspired by Superpowers' brainstorming phase: before writing a full spec,
|
|
5
|
+
* present 2-3 distinct architectural approaches with trade-offs and let the
|
|
6
|
+
* developer choose. The chosen approach is then injected into the spec prompt
|
|
7
|
+
* as a binding architectural decision, preventing mid-spec drift.
|
|
8
|
+
*
|
|
9
|
+
* Skipped in --fast and --auto modes.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { select } from "@inquirer/prompts";
|
|
13
|
+
import chalk from "chalk";
|
|
14
|
+
import { AIProvider } from "./spec-generator";
|
|
15
|
+
import { buildDesignOptionsPrompt, designOptionsSystemPrompt } from "../prompts/design.prompt";
|
|
16
|
+
|
|
17
|
+
/** Outcome of the pre-spec design dialogue. */
export interface DesignChoice {
  /**
   * Full AI-generated options text as it was displayed to the user.
   * Empty when generating the options failed.
   */
  optionsText: string;

  /**
   * The selected approach label + description, injected into the spec prompt,
   * e.g. "Option B — Event-driven approach: ...".
   *
   * `null` means no architecture decision was made (the user skipped the
   * dialogue, or generating/blending the options failed).
   */
  selectedApproach: string | null;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Interactive pre-spec step: asks the AI provider for a few architectural
 * options, shows them, and lets the developer pick one, blend them, or skip.
 */
export class DesignDialogue {
  constructor(private provider: AIProvider) {}

  /**
   * Runs the dialogue.
   *
   * @param idea         The feature idea the options are generated for.
   * @param contextHints Project hints forwarded to the options prompt.
   * @returns The options text plus the chosen approach. `selectedApproach` is
   *   `null` when the user skips or when a provider call fails; this method
   *   does not throw on provider errors.
   */
  async run(
    idea: string,
    contextHints: { techStack: string[]; repoType: string; constitution?: string }
  ): Promise<DesignChoice> {
    console.log(chalk.blue("\n[1.5/6] Design options..."));
    console.log(
      chalk.gray(` Proposing architectural approaches with ${this.provider.providerName}/${this.provider.modelName}...`)
    );

    const prompt = buildDesignOptionsPrompt(idea, contextHints);
    let optionsText: string;

    try {
      optionsText = await this.provider.generate(prompt, designOptionsSystemPrompt);
    } catch (err: unknown) {
      // Best effort: a failed proposal must not block spec generation.
      const reason = err instanceof Error ? err.message : String(err);
      console.log(chalk.yellow(` ⚠ Design options failed (${reason}), skipping.`));
      return { optionsText: "", selectedApproach: null };
    }

    // Display the options
    console.log(chalk.cyan("\n" + "─".repeat(52)));
    console.log(optionsText);
    console.log(chalk.cyan("─".repeat(52) + "\n"));

    // Parse option headers from the text (### Option A / B / C)
    const optionMatches = [...optionsText.matchAll(/###\s+(Option\s+[A-C][^:\n]*)/gi)];
    const parsedOptions = optionMatches.map((m) => m[1].trim());

    // Build choices for the select prompt
    const choices: Array<{ name: string; value: string }> = parsedOptions.map((label) => ({
      name: label,
      value: label,
    }));

    choices.push(
      { name: "🔀 Blend — let AI combine the best of all options", value: "__blend__" },
      { name: "⏭️ Skip — proceed to spec without an architecture decision", value: "__skip__" }
    );

    const selected = await select({
      message: "Which approach should the spec follow?",
      choices,
    });

    if (selected === "__skip__") {
      console.log(chalk.gray(" Architecture decision skipped — spec will be generated freely."));
      return { optionsText, selectedApproach: null };
    }

    if (selected === "__blend__") {
      return this.blend(optionsText);
    }

    const selectedIdx = parsedOptions.indexOf(selected);
    const selectedApproach = this.describeOption(optionsText, optionMatches, selectedIdx, selected);

    console.log(chalk.green(` ✔ Architecture decision locked: ${selected}`));
    return { optionsText, selectedApproach };
  }

  /** Asks the provider to merge all options into one short architectural decision. */
  private async blend(optionsText: string): Promise<DesignChoice> {
    console.log(chalk.blue(" Blending approaches..."));
    try {
      const blendPrompt = `The developer wants to blend the best aspects of all options below.
Write a single-paragraph architectural decision that combines their strengths.
Output ONLY the blended approach description (2-4 sentences, no headers).

${optionsText}`;
      const blended = await this.provider.generate(
        blendPrompt,
        "You are a Senior Architect. Output only the blended architectural approach, 2-4 sentences."
      );
      const blendedApproach = `Blended approach: ${blended.trim()}`;
      console.log(chalk.cyan(`\n Selected: ${blendedApproach.slice(0, 80)}...`));
      return { optionsText, selectedApproach: blendedApproach };
    } catch {
      console.log(chalk.yellow(" Blend failed, proceeding without architecture decision."));
      return { optionsText, selectedApproach: null };
    }
  }

  /**
   * Returns the header plus body text of option `idx`, capped at 400 characters,
   * or `fallback` when the option cannot be located.
   */
  private describeOption(
    optionsText: string,
    optionMatches: RegExpMatchArray[],
    idx: number,
    fallback: string
  ): string {
    // Use the regex match offsets rather than rebuilding "### <label>" and
    // searching for it: the trimmed label need not match the raw header text.
    const start = optionMatches[idx]?.index;
    if (start === undefined) return fallback;

    // The section ends at the next option header. The last option has no
    // successor, so it ends at the next horizontal rule, or at the end of the
    // text. It was previously skipped entirely, leaving only its bare label.
    const nextStart = optionMatches[idx + 1]?.index;
    const ruleAt = optionsText.indexOf("\n---", start);
    const end = nextStart ?? (ruleAt !== -1 ? ruleAt : optionsText.length);

    // cap to avoid bloating spec prompt
    return optionsText.slice(start, end).trim().slice(0, 400);
  }
}
|