ai-spec-dev 0.33.0 → 0.36.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/.claude/commands/add-lesson.md +34 -0
  2. package/.claude/commands/check-layers.md +65 -0
  3. package/.claude/commands/installed-deps.md +35 -0
  4. package/.claude/commands/recall-lessons.md +40 -0
  5. package/.claude/commands/scan-singletons.md +45 -0
  6. package/.claude/commands/verify-imports.md +48 -0
  7. package/.claude/settings.local.json +11 -1
  8. package/README.md +531 -213
  9. package/RELEASE_LOG.md +424 -0
  10. package/cli/commands/config.ts +18 -0
  11. package/cli/commands/create.ts +1248 -0
  12. package/cli/commands/dashboard.ts +62 -0
  13. package/cli/commands/init.ts +45 -8
  14. package/cli/commands/mock.ts +175 -0
  15. package/cli/commands/scan.ts +99 -0
  16. package/cli/commands/types.ts +69 -0
  17. package/cli/commands/vcr.ts +70 -0
  18. package/cli/index.ts +34 -2517
  19. package/cli/utils.ts +4 -0
  20. package/core/code-generator.ts +6 -4
  21. package/core/combined-generator.ts +13 -3
  22. package/core/dashboard-generator.ts +340 -0
  23. package/core/design-dialogue.ts +124 -0
  24. package/core/dsl-extractor.ts +9 -1
  25. package/core/dsl-feedback.ts +41 -5
  26. package/core/dsl-validator.ts +32 -0
  27. package/core/error-feedback.ts +46 -2
  28. package/core/key-store.ts +5 -4
  29. package/core/project-index.ts +301 -0
  30. package/core/provider-utils.ts +39 -4
  31. package/core/reviewer.ts +84 -6
  32. package/core/run-logger.ts +109 -3
  33. package/core/run-trend.ts +24 -4
  34. package/core/self-evaluator.ts +39 -11
  35. package/core/spec-generator.ts +14 -8
  36. package/core/task-generator.ts +17 -0
  37. package/core/types-generator.ts +219 -0
  38. package/core/vcr.ts +210 -0
  39. package/dist/cli/index.js +7407 -5643
  40. package/dist/cli/index.js.map +1 -1
  41. package/dist/cli/index.mjs +7401 -5637
  42. package/dist/cli/index.mjs.map +1 -1
  43. package/dist/index.d.mts +34 -5
  44. package/dist/index.d.ts +34 -5
  45. package/dist/index.js +497 -232
  46. package/dist/index.js.map +1 -1
  47. package/dist/index.mjs +495 -233
  48. package/dist/index.mjs.map +1 -1
  49. package/docs-assets/purpose/architecture-overview.svg +64 -0
  50. package/docs-assets/purpose/create-pipeline.svg +113 -0
  51. package/docs-assets/purpose/task-layering.svg +74 -0
  52. package/package.json +1 -1
  53. package/prompts/codegen.prompt.ts +97 -9
  54. package/prompts/design.prompt.ts +59 -0
  55. package/prompts/spec.prompt.ts +8 -1
  56. package/prompts/tasks.prompt.ts +27 -2
  57. package/purpose.md +600 -174
  58. package/tests/code-generator.test.ts +253 -0
  59. package/tests/context-loader.test.ts +207 -0
  60. package/tests/dsl-validator.test.ts +105 -0
  61. package/tests/openapi-exporter.test.ts +310 -0
  62. package/tests/reviewer.test.ts +214 -0
  63. package/tests/spec-generator.test.ts +228 -0
  64. package/tests/spec-versioning.test.ts +205 -0
package/cli/utils.ts CHANGED
@@ -16,6 +16,10 @@ export interface AiSpecConfig {
16
16
  codegenModel?: string;
17
17
  /** Minimum overall spec score (1-10) required to pass Approval Gate. 0 = disabled (default). */
18
18
  minSpecScore?: number;
19
+ /** Minimum harness score (1-10) required for pipeline success. 0 = disabled (default). */
20
+ minHarnessScore?: number;
21
+ /** Maximum error-feedback cycles before giving up (default: 2, TDD default: 3). */
22
+ maxErrorCycles?: number;
19
23
  }
20
24
 
21
25
  export const CONFIG_FILE = ".ai-spec.json";
@@ -1,5 +1,5 @@
1
1
  import chalk from "chalk";
2
- import { execSync } from "child_process";
2
+ import { execSync, spawnSync } from "child_process";
3
3
  import * as path from "path";
4
4
  import * as fs from "fs-extra";
5
5
  import { AIProvider } from "./spec-generator";
@@ -49,7 +49,7 @@ function buildInstalledPackagesSection(context?: ProjectContext): string {
49
49
  *
50
50
  * Falls back to first 3000 chars for CommonJS files with no explicit exports.
51
51
  */
52
- function extractBehavioralContract(content: string): string {
52
+ export function extractBehavioralContract(content: string): string {
53
53
  const lines = content.split("\n");
54
54
  const contractLines: string[] = [];
55
55
  const throwLines: string[] = [];
@@ -349,9 +349,10 @@ export class CodeGenerator {
349
349
  console.log(chalk.cyan(` ๐Ÿค– Auto mode: running claude -p (non-interactive)...`));
350
350
  console.log(chalk.gray(` Spec: ${specFilePath}`));
351
351
  try {
352
- execSync(`${claudeCmd} -p "${promptContent.replace(/"/g, '\\"')}"`, {
352
+ spawnSync(claudeCmd, ["-p", promptContent], {
353
353
  cwd: workingDir,
354
354
  stdio: "inherit",
355
+ shell: false,
355
356
  });
356
357
  console.log(chalk.green("\n โœ” Claude Code completed."));
357
358
  } catch {
@@ -413,9 +414,10 @@ export class CodeGenerator {
413
414
 
414
415
  let taskStatus: "done" | "failed" = "done";
415
416
  try {
416
- execSync(`${claudeCmd} -p "${taskPrompt.replace(/"/g, '\\"').replace(/\n/g, "\\n")}"`, {
417
+ spawnSync(claudeCmd, ["-p", taskPrompt], {
417
418
  cwd: workingDir,
418
419
  stdio: "inherit",
420
+ shell: false,
419
421
  });
420
422
  completed++;
421
423
  } catch {
@@ -20,18 +20,28 @@ const tasksInstruction = `
20
20
  After outputting the complete spec above, append EXACTLY this line on its own (no extra text before or after it):
21
21
  ${TASKS_SEPARATOR}
22
22
  Then output a valid JSON array of implementation tasks. Each element must have these exact fields:
23
- {"id":"TASK-001","title":"...","description":"1-2 sentences, specific","layer":"data|infra|service|api|test","filesToTouch":["src/..."],"acceptanceCriteria":["verifiable condition"],"dependencies":[],"priority":"high|medium|low"}
23
+ {"id":"TASK-001","title":"...","description":"1-2 sentences, specific","layer":"data|infra|service|api|test","filesToTouch":["src/..."],"acceptanceCriteria":["behavioral condition"],"verificationSteps":["concrete runnable check โ†’ expected result"],"dependencies":[],"priority":"high|medium|low"}
24
+ verificationSteps rules: each step is a specific command or action with observable expected output (e.g. "POST /api/orders โ†’ 201 {id, status:'pending'}"). At least 2 per task, max 5. Never vague.
24
25
  Layer order: data โ†’ infra โ†’ service โ†’ api โ†’ test. 4-10 tasks total. filesToTouch must use real paths from the project context.`;
25
26
 
26
27
  export async function generateSpecWithTasks(
27
28
  provider: AIProvider,
28
29
  idea: string,
29
- context?: ProjectContext
30
+ context?: ProjectContext,
31
+ architectureDecision?: string
30
32
  ): Promise<{ spec: string; tasks: SpecTask[] }> {
31
33
  // Use buildTaskPrompt to get the full verified-inventory context,
32
34
  // then prepend the idea so the spec generator also sees it.
33
35
  const contextBlock = buildTaskPrompt("", context).trim();
34
- const fullPrompt = [idea, contextBlock].filter(Boolean).join("\n\n");
36
+
37
+ const parts: string[] = [idea];
38
+ if (architectureDecision) {
39
+ parts.push(
40
+ `\n=== Architecture Decision (MUST follow this approach in the spec) ===\n${architectureDecision}`
41
+ );
42
+ }
43
+ if (contextBlock) parts.push(contextBlock);
44
+ const fullPrompt = parts.join("\n\n");
35
45
 
36
46
  const combinedSystemPrompt = specPrompt + tasksInstruction;
37
47
  const raw = await provider.generate(fullPrompt, combinedSystemPrompt);
@@ -0,0 +1,340 @@
1
+ import { RunLog } from "./run-logger";
2
+ import { buildTrendReport } from "./run-trend";
3
+ import { computePromptHash } from "./prompt-hasher";
4
+
5
+ // ─── SVG chart helpers ────────────────────────────────────────────────────────
6
+
7
/**
 * Picks the hex colour used to display a score.
 *
 * @param score - Score to classify.
 * @returns Green (`#22c55e`) for 8 and above, amber (`#f59e0b`) for 6 up to 8,
 *          red (`#ef4444`) for everything else (including NaN).
 */
function scoreColor(score: number): string {
  return score >= 8
    ? "#22c55e" // green
    : score >= 6
      ? "#f59e0b" // amber
      : "#ef4444"; // red
}
12
+
13
/**
 * Renders a list of scores as an inline SVG sparkline.
 *
 * The vertical axis always spans at least 0–10 and widens if a score falls
 * outside that range. The line and the dot marking the final point are both
 * coloured by the last score.
 *
 * @param scores - Scores in drawing order (left to right).
 * @param width  - SVG width in pixels.
 * @param height - SVG height in pixels.
 * @returns SVG markup; an empty `<svg>` of the requested size when fewer than
 *          two scores are given.
 */
function renderSparkline(scores: number[], width = 400, height = 60): string {
  const count = scores.length;
  if (count < 2) {
    return `<svg width="${width}" height="${height}"></svg>`;
  }

  const pad = 6;
  const w = width - pad * 2;
  const h = height - pad * 2;
  const max = Math.max(...scores, 10);
  const min = Math.min(...scores, 0);
  const range = max - min || 1; // avoid dividing by zero on a flat axis

  const coords: string[] = [];
  let lastX = 0;
  let lastY = 0;
  for (let i = 0; i < count; i++) {
    const xText = (pad + (i / (count - 1)) * w).toFixed(1);
    const yText = (pad + h - ((scores[i] - min) / range) * h).toFixed(1);
    coords.push(`${xText},${yText}`);
    // The dot is placed at the rounded coordinates so it sits exactly on the line end.
    lastX = parseFloat(xText);
    lastY = parseFloat(yText);
  }

  const stroke = scoreColor(scores[count - 1]);

  return `<svg width="${width}" height="${height}" viewBox="0 0 ${width} ${height}" xmlns="http://www.w3.org/2000/svg">
  <polyline points="${coords.join(" ")}" fill="none" stroke="${stroke}" stroke-width="2" stroke-linejoin="round"/>
  <circle cx="${lastX}" cy="${lastY}" r="3" fill="${stroke}"/>
</svg>`;
}
37
+
38
/**
 * Renders a horizontal bar chart as a block of inline-styled HTML.
 *
 * Bars are scaled relative to the largest value (never less than 1), so the
 * largest entry is `maxWidth` pixels wide whenever it is at least 1.
 *
 * Labels are HTML-escaped before being placed in the `title` attribute and the
 * element text, so a label containing markup cannot break or inject into the
 * page.
 *
 * @param entries  - Bars to draw; `color` defaults to `#6366f1`.
 * @param maxWidth - Pixel width of the largest bar.
 * @returns HTML markup; an empty wrapper `<div>` when `entries` is empty.
 */
function renderBarChart(
  entries: { label: string; value: number; color?: string }[],
  maxWidth = 280
): string {
  // Kept local so the function stays self-contained.
  const escapeHtml = (text: string): string =>
    text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");

  const maxVal = Math.max(...entries.map((e) => e.value), 1);
  const rows = entries
    .map((e) => {
      const pct = (e.value / maxVal) * maxWidth;
      const color = e.color ?? "#6366f1";
      const label = escapeHtml(e.label);
      return `
  <div style="display:flex;align-items:center;gap:8px;margin:4px 0">
    <div style="width:110px;font-size:11px;color:#94a3b8;text-align:right;flex-shrink:0;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" title="${label}">${label}</div>
    <div style="height:14px;width:${pct.toFixed(0)}px;background:${color};border-radius:2px;min-width:2px"></div>
    <div style="font-size:11px;color:#e2e8f0">${e.value.toFixed(1)}</div>
  </div>`;
    })
    .join("");
  return `<div style="margin:0">${rows}</div>`;
}
57
+
58
+ // ─── Data preparation ─────────────────────────────────────────────────────────
59
+
60
/** Aggregated figures consumed by the HTML renderer. */
interface DashboardData {
  /** Number of run logs that were analysed. */
  totalRuns: number;
  /** Number of analysed logs that carry a harness score. */
  scoredRuns: number;
  /** Mean harness score over the scored runs; `null` when there are none. */
  avgScore: number | null;
  /** Fraction (0–1) of runs whose error-feedback stage succeeded; `null` when no run recorded that stage. */
  compilePassRate: number | null;
  /** Hash of the prompts currently in use. */
  currentPromptHash: string;
  /** Scored runs plotted in the trend chart and listed in the recent-runs table. */
  recentScores: Array<{ runId: string; date: string; score: number; hash: string }>;
  /** Score statistics grouped by prompt hash. */
  promptGroups: Array<{
    hash: string;
    runs: number;
    avg: number;
    best: number;
    worst: number;
    isCurrent: boolean;
  }>;
  /** Average duration per pipeline stage, in milliseconds. */
  stageDurations: Array<{ stage: string; avgMs: number }>;
  /** Most frequent error messages with their occurrence counts. */
  topErrors: Array<{ message: string; count: number }>;
  /** Start timestamp of the first log in the input; `null` when there are no logs. */
  lastRunAt: string | null;
}
72
+
73
/**
 * Reduces raw run logs to the figures shown on the dashboard.
 *
 * NOTE(review): the windowing below (`slice(0, n)`, `logs[0]` as the last run)
 * assumes `logs` is ordered newest first — confirm against the caller.
 *
 * Stage durations and top errors are both computed over the 20 most recent
 * logs, which keeps the error list consistent with the dashboard heading
 * "Top Errors (last N runs)", where N is capped at 20.
 *
 * @param logs - Run logs to analyse.
 * @returns Aggregated dashboard data; score- and compile-related fields are
 *          `null` when no log provides the underlying information.
 */
function prepareDashboardData(logs: RunLog[]): DashboardData {
  const RECENT_WINDOW = 20; // logs considered for stage durations and top errors
  const SPARKLINE_WINDOW = 30; // scored runs shown in the trend chart

  const currentHash = computePromptHash();
  const report = buildTrendReport(logs, { last: 50 });
  const recentLogs = logs.slice(0, RECENT_WINDOW);

  // Pair each scored log with its narrowed score so no non-null assertion is needed later.
  const scored = logs.flatMap((log) =>
    typeof log.harnessScore === "number" ? [{ log, score: log.harnessScore }] : []
  );

  // Reversed so the chart reads left-to-right in the opposite order of the input.
  const recentScores = scored
    .slice(0, SPARKLINE_WINDOW)
    .reverse()
    .map(({ log, score }) => ({
      runId: log.runId,
      date: log.startedAt.slice(0, 10),
      score,
      hash: log.promptHash ?? "(no hash)",
    }));

  // Stage duration aggregation: one duration per stage per log (the last
  // ":done"/":failed" entry wins), averaged across the recent window.
  const stageAccum: Record<string, { total: number; count: number }> = {};
  for (const log of recentLogs) {
    const stages: Record<string, number> = {};
    for (const entry of log.entries ?? []) {
      if (entry.event.endsWith(":done") || entry.event.endsWith(":failed")) {
        const stageName = entry.event.replace(/:done$|:failed$/, "");
        const ms = entry.data?.durationMs;
        if (typeof ms === "number") stages[stageName] = ms;
      }
    }
    for (const [stage, ms] of Object.entries(stages)) {
      const acc = (stageAccum[stage] ??= { total: 0, count: 0 });
      acc.total += ms;
      acc.count++;
    }
  }
  const stageDurations = Object.entries(stageAccum)
    .map(([stage, { total, count }]) => ({ stage, avgMs: total / count }))
    .sort((a, b) => b.avgMs - a.avgMs)
    .slice(0, 8);

  // Top errors, keyed by the first 80 characters so near-identical messages group together.
  const errorCounts: Record<string, number> = {};
  for (const log of recentLogs) {
    for (const err of log.errors ?? []) {
      const key = err.slice(0, 80);
      errorCounts[key] = (errorCounts[key] ?? 0) + 1;
    }
  }
  const topErrors = Object.entries(errorCounts)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 5)
    .map(([message, count]) => ({ message, count }));

  // Compile pass rate: the first error_feedback outcome recorded in each log decides pass/fail.
  let compilePassed = 0;
  let compileTotal = 0;
  for (const log of logs) {
    const feedback = log.entries?.find(
      (e) => e.event === "error_feedback:done" || e.event === "error_feedback:failed"
    );
    if (feedback) {
      compileTotal++;
      if (feedback.event.endsWith(":done")) compilePassed++;
    }
  }

  const avgScore =
    scored.length > 0 ? scored.reduce((sum, s) => sum + s.score, 0) / scored.length : null;

  return {
    totalRuns: logs.length,
    scoredRuns: scored.length,
    avgScore,
    compilePassRate: compileTotal > 0 ? compilePassed / compileTotal : null,
    currentPromptHash: currentHash,
    recentScores,
    promptGroups: report.promptGroups.map((g) => ({
      hash: g.promptHash,
      runs: g.runs,
      avg: g.avg,
      best: g.best,
      worst: g.worst,
      isCurrent: g.isCurrent,
    })),
    stageDurations,
    topErrors,
    lastRunAt: logs[0]?.startedAt ?? null,
  };
}
159
+
160
+ // ─── HTML renderer ────────────────────────────────────────────────────────────
161
+
162
/**
 * Renders the dashboard as a self-contained HTML document (inline CSS, no scripts).
 *
 * All text that originates from run logs (hashes, run ids, dates, error
 * messages) is HTML-escaped before interpolation.
 *
 * NOTE(review): `data.recentScores` is treated as ordered oldest → newest, so
 * the "Recent Runs" table reverses it and keeps the first ten rows — confirm
 * against the producer of `DashboardData`.
 *
 * @param data              - Aggregated dashboard figures.
 * @param generatedAt       - Human-readable generation timestamp shown in the header.
 * @param totalLogsAnalyzed - Number of logs analysed; used (capped at 20) in the "Top Errors" heading.
 * @returns The complete HTML page as a string.
 */
function renderHtml(data: DashboardData, generatedAt: string, totalLogsAnalyzed: number): string {
  // Local escaper so the function does not depend on any other helper.
  const esc = (text: string): string =>
    text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");

  const emptyNote = (message: string): string =>
    `<div style="color:#475569;font-size:12px">${message}</div>`;

  const sparkline = renderSparkline(data.recentScores.map((r) => r.score));

  const stageBar = renderBarChart(
    data.stageDurations.map((s) => ({
      label: s.stage.replace(/_/g, " "),
      value: Math.round(s.avgMs / 100) / 10, // milliseconds → seconds, one decimal
      color: "#6366f1",
    })),
    240
  );

  const promptRows = data.promptGroups
    .map((g) => {
      const isCurrent = g.isCurrent;
      const badge = isCurrent
        ? `<span style="background:#4f46e5;color:#fff;font-size:9px;padding:1px 6px;border-radius:9px;margin-left:6px">current</span>`
        : "";
      const avgColor = scoreColor(g.avg);
      return `<tr style="${isCurrent ? "background:#1e1b4b" : ""}">
        <td style="font-family:monospace;font-size:12px;color:#a5b4fc">${esc(g.hash)}${badge}</td>
        <td style="text-align:center;color:#94a3b8">${g.runs}</td>
        <td style="text-align:center;color:${avgColor};font-weight:600">${g.avg.toFixed(1)}</td>
        <td style="text-align:center;color:#22c55e">${g.best.toFixed(1)}</td>
        <td style="text-align:center;color:#ef4444">${g.worst.toFixed(1)}</td>
      </tr>`;
    })
    .join("");

  const recentRows = data.recentScores
    .slice()
    .reverse()
    // After reversing, the first ten rows are the ones at the end of recentScores.
    .slice(0, 10)
    .map((r) => {
      const color = scoreColor(r.score);
      // Clamp to 0..10 (NaN → 0): String.prototype.repeat throws on negative counts.
      const filled = Math.min(10, Math.max(0, Math.round(r.score))) || 0;
      const bar = "█".repeat(filled) + "░".repeat(10 - filled);
      return `<tr>
        <td style="font-family:monospace;font-size:11px;color:#64748b">${esc(r.date)}</td>
        <td style="font-family:monospace;font-size:11px;color:#94a3b8">${esc(r.runId.slice(-12))}</td>
        <td style="color:${color};font-size:12px;font-family:monospace">${bar}</td>
        <td style="text-align:right;color:${color};font-weight:600">${r.score.toFixed(1)}</td>
      </tr>`;
    })
    .join("");

  const avgDisplay =
    data.avgScore !== null
      ? `<span style="color:${scoreColor(data.avgScore)}">${data.avgScore.toFixed(1)}</span>`
      : `<span style="color:#475569">—</span>`;

  const compileDisplay =
    data.compilePassRate !== null
      ? `<span style="color:${data.compilePassRate >= 0.8 ? "#22c55e" : "#f59e0b"}">${Math.round(data.compilePassRate * 100)}%</span>`
      : `<span style="color:#475569">—</span>`;

  const errorRows =
    data.topErrors.length > 0
      ? data.topErrors
          .map(
            (e) =>
              `<div style="display:flex;gap:8px;align-items:flex-start;margin:4px 0">
          <span style="color:#ef4444;font-weight:600;flex-shrink:0">${e.count}×</span>
          <span style="color:#94a3b8;font-size:11px;font-family:monospace;word-break:break-all">${esc(e.message)}</span>
        </div>`
          )
          .join("")
      : emptyNote("No errors recorded");

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>ai-spec Harness Dashboard</title>
<style>
  *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
  body { background: #0f172a; color: #e2e8f0; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; font-size: 14px; line-height: 1.5; }
  .header { background: #1e293b; border-bottom: 1px solid #334155; padding: 16px 24px; display: flex; align-items: center; justify-content: space-between; }
  .header h1 { font-size: 16px; font-weight: 600; color: #f1f5f9; }
  .header .meta { font-size: 11px; color: #475569; }
  .grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 16px; padding: 20px 24px; }
  .card { background: #1e293b; border: 1px solid #334155; border-radius: 8px; padding: 16px; }
  .card h2 { font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.08em; color: #64748b; margin-bottom: 12px; }
  .stat-row { display: flex; gap: 24px; flex-wrap: wrap; }
  .stat { text-align: center; }
  .stat .value { font-size: 28px; font-weight: 700; line-height: 1; }
  .stat .label { font-size: 11px; color: #64748b; margin-top: 4px; }
  table { width: 100%; border-collapse: collapse; }
  th { font-size: 10px; text-transform: uppercase; letter-spacing: 0.05em; color: #475569; padding: 4px 8px; text-align: left; border-bottom: 1px solid #334155; }
  td { padding: 6px 8px; border-bottom: 1px solid #1e293b; }
  tr:last-child td { border-bottom: none; }
  .sparkline-wrap { margin: 8px 0; }
  .full { grid-column: 1 / -1; }
</style>
</head>
<body>
<div class="header">
  <h1>ai-spec · Harness Dashboard</h1>
  <div class="meta">Generated ${esc(generatedAt)} · Current prompt: <code style="color:#a5b4fc">${esc(data.currentPromptHash)}</code></div>
</div>

<div class="grid">

  <!-- Overview stats -->
  <div class="card">
    <h2>Overview</h2>
    <div class="stat-row">
      <div class="stat">
        <div class="value" style="color:#e2e8f0">${data.totalRuns}</div>
        <div class="label">Total Runs</div>
      </div>
      <div class="stat">
        <div class="value">${avgDisplay}</div>
        <div class="label">Avg Score</div>
      </div>
      <div class="stat">
        <div class="value">${compileDisplay}</div>
        <div class="label">Compile Pass</div>
      </div>
      <div class="stat">
        <div class="value" style="color:#e2e8f0">${data.scoredRuns}</div>
        <div class="label">Scored Runs</div>
      </div>
    </div>
  </div>

  <!-- Score trend sparkline -->
  <div class="card">
    <h2>Score Trend (last ${data.recentScores.length} runs)</h2>
    <div class="sparkline-wrap">${sparkline}</div>
    ${data.recentScores.length === 0 ? emptyNote("No scored runs yet") : ""}
  </div>

  <!-- Prompt version comparison -->
  <div class="card full">
    <h2>Prompt Version Performance</h2>
    ${
      data.promptGroups.length === 0
        ? emptyNote("No runs with prompt hash yet")
        : `<table>
      <thead><tr>
        <th>Prompt Hash</th>
        <th style="text-align:center">Runs</th>
        <th style="text-align:center">Avg</th>
        <th style="text-align:center">Best</th>
        <th style="text-align:center">Worst</th>
      </tr></thead>
      <tbody>${promptRows}</tbody>
    </table>`
    }
  </div>

  <!-- Recent run history -->
  <div class="card">
    <h2>Recent Runs</h2>
    ${
      data.recentScores.length === 0
        ? emptyNote("No scored runs yet")
        : `<table>
      <thead><tr><th>Date</th><th>Run ID</th><th>Score</th><th style="text-align:right">/10</th></tr></thead>
      <tbody>${recentRows}</tbody>
    </table>`
    }
  </div>

  <!-- Stage durations -->
  <div class="card">
    <h2>Avg Stage Duration (seconds)</h2>
    ${data.stageDurations.length === 0 ? emptyNote("No stage data yet") : stageBar}
  </div>

  <!-- Top errors -->
  <div class="card">
    <h2>Top Errors (last ${Math.min(totalLogsAnalyzed, 20)} runs)</h2>
    ${errorRows}
  </div>

</div>
</body>
</html>`;
}
333
+
334
+ // ─── Public API ───────────────────────────────────────────────────────────────
335
+
336
/**
 * Builds the harness dashboard page for a set of run logs.
 *
 * @param logs - Run logs to summarise.
 * @returns A complete HTML document, stamped with the current UTC time
 *          (`YYYY-MM-DD HH:MM:SS UTC`).
 */
export function generateDashboard(logs: RunLog[]): string {
  // ISO-8601 with the "T" separator swapped for a space and sub-second precision dropped.
  const isoNow = new Date().toISOString();
  const stamp = isoNow.replace("T", " ").slice(0, 19);
  const generatedAt = stamp + " UTC";

  return renderHtml(prepareDashboardData(logs), generatedAt, logs.length);
}
@@ -0,0 +1,124 @@
1
+ /**
2
+ * design-dialogue.ts — Pre-spec architectural option proposal.
3
+ *
4
+ * Inspired by Superpowers' brainstorming phase: before writing a full spec,
5
+ * present 2-3 distinct architectural approaches with trade-offs and let the
6
+ * developer choose. The chosen approach is then injected into the spec prompt
7
+ * as a binding architectural decision, preventing mid-spec drift.
8
+ *
9
+ * Skipped in --fast and --auto modes.
10
+ */
11
+
12
+ import { select } from "@inquirer/prompts";
13
+ import chalk from "chalk";
14
+ import { AIProvider } from "./spec-generator";
15
+ import { buildDesignOptionsPrompt, designOptionsSystemPrompt } from "../prompts/design.prompt";
16
+
17
/** Outcome of the pre-spec design dialogue. */
export interface DesignChoice {
  /** Complete AI-generated options text that was displayed to the user. */
  optionsText: string;

  /**
   * Label and description of the chosen approach, to be injected into the
   * spec prompt — for example "Option B — Event-driven approach: ...".
   *
   * `null` means the user skipped the dialogue.
   */
  selectedApproach: string | null;
}
27
+
28
/**
 * Interactive step that asks the AI provider for a few architectural options,
 * shows them to the developer and returns the one they pick.
 */
export class DesignDialogue {
  constructor(private provider: AIProvider) {}

  /**
   * Runs the dialogue once.
   *
   * Flow: generate options → print them → let the user select an option,
   * "Blend" (the provider merges all options into one paragraph) or "Skip".
   * Provider failures never throw from here; they are reported on the console
   * and yield `selectedApproach: null`.
   *
   * For a concrete option, the returned approach is the text from that
   * option's `### Option X` header up to the next option header — or, for the
   * last option, up to the next `---` or the end of the text — capped at 400
   * characters. If the header cannot be located, only the label is returned.
   *
   * @param idea         - The feature idea the options should address.
   * @param contextHints - Project hints forwarded to the options prompt.
   * @returns The options text plus the selected approach (or `null`).
   */
  async run(
    idea: string,
    contextHints: { techStack: string[]; repoType: string; constitution?: string }
  ): Promise<DesignChoice> {
    console.log(chalk.blue("\n[1.5/6] Design options..."));
    console.log(
      chalk.gray(
        `  Proposing architectural approaches with ${this.provider.providerName}/${this.provider.modelName}...`
      )
    );

    const prompt = buildDesignOptionsPrompt(idea, contextHints);
    let optionsText: string;

    try {
      optionsText = await this.provider.generate(prompt, designOptionsSystemPrompt);
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      console.log(chalk.yellow(`  ⚠  Design options failed (${reason}), skipping.`));
      return { optionsText: "", selectedApproach: null };
    }

    // Display the options
    console.log(chalk.cyan("\n" + "─".repeat(52)));
    console.log(optionsText);
    console.log(chalk.cyan("─".repeat(52) + "\n"));

    // Parse option labels from the text (Option A / B / C). The match objects
    // are kept because their `index` locates each header exactly, whatever
    // whitespace follows the "###".
    const optionMatches = [...optionsText.matchAll(/###\s+(Option\s+[A-C][^:\n]*)/gi)];
    const parsedOptions = optionMatches.map((m) => m[1].trim());

    // Build choices for the select prompt
    const choices: Array<{ name: string; value: string }> = parsedOptions.map((label) => ({
      name: label,
      value: label,
    }));

    choices.push(
      { name: "🔀 Blend — let AI combine the best of all options", value: "__blend__" },
      { name: "⏭️  Skip — proceed to spec without an architecture decision", value: "__skip__" }
    );

    const selected = await select({
      message: "Which approach should the spec follow?",
      choices,
    });

    if (selected === "__skip__") {
      console.log(chalk.gray("  Architecture decision skipped — spec will be generated freely."));
      return { optionsText, selectedApproach: null };
    }

    if (selected === "__blend__") {
      console.log(chalk.blue("  Blending approaches..."));
      try {
        const blendPrompt = `The developer wants to blend the best aspects of all options below.
Write a single-paragraph architectural decision that combines their strengths.
Output ONLY the blended approach description (2-4 sentences, no headers).

${optionsText}`;
        const blended = await this.provider.generate(
          blendPrompt,
          "You are a Senior Architect. Output only the blended architectural approach, 2-4 sentences."
        );
        const blendedApproach = `Blended approach: ${blended.trim()}`;
        console.log(chalk.cyan(`\n  Selected: ${blendedApproach.slice(0, 80)}...`));
        return { optionsText, selectedApproach: blendedApproach };
      } catch {
        // Best effort: a failed blend degrades to "no architecture decision".
        console.log(chalk.yellow("  Blend failed, proceeding without architecture decision."));
        return { optionsText, selectedApproach: null };
      }
    }

    // Find the full description of the selected option. Falls back to the bare
    // label when the header position is unknown.
    const selectedIdx = parsedOptions.indexOf(selected);
    let selectedApproach = selected;

    const start = selectedIdx === -1 ? undefined : optionMatches[selectedIdx]?.index;
    if (start !== undefined) {
      // End at the next option header; the last option ends at the next "---"
      // separator, or at the end of the text when there is none.
      const nextHeader = optionMatches[selectedIdx + 1]?.index;
      let end = nextHeader ?? optionsText.indexOf("---", start + 1);
      if (end === -1) end = optionsText.length;

      // Cap to avoid bloating the spec prompt.
      selectedApproach = optionsText.slice(start, end).trim().slice(0, 400);
    }

    console.log(chalk.green(`  ✔ Architecture decision locked: ${selected}`));
    return { optionsText, selectedApproach };
  }
}
@@ -128,7 +128,10 @@ export class DslExtractor {
128
128
  // Truncate very long specs to avoid token issues
129
129
  const specForAI =
130
130
  specContent.length > MAX_SPEC_CHARS
131
- ? specContent.slice(0, MAX_SPEC_CHARS) + "\n... (truncated for DSL extraction)"
131
+ ? (() => {
132
+ console.log(chalk.yellow(` โš  Spec is ${specContent.length} chars โ€” truncating to ${MAX_SPEC_CHARS} for DSL extraction. Details at the end may be lost.`));
133
+ return specContent.slice(0, MAX_SPEC_CHARS) + "\n... (truncated for DSL extraction)";
134
+ })()
132
135
  : specContent;
133
136
 
134
137
  let lastRawOutput = "";
@@ -165,6 +168,11 @@ export class DslExtractor {
165
168
  parsed = parseJsonFromOutput(rawOutput);
166
169
  } catch (parseErr) {
167
170
  console.log(chalk.red(` โœ˜ Failed to parse JSON from AI output: ${(parseErr as Error).message}`));
171
+ const preview = rawOutput.slice(0, 500).replace(/\n/g, "\\n");
172
+ console.log(chalk.gray(` AI output preview (first 500 chars): ${preview}`));
173
+ if (rawOutput.length > MAX_SPEC_CHARS) {
174
+ console.log(chalk.gray(` Note: spec was truncated to ${MAX_SPEC_CHARS} chars โ€” long specs may lose context`));
175
+ }
168
176
  lastErrors = [{ path: "root", message: "Output is not valid JSON โ€” see raw output above" }];
169
177
 
170
178
  if (attempt < MAX_RETRIES) continue;
@@ -69,10 +69,16 @@ export function assessDslRichness(dsl: SpecDSL): DslGap[] {
69
69
  }
70
70
 
71
71
  // โ”€โ”€ Endpoints with no error definitions (but spec text likely mentions them) โ”€โ”€
72
+ // Only flag when ALL endpoints lack error definitions โ€” if at least one has
73
+ // errors, the author is aware of the pattern and the rest may genuinely not
74
+ // need explicit error cases (e.g. simple GET endpoints).
72
75
  const endpointsWithoutErrors = dsl.endpoints.filter(
73
76
  (ep) => !ep.errors || ep.errors.length === 0
74
77
  );
75
- if (endpointsWithoutErrors.length > 0 && dsl.endpoints.length >= 2) {
78
+ if (
79
+ endpointsWithoutErrors.length === dsl.endpoints.length &&
80
+ dsl.endpoints.length >= 2
81
+ ) {
76
82
  gaps.push({
77
83
  code: "missing_errors",
78
84
  message: `${endpointsWithoutErrors.length}/${dsl.endpoints.length} endpoints have no error definitions`,
@@ -134,6 +140,11 @@ export interface StructuralFinding {
134
140
  * that indicate design-level issues in the Spec/DSL โ€” as opposed to
135
141
  * implementation-level issues that belong in ยง9 knowledge.
136
142
  *
143
+ * Primary path: parse the structured JSON block emitted by the updated
144
+ * reviewArchitectureSystemPrompt (## ๐Ÿ” ็ป“ๆž„ๆ€งๅ‘็Žฐ JSON section).
145
+ * Fallback: legacy regex approach for review texts generated before the
146
+ * structured output format was introduced.
147
+ *
137
148
  * Returns an empty array if no structural issues are found or if the
138
149
  * review score for Pass 1 is high (โ‰ฅ 8), indicating overall approval.
139
150
  */
@@ -147,9 +158,34 @@ export function extractStructuralFindings(reviewText: string): StructuralFinding
147
158
  const pass1Score = extractPassScore(pass1Text);
148
159
  if (pass1Score !== null && pass1Score >= 8) return [];
149
160
 
161
+ // โ”€โ”€ Primary path: parse structured JSON block โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
162
+ // Look for the JSON block within the "๐Ÿ” ็ป“ๆž„ๆ€งๅ‘็Žฐ JSON" section of Pass 1.
163
+ // The block is delimited by ```json ... ``` and always contains a
164
+ // { structuralFindings: [...] } object.
165
+ const jsonBlockMatch = pass1Text.match(/```json\s*(\{[\s\S]*?\})\s*```/);
166
+ if (jsonBlockMatch) {
167
+ try {
168
+ const parsed = JSON.parse(jsonBlockMatch[1]);
169
+ if (Array.isArray(parsed.structuralFindings)) {
170
+ return parsed.structuralFindings.filter(
171
+ (f: unknown): f is StructuralFinding =>
172
+ typeof f === "object" &&
173
+ f !== null &&
174
+ typeof (f as StructuralFinding).category === "string" &&
175
+ typeof (f as StructuralFinding).description === "string"
176
+ );
177
+ }
178
+ } catch {
179
+ // JSON parse failed โ€” fall through to regex fallback
180
+ }
181
+ }
182
+
183
+ // โ”€โ”€ Fallback: legacy regex approach โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
184
+ // Used when review text was generated before the structured JSON format
185
+ // was added to reviewArchitectureSystemPrompt.
150
186
  const findings: StructuralFinding[] = [];
151
187
 
152
- // โ”€โ”€ Auth / ่ฎค่ฏ design issues โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
188
+ // Auth / ่ฎค่ฏ design issues
153
189
  if (
154
190
  /็ผบๅฐ‘่ฎค่ฏ|missing auth|auth.*false|ๆœชๅŠ ่ฎค่ฏ|้‰ดๆƒ.*็ผบ|endpoint.*public.*should/i.test(pass1Text)
155
191
  ) {
@@ -160,7 +196,7 @@ export function extractStructuralFindings(reviewText: string): StructuralFinding
160
196
  });
161
197
  }
162
198
 
163
- // โ”€โ”€ API contract / ๆŽฅๅฃ่ฎพ่ฎก issues โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
199
+ // API contract / ๆŽฅๅฃ่ฎพ่ฎก issues
164
200
  if (
165
201
  /ๆŽฅๅฃ่ฎพ่ฎก.*้—ฎ้ข˜|ๆŽฅๅฃ.*ไธๅˆ็†|API design|response.*missing|request.*missing|ๆŽฅๅฃ.*็ผบๅฐ‘/i.test(pass1Text)
166
202
  ) {
@@ -171,7 +207,7 @@ export function extractStructuralFindings(reviewText: string): StructuralFinding
171
207
  });
172
208
  }
173
209
 
174
- // โ”€โ”€ Model / ๆ•ฐๆฎๆจกๅž‹ design issues โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
210
+ // Model / ๆ•ฐๆฎๆจกๅž‹ design issues
175
211
  if (
176
212
  /ๆจกๅž‹.*็ผบๅฐ‘ๅญ—ๆฎต|model.*missing field|ๆ•ฐๆฎ็ป“ๆž„.*้—ฎ้ข˜|schema.*incomplete|ๅญ—ๆฎต.*missing/i.test(pass1Text)
177
213
  ) {
@@ -182,7 +218,7 @@ export function extractStructuralFindings(reviewText: string): StructuralFinding
182
218
  });
183
219
  }
184
220
 
185
- // โ”€โ”€ Layer separation / ๅฑ‚็บงๅˆ†็ฆป violations โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
221
+ // Layer separation / ๅฑ‚็บงๅˆ†็ฆป violations
186
222
  if (
187
223
  /ๅฑ‚็บง.*่ฟๅ|layer.*violation|business logic.*controller|controller.*service.*ๆทท|ๅˆ†ๅฑ‚.*้—ฎ้ข˜/i.test(pass1Text)
188
224
  ) {