ai-spec-dev 0.31.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/.claude/commands/add-lesson.md +34 -0
  2. package/.claude/commands/check-layers.md +65 -0
  3. package/.claude/commands/installed-deps.md +35 -0
  4. package/.claude/commands/recall-lessons.md +40 -0
  5. package/.claude/commands/scan-singletons.md +45 -0
  6. package/.claude/commands/verify-imports.md +48 -0
  7. package/.claude/settings.local.json +15 -1
  8. package/README.md +531 -213
  9. package/RELEASE_LOG.md +460 -0
  10. package/cli/commands/config.ts +93 -0
  11. package/cli/commands/create.ts +1233 -0
  12. package/cli/commands/dashboard.ts +62 -0
  13. package/cli/commands/export.ts +66 -0
  14. package/cli/commands/init.ts +190 -0
  15. package/cli/commands/learn.ts +30 -0
  16. package/cli/commands/logs.ts +106 -0
  17. package/cli/commands/mock.ts +175 -0
  18. package/cli/commands/model.ts +156 -0
  19. package/cli/commands/restore.ts +22 -0
  20. package/cli/commands/review.ts +63 -0
  21. package/cli/commands/scan.ts +99 -0
  22. package/cli/commands/trend.ts +36 -0
  23. package/cli/commands/types.ts +69 -0
  24. package/cli/commands/update.ts +178 -0
  25. package/cli/commands/vcr.ts +70 -0
  26. package/cli/commands/workspace.ts +219 -0
  27. package/cli/index.ts +34 -2240
  28. package/cli/utils.ts +83 -0
  29. package/core/combined-generator.ts +13 -3
  30. package/core/dashboard-generator.ts +340 -0
  31. package/core/design-dialogue.ts +124 -0
  32. package/core/dsl-feedback.ts +285 -0
  33. package/core/error-feedback.ts +46 -2
  34. package/core/project-index.ts +301 -0
  35. package/core/reviewer.ts +84 -6
  36. package/core/run-logger.ts +109 -3
  37. package/core/run-trend.ts +261 -0
  38. package/core/self-evaluator.ts +139 -7
  39. package/core/spec-generator.ts +14 -8
  40. package/core/task-generator.ts +17 -0
  41. package/core/types-generator.ts +219 -0
  42. package/core/vcr.ts +210 -0
  43. package/dist/cli/index.js +6692 -4512
  44. package/dist/cli/index.js.map +1 -1
  45. package/dist/cli/index.mjs +6692 -4512
  46. package/dist/cli/index.mjs.map +1 -1
  47. package/dist/index.d.mts +19 -5
  48. package/dist/index.d.ts +19 -5
  49. package/dist/index.js +420 -224
  50. package/dist/index.js.map +1 -1
  51. package/dist/index.mjs +418 -224
  52. package/dist/index.mjs.map +1 -1
  53. package/docs-assets/purpose/architecture-overview.svg +64 -0
  54. package/docs-assets/purpose/create-pipeline.svg +113 -0
  55. package/docs-assets/purpose/task-layering.svg +74 -0
  56. package/package.json +6 -3
  57. package/prompts/codegen.prompt.ts +97 -9
  58. package/prompts/design.prompt.ts +59 -0
  59. package/prompts/spec.prompt.ts +8 -1
  60. package/prompts/tasks.prompt.ts +27 -2
  61. package/purpose.md +600 -174
  62. package/tests/dsl-extractor.test.ts +264 -0
  63. package/tests/dsl-feedback.test.ts +266 -0
  64. package/tests/dsl-validator.test.ts +283 -0
  65. package/tests/error-feedback.test.ts +292 -0
  66. package/tests/provider-utils.test.ts +173 -0
  67. package/tests/run-trend.test.ts +186 -0
  68. package/tests/self-evaluator.test.ts +339 -0
  69. package/tests/spec-assessor.test.ts +142 -0
  70. package/tests/task-generator.test.ts +230 -0
@@ -0,0 +1,173 @@
1
+ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
2
+ import { ProviderError, withReliability } from "../core/provider-utils";
3
+
4
+ // ─── ProviderError — constructor contract: name, kind, message, optional originalError ───
5
+
6
+ describe("ProviderError", () => {
7
+ it("sets name, kind, and message", () => {
8
+ const err = new ProviderError("bad key", "auth"); // constructor arguments are (message, kind)
9
+ expect(err.name).toBe("ProviderError");
10
+ expect(err.kind).toBe("auth");
11
+ expect(err.message).toBe("bad key");
12
+ expect(err instanceof Error).toBe(true); // must still be catchable as a plain Error
13
+ });
14
+
15
+ it("stores the original error", () => {
16
+ const original = new Error("root cause");
17
+ const err = new ProviderError("wrapped", "network", original); // optional third argument: the wrapped cause
18
+ expect(err.originalError).toBe(original); // toBe: same reference, not a copy
19
+ });
20
+
21
+ it("originalError is undefined when not provided", () => {
22
+ const err = new ProviderError("msg", "timeout");
23
+ expect(err.originalError).toBeUndefined();
24
+ });
25
+
26
+ it("supports all error kinds", () => {
27
+ const kinds = ["auth", "rate_limit", "timeout", "network", "provider"] as const; // the five kinds this suite exercises
28
+ for (const kind of kinds) {
29
+ expect(new ProviderError("msg", kind).kind).toBe(kind);
30
+ }
31
+ });
32
+ });
33
+
34
+ // ─── withReliability — success: the value fn resolves with is returned to the caller ───
35
+
36
+ describe("withReliability — success path", () => {
37
+ it("returns result immediately on first success", async () => {
38
+ const fn = vi.fn().mockResolvedValue("hello");
39
+ const result = await withReliability(fn, { timeoutMs: 5_000, retries: 0 });
40
+ expect(result).toBe("hello");
41
+ expect(fn).toHaveBeenCalledTimes(1); // a successful first call must not be repeated
42
+ });
43
+
44
+ it("passes through arbitrary return types", async () => {
45
+ const payload = { id: 1, data: [1, 2, 3] };
46
+ const fn = vi.fn().mockResolvedValue(payload);
47
+ const result = await withReliability(fn, { timeoutMs: 5_000, retries: 0 });
48
+ expect(result).toEqual(payload); // toEqual compares structure; reference identity is not asserted
49
+ });
50
+ });
51
+
52
+ // ─── withReliability — error classification: status / code on the rejection → ProviderError.kind ───
53
+
54
+ describe("withReliability — error classification (no retries)", () => { // every case uses retries: 0 so only the classification is observed
55
+ it("classifies 401 as auth", async () => {
56
+ const fn = vi.fn().mockRejectedValue(Object.assign(new Error("unauthorized"), { status: 401 })); // numeric status attached as a plain property on the Error
57
+ await expect(withReliability(fn, { retries: 0, timeoutMs: 5_000 })).rejects.toMatchObject({
58
+ kind: "auth",
59
+ });
60
+ });
61
+
62
+ it("classifies 403 as auth", async () => {
63
+ const fn = vi.fn().mockRejectedValue(Object.assign(new Error("forbidden"), { status: 403 }));
64
+ await expect(withReliability(fn, { retries: 0, timeoutMs: 5_000 })).rejects.toMatchObject({
65
+ kind: "auth",
66
+ });
67
+ });
68
+
69
+ it("classifies 429 as rate_limit", async () => {
70
+ const fn = vi.fn().mockRejectedValue(Object.assign(new Error("too many"), { status: 429 }));
71
+ await expect(withReliability(fn, { retries: 0, timeoutMs: 5_000 })).rejects.toMatchObject({
72
+ kind: "rate_limit",
73
+ });
74
+ });
75
+
76
+ it("classifies ECONNRESET as network", async () => {
77
+ const fn = vi.fn().mockRejectedValue(Object.assign(new Error("reset"), { code: "ECONNRESET" })); // string code instead of a numeric status
78
+ await expect(withReliability(fn, { retries: 0, timeoutMs: 5_000 })).rejects.toMatchObject({
79
+ kind: "network",
80
+ });
81
+ });
82
+
83
+ it("classifies ENOTFOUND as network", async () => {
84
+ const fn = vi.fn().mockRejectedValue(Object.assign(new Error("not found"), { code: "ENOTFOUND" }));
85
+ await expect(withReliability(fn, { retries: 0, timeoutMs: 5_000 })).rejects.toMatchObject({
86
+ kind: "network",
87
+ });
88
+ });
89
+
90
+ it("classifies 500 as provider", async () => {
91
+ const fn = vi.fn().mockRejectedValue(Object.assign(new Error("server error"), { status: 500 }));
92
+ await expect(withReliability(fn, { retries: 0, timeoutMs: 5_000 })).rejects.toMatchObject({
93
+ kind: "provider",
94
+ });
95
+ });
96
+
97
+ it("throws ProviderError (not raw error)", async () => {
98
+ const fn = vi.fn().mockRejectedValue(new Error("raw")); // no status and no code on this error
99
+ const thrown = await withReliability(fn, { retries: 0, timeoutMs: 5_000 }).catch((e) => e); // turn the rejection into a value so it can be inspected
100
+ expect(thrown).toBeInstanceOf(ProviderError);
101
+ });
102
+ });
103
+
104
+ // ─── withReliability — timeout: a call that outlives timeoutMs must reject with ProviderError ───
105
+
106
+ describe("withReliability — timeout", () => {
107
+ it("rejects with ProviderError when fn exceeds timeoutMs", async () => {
108
+ const fn = vi.fn().mockImplementation(() => new Promise((r) => setTimeout(r, 10_000))); // NOTE(review): real timers are used here and this 10 s timer handle is never kept or cleared, so it stays pending after the test
109
+ const err = await withReliability(fn, { retries: 0, timeoutMs: 30 }).catch((e) => e); // 30 ms limit against a 10 s call
110
+ expect(err).toBeInstanceOf(ProviderError);
111
+ // timeout errors show up as "provider" or "timeout" kind — NOTE(review): accepting both means this test does not pin how a timeout is classified
112
+ expect(["timeout", "provider"]).toContain(err.kind);
113
+ }, 3_000); // third argument is the per-test timeout in milliseconds
114
+ });
115
+
116
+ // ─── withReliability — retry behaviour: which failures are retried and how many calls are made ───
117
+
118
+ describe("withReliability — retry behaviour", () => {
119
+ beforeEach(() => vi.useFakeTimers()); // fake timers so any timer withReliability schedules can be flushed without real waiting
120
+ afterEach(() => vi.useRealTimers()); // restore real timers for the suites that follow
121
+
122
+ it("does NOT retry auth errors (401)", async () => {
123
+ const fn = vi.fn().mockRejectedValue(Object.assign(new Error("auth"), { status: 401 }));
124
+ const promise = withReliability(fn, { retries: 3, timeoutMs: 999_999 }); // presumably a huge timeout so the timeout path never decides the outcome — TODO confirm
125
+ // Attach rejection handler BEFORE running timers to avoid unhandled rejection warning
126
+ const settled = promise.catch((e) => e);
127
+ await vi.runAllTimersAsync();
128
+ const err = await settled;
129
+ expect(err).toBeInstanceOf(ProviderError);
130
+ expect(fn).toHaveBeenCalledTimes(1); // retries: 3 was allowed, yet only the initial call happened
131
+ });
132
+
133
+ it("retries on 500 and succeeds on second attempt", async () => {
134
+ const fn = vi.fn()
135
+ .mockRejectedValueOnce(Object.assign(new Error("server error"), { status: 500 }))
136
+ .mockResolvedValueOnce("recovered"); // first call rejects, second call resolves
137
+ const promise = withReliability(fn, { retries: 1, timeoutMs: 999_999 });
138
+ await vi.runAllTimersAsync();
139
+ await expect(promise).resolves.toBe("recovered");
140
+ expect(fn).toHaveBeenCalledTimes(2);
141
+ });
142
+
143
+ it("retries on ECONNRESET and succeeds", async () => {
144
+ const fn = vi.fn()
145
+ .mockRejectedValueOnce(Object.assign(new Error("reset"), { code: "ECONNRESET" }))
146
+ .mockResolvedValueOnce("ok");
147
+ const promise = withReliability(fn, { retries: 1, timeoutMs: 999_999 });
148
+ await vi.runAllTimersAsync();
149
+ await expect(promise).resolves.toBe("ok");
150
+ expect(fn).toHaveBeenCalledTimes(2);
151
+ });
152
+
153
+ it("calls onRetry callback with attempt number", async () => {
154
+ const onRetry = vi.fn();
155
+ const fn = vi.fn()
156
+ .mockRejectedValueOnce(Object.assign(new Error("fail"), { status: 500 }))
157
+ .mockResolvedValueOnce("ok");
158
+ const promise = withReliability(fn, { retries: 1, timeoutMs: 999_999, onRetry });
159
+ await vi.runAllTimersAsync();
160
+ await promise;
161
+ expect(onRetry).toHaveBeenCalledWith(1, expect.any(Error)); // expected arguments: attempt number 1 and some Error instance
162
+ });
163
+
164
+ it("exhausts all retries and throws", async () => {
165
+ const fn = vi.fn().mockRejectedValue(Object.assign(new Error("always fails"), { status: 500 })); // rejects on every call
166
+ const promise = withReliability(fn, { retries: 2, timeoutMs: 999_999 });
167
+ const settled = promise.catch((e) => e); // handler attached before timers run, as in the 401 test
168
+ await vi.runAllTimersAsync();
169
+ const err = await settled;
170
+ expect(err).toBeInstanceOf(ProviderError);
171
+ expect(fn).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
172
+ });
173
+ });
@@ -0,0 +1,186 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { buildTrendReport } from "../core/run-trend";
3
+ import type { RunLog } from "../core/run-logger";
4
+
5
+ // ─── Fixtures — makeLog builds a RunLog literal with default field values; overrides replace them ───
6
+
7
+ function makeLog(overrides: Partial<RunLog> = {}): RunLog {
8
+ return {
9
+ runId: `20260330-120000-aaaa`,
10
+ startedAt: "2026-03-30T12:00:00.000Z",
11
+ workingDir: "/project",
12
+ provider: "gemini",
13
+ model: "gemini-2.5-pro",
14
+ promptHash: "a3f2c1d8",
15
+ harnessScore: 7.5, // default score; tests pass harnessScore: undefined to model an unscored run
16
+ entries: [],
17
+ filesWritten: ["src/api/order.ts", "src/models/order.ts"],
18
+ errors: [],
19
+ endedAt: "2026-03-30T12:01:30.000Z",
20
+ totalDurationMs: 90000, // 90 s, matching the gap between startedAt and endedAt above
21
+ ...overrides, // spread last so caller-supplied fields (including explicit undefined) win over the defaults
22
+ };
23
+ }
24
+
25
+ // ─── buildTrendReport — basic shape: empty input, score-based inclusion, field mapping ───
26
+
27
+ describe("buildTrendReport — basic shape", () => {
28
+ it("returns empty entries and groups for empty log list", () => {
29
+ const report = buildTrendReport([]);
30
+ expect(report.entries).toHaveLength(0);
31
+ expect(report.promptGroups).toHaveLength(0);
32
+ expect(report.totalRuns).toBe(0);
33
+ });
34
+
35
+ it("filters out logs without harnessScore (non-create runs)", () => {
36
+ const noScore = makeLog({ harnessScore: undefined }); // explicit undefined replaces the fixture's 7.5 default
37
+ const report = buildTrendReport([noScore]);
38
+ expect(report.entries).toHaveLength(0);
39
+ });
40
+
41
+ it("includes logs that have a harnessScore", () => {
42
+ const log = makeLog({ harnessScore: 8.2 });
43
+ const report = buildTrendReport([log]);
44
+ expect(report.entries).toHaveLength(1);
45
+ expect(report.entries[0].harnessScore).toBe(8.2);
46
+ });
47
+
48
+ it("maps log fields to entry correctly", () => {
49
+ const log = makeLog({
50
+ runId: "20260330-120000-test",
51
+ harnessScore: 7.0,
52
+ promptHash: "cafebabe",
53
+ specPath: "specs/feature-v1.md",
54
+ provider: "claude",
55
+ model: "claude-sonnet-4-6",
56
+ filesWritten: ["a.ts", "b.ts", "c.ts"],
57
+ errors: ["err1"],
58
+ totalDurationMs: 60000,
59
+ });
60
+ const { entries } = buildTrendReport([log]);
61
+ expect(entries[0].runId).toBe("20260330-120000-test");
62
+ expect(entries[0].harnessScore).toBe(7.0);
63
+ expect(entries[0].promptHash).toBe("cafebabe");
64
+ expect(entries[0].provider).toBe("claude");
65
+ expect(entries[0].filesWritten).toBe(3); // the entry is expected to hold a count, not the list of paths
66
+ expect(entries[0].errors).toBe(1); // likewise a count of the errors array
67
+ expect(entries[0].totalDurationMs).toBe(60000);
68
+ });
69
+ });
70
+
71
+ // ─── buildTrendReport — filtering: the `last` option caps how many entries are returned ───
72
+
73
+ describe("buildTrendReport — last N filter", () => {
74
+ it("limits entries to last N", () => {
75
+ const logs = Array.from({ length: 10 }, (_, i) =>
76
+ makeLog({ runId: `run-${i}`, harnessScore: 5 + i * 0.3 }) // ten scored logs with distinct runIds
77
+ );
78
+ const report = buildTrendReport(logs, { last: 3 });
79
+ expect(report.entries).toHaveLength(3); // only the count is checked, not which three runs were kept
80
+ });
81
+
82
+ it("returns all entries when last > total", () => {
83
+ const logs = [makeLog(), makeLog({ runId: "run-2" })]; // both keep the fixture's default harnessScore
84
+ const report = buildTrendReport(logs, { last: 100 });
85
+ expect(report.entries).toHaveLength(2);
86
+ });
87
+ });
88
+
89
+ describe("buildTrendReport — promptFilter", () => { // promptFilter is expected to match promptHash by prefix
90
+ it("filters entries to matching prompt hash prefix", () => {
91
+ const logs = [
92
+ makeLog({ promptHash: "a3f2c1d8", harnessScore: 7 }),
93
+ makeLog({ runId: "run-2", promptHash: "b1e4a2f0", harnessScore: 8 }),
94
+ makeLog({ runId: "run-3", promptHash: "a3f2ffff", harnessScore: 6 }),
95
+ ];
96
+ const report = buildTrendReport(logs, { promptFilter: "a3f2" }); // "a3f2" prefixes the first and third hashes only
97
+ expect(report.entries).toHaveLength(2);
98
+ expect(report.entries.every((e) => e.promptHash?.startsWith("a3f2"))).toBe(true);
99
+ });
100
+
101
+ it("returns empty when prompt filter matches nothing", () => {
102
+ const logs = [makeLog({ promptHash: "a3f2c1d8" })];
103
+ const report = buildTrendReport(logs, { promptFilter: "zzzz" });
104
+ expect(report.entries).toHaveLength(0);
105
+ });
106
+ });
107
+
108
+ // ─── buildTrendReport — prompt groups: per-promptHash run count, avg/best/worst, isCurrent, ordering ───
109
+
110
+ describe("buildTrendReport — promptGroups aggregation", () => {
111
+ it("groups runs by promptHash", () => {
112
+ const logs = [
113
+ makeLog({ promptHash: "aaa", harnessScore: 7 }),
114
+ makeLog({ runId: "run-2", promptHash: "aaa", harnessScore: 9 }),
115
+ makeLog({ runId: "run-3", promptHash: "bbb", harnessScore: 6 }),
116
+ ];
117
+ const { promptGroups } = buildTrendReport(logs);
118
+ expect(promptGroups).toHaveLength(2); // two distinct hashes: "aaa" and "bbb"
119
+ const aaa = promptGroups.find((g) => g.promptHash === "aaa");
120
+ expect(aaa?.runs).toBe(2);
121
+ });
122
+
123
+ it("computes avg, best, worst correctly", () => {
124
+ const logs = [
125
+ makeLog({ promptHash: "aaa", harnessScore: 6 }),
126
+ makeLog({ runId: "run-2", promptHash: "aaa", harnessScore: 8 }),
127
+ makeLog({ runId: "run-3", promptHash: "aaa", harnessScore: 7 }),
128
+ ];
129
+ const { promptGroups } = buildTrendReport(logs);
130
+ const aaa = promptGroups.find((g) => g.promptHash === "aaa")!;
131
+ expect(aaa.best).toBe(8);
132
+ expect(aaa.worst).toBe(6);
133
+ expect(aaa.avg).toBeCloseTo(7.0, 1); // (6 + 8 + 7) / 3 = 7, compared to one decimal digit
134
+ });
135
+
136
+ it("marks the most recently used promptHash as isCurrent", () => {
137
+ // Logs are already sorted newest-first by loadRunLogs; we pass them in order
138
+ const logs = [
139
+ makeLog({ runId: "newer", promptHash: "new-hash", startedAt: "2026-03-30T14:00:00.000Z", harnessScore: 7 }),
140
+ makeLog({ runId: "older", promptHash: "old-hash", startedAt: "2026-03-29T10:00:00.000Z", harnessScore: 6 }),
141
+ ];
142
+ const { promptGroups } = buildTrendReport(logs);
143
+ const current = promptGroups.find((g) => g.isCurrent);
144
+ expect(current?.promptHash).toBe("new-hash");
145
+ });
146
+
147
+ it("does NOT mark non-current groups as isCurrent", () => {
148
+ const logs = [
149
+ makeLog({ promptHash: "new", harnessScore: 7 }),
150
+ makeLog({ runId: "r2", promptHash: "old", harnessScore: 6 }),
151
+ ];
152
+ const { promptGroups } = buildTrendReport(logs);
153
+ const nonCurrent = promptGroups.filter((g) => !g.isCurrent);
154
+ expect(nonCurrent.every((g) => g.isCurrent === false)).toBe(true); // NOTE(review): the groups were already filtered by !isCurrent, so this only proves the flag is strictly false rather than undefined; it does not assert that exactly one group is current or that "old" is among the non-current ones
155
+ });
156
+
157
+ it("handles (none) group for runs without a promptHash", () => {
158
+ const log = makeLog({ promptHash: undefined, harnessScore: 5 });
159
+ const { promptGroups } = buildTrendReport([log]);
160
+ expect(promptGroups[0].promptHash).toBe("(none)"); // "(none)" is the placeholder expected for a missing promptHash
161
+ });
162
+
163
+ it("sorts groups by lastSeen descending (most recent first)", () => {
164
+ const logs = [
165
+ makeLog({ promptHash: "aaa", startedAt: "2026-03-30T10:00:00.000Z", harnessScore: 7 }),
166
+ makeLog({ runId: "r2", promptHash: "bbb", startedAt: "2026-03-28T10:00:00.000Z", harnessScore: 6 }),
167
+ ];
168
+ const { promptGroups } = buildTrendReport(logs);
169
+ expect(promptGroups[0].promptHash).toBe("aaa"); // most recent first
170
+ expect(promptGroups[1].promptHash).toBe("bbb");
171
+ });
172
+ });
173
+
174
+ // ─── buildTrendReport — totalRuns: counts only the runs that survive filtering ───
175
+
176
+ describe("buildTrendReport — totalRuns", () => {
177
+ it("totalRuns equals number of entries after filtering", () => {
178
+ const logs = [
179
+ makeLog({ harnessScore: 7 }),
180
+ makeLog({ runId: "r2", harnessScore: undefined }), // filtered out: no harnessScore
181
+ makeLog({ runId: "r3", harnessScore: 6 }),
182
+ ];
183
+ const { totalRuns } = buildTrendReport(logs);
184
+ expect(totalRuns).toBe(2); // three logs supplied, one unscored
185
+ });
186
+ });