@mainahq/core 0.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/README.md +31 -0
  2. package/package.json +37 -0
  3. package/src/ai/__tests__/ai.test.ts +207 -0
  4. package/src/ai/__tests__/design-approaches.test.ts +192 -0
  5. package/src/ai/__tests__/spec-questions.test.ts +191 -0
  6. package/src/ai/__tests__/tiers.test.ts +110 -0
  7. package/src/ai/commit-msg.ts +28 -0
  8. package/src/ai/design-approaches.ts +76 -0
  9. package/src/ai/index.ts +205 -0
  10. package/src/ai/pr-summary.ts +60 -0
  11. package/src/ai/spec-questions.ts +74 -0
  12. package/src/ai/tiers.ts +52 -0
  13. package/src/ai/try-generate.ts +89 -0
  14. package/src/ai/validate.ts +66 -0
  15. package/src/benchmark/__tests__/reporter.test.ts +525 -0
  16. package/src/benchmark/__tests__/runner.test.ts +113 -0
  17. package/src/benchmark/__tests__/story-loader.test.ts +152 -0
  18. package/src/benchmark/reporter.ts +332 -0
  19. package/src/benchmark/runner.ts +91 -0
  20. package/src/benchmark/story-loader.ts +88 -0
  21. package/src/benchmark/types.ts +95 -0
  22. package/src/cache/__tests__/keys.test.ts +97 -0
  23. package/src/cache/__tests__/manager.test.ts +312 -0
  24. package/src/cache/__tests__/ttl.test.ts +94 -0
  25. package/src/cache/keys.ts +44 -0
  26. package/src/cache/manager.ts +231 -0
  27. package/src/cache/ttl.ts +77 -0
  28. package/src/config/__tests__/config.test.ts +376 -0
  29. package/src/config/index.ts +198 -0
  30. package/src/context/__tests__/budget.test.ts +179 -0
  31. package/src/context/__tests__/engine.test.ts +163 -0
  32. package/src/context/__tests__/episodic.test.ts +291 -0
  33. package/src/context/__tests__/relevance.test.ts +323 -0
  34. package/src/context/__tests__/retrieval.test.ts +143 -0
  35. package/src/context/__tests__/selector.test.ts +174 -0
  36. package/src/context/__tests__/semantic.test.ts +252 -0
  37. package/src/context/__tests__/treesitter.test.ts +229 -0
  38. package/src/context/__tests__/working.test.ts +236 -0
  39. package/src/context/budget.ts +130 -0
  40. package/src/context/engine.ts +394 -0
  41. package/src/context/episodic.ts +251 -0
  42. package/src/context/relevance.ts +325 -0
  43. package/src/context/retrieval.ts +325 -0
  44. package/src/context/selector.ts +93 -0
  45. package/src/context/semantic.ts +331 -0
  46. package/src/context/treesitter.ts +216 -0
  47. package/src/context/working.ts +192 -0
  48. package/src/db/__tests__/db.test.ts +151 -0
  49. package/src/db/index.ts +211 -0
  50. package/src/db/schema.ts +84 -0
  51. package/src/design/__tests__/design.test.ts +310 -0
  52. package/src/design/__tests__/generate-hld-lld.test.ts +109 -0
  53. package/src/design/__tests__/review.test.ts +561 -0
  54. package/src/design/index.ts +297 -0
  55. package/src/design/review.ts +327 -0
  56. package/src/explain/__tests__/explain.test.ts +173 -0
  57. package/src/explain/index.ts +181 -0
  58. package/src/features/__tests__/analyzer.test.ts +358 -0
  59. package/src/features/__tests__/checklist.test.ts +454 -0
  60. package/src/features/__tests__/numbering.test.ts +319 -0
  61. package/src/features/__tests__/quality.test.ts +295 -0
  62. package/src/features/__tests__/traceability.test.ts +147 -0
  63. package/src/features/analyzer.ts +445 -0
  64. package/src/features/checklist.ts +366 -0
  65. package/src/features/index.ts +18 -0
  66. package/src/features/numbering.ts +404 -0
  67. package/src/features/quality.ts +349 -0
  68. package/src/features/test-stubs.ts +157 -0
  69. package/src/features/traceability.ts +260 -0
  70. package/src/feedback/__tests__/async-feedback.test.ts +52 -0
  71. package/src/feedback/__tests__/collector.test.ts +219 -0
  72. package/src/feedback/__tests__/compress.test.ts +150 -0
  73. package/src/feedback/__tests__/preferences.test.ts +169 -0
  74. package/src/feedback/collector.ts +135 -0
  75. package/src/feedback/compress.ts +92 -0
  76. package/src/feedback/preferences.ts +108 -0
  77. package/src/git/__tests__/git.test.ts +62 -0
  78. package/src/git/index.ts +110 -0
  79. package/src/hooks/__tests__/runner.test.ts +266 -0
  80. package/src/hooks/index.ts +8 -0
  81. package/src/hooks/runner.ts +130 -0
  82. package/src/index.ts +356 -0
  83. package/src/init/__tests__/init.test.ts +228 -0
  84. package/src/init/index.ts +364 -0
  85. package/src/language/__tests__/detect.test.ts +77 -0
  86. package/src/language/__tests__/profile.test.ts +51 -0
  87. package/src/language/detect.ts +70 -0
  88. package/src/language/profile.ts +110 -0
  89. package/src/prompts/__tests__/defaults.test.ts +52 -0
  90. package/src/prompts/__tests__/engine.test.ts +183 -0
  91. package/src/prompts/__tests__/evolution-resolve.test.ts +169 -0
  92. package/src/prompts/__tests__/evolution.test.ts +187 -0
  93. package/src/prompts/__tests__/loader.test.ts +105 -0
  94. package/src/prompts/candidates/review-v2.md +55 -0
  95. package/src/prompts/defaults/ai-review.md +49 -0
  96. package/src/prompts/defaults/commit.md +30 -0
  97. package/src/prompts/defaults/context.md +26 -0
  98. package/src/prompts/defaults/design-approaches.md +57 -0
  99. package/src/prompts/defaults/design-hld-lld.md +55 -0
  100. package/src/prompts/defaults/design.md +53 -0
  101. package/src/prompts/defaults/explain.md +31 -0
  102. package/src/prompts/defaults/fix.md +32 -0
  103. package/src/prompts/defaults/index.ts +38 -0
  104. package/src/prompts/defaults/review.md +41 -0
  105. package/src/prompts/defaults/spec-questions.md +59 -0
  106. package/src/prompts/defaults/tests.md +72 -0
  107. package/src/prompts/engine.ts +137 -0
  108. package/src/prompts/evolution.ts +409 -0
  109. package/src/prompts/loader.ts +71 -0
  110. package/src/review/__tests__/review.test.ts +288 -0
  111. package/src/review/comprehensive.ts +362 -0
  112. package/src/review/index.ts +417 -0
  113. package/src/stats/__tests__/tracker.test.ts +323 -0
  114. package/src/stats/index.ts +11 -0
  115. package/src/stats/tracker.ts +492 -0
  116. package/src/ticket/__tests__/ticket.test.ts +273 -0
  117. package/src/ticket/index.ts +185 -0
  118. package/src/utils.ts +87 -0
  119. package/src/verify/__tests__/ai-review.test.ts +242 -0
  120. package/src/verify/__tests__/coverage.test.ts +83 -0
  121. package/src/verify/__tests__/detect.test.ts +175 -0
  122. package/src/verify/__tests__/diff-filter.test.ts +338 -0
  123. package/src/verify/__tests__/fix.test.ts +478 -0
  124. package/src/verify/__tests__/linters/clippy.test.ts +45 -0
  125. package/src/verify/__tests__/linters/go-vet.test.ts +27 -0
  126. package/src/verify/__tests__/linters/ruff.test.ts +64 -0
  127. package/src/verify/__tests__/mutation.test.ts +141 -0
  128. package/src/verify/__tests__/pipeline.test.ts +553 -0
  129. package/src/verify/__tests__/proof.test.ts +97 -0
  130. package/src/verify/__tests__/secretlint.test.ts +190 -0
  131. package/src/verify/__tests__/semgrep.test.ts +217 -0
  132. package/src/verify/__tests__/slop.test.ts +366 -0
  133. package/src/verify/__tests__/sonar.test.ts +113 -0
  134. package/src/verify/__tests__/syntax-guard.test.ts +227 -0
  135. package/src/verify/__tests__/trivy.test.ts +191 -0
  136. package/src/verify/__tests__/visual.test.ts +139 -0
  137. package/src/verify/ai-review.ts +276 -0
  138. package/src/verify/coverage.ts +134 -0
  139. package/src/verify/detect.ts +171 -0
  140. package/src/verify/diff-filter.ts +183 -0
  141. package/src/verify/fix.ts +317 -0
  142. package/src/verify/linters/clippy.ts +52 -0
  143. package/src/verify/linters/go-vet.ts +32 -0
  144. package/src/verify/linters/ruff.ts +47 -0
  145. package/src/verify/mutation.ts +143 -0
  146. package/src/verify/pipeline.ts +328 -0
  147. package/src/verify/proof.ts +277 -0
  148. package/src/verify/secretlint.ts +168 -0
  149. package/src/verify/semgrep.ts +170 -0
  150. package/src/verify/slop.ts +493 -0
  151. package/src/verify/sonar.ts +146 -0
  152. package/src/verify/syntax-guard.ts +251 -0
  153. package/src/verify/trivy.ts +161 -0
  154. package/src/verify/visual.ts +460 -0
  155. package/src/workflow/__tests__/context.test.ts +110 -0
  156. package/src/workflow/context.ts +81 -0
package/src/prompts/__tests__/defaults.test.ts
@@ -0,0 +1,52 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { loadDefault, type PromptTask } from "../defaults/index";
3
+
4
+ const ALL_TASKS: PromptTask[] = [
5
+ "review",
6
+ "commit",
7
+ "tests",
8
+ "fix",
9
+ "explain",
10
+ "design",
11
+ "context",
12
+ ];
13
+
14
+ describe("loadDefault", () => {
15
+ test("loadDefault('review') returns string containing '{{constitution}}'", async () => {
16
+ const template = await loadDefault("review");
17
+ expect(template).toContain("{{constitution}}");
18
+ });
19
+
20
+ test("loadDefault('commit') returns string containing '{{diff}}'", async () => {
21
+ const template = await loadDefault("commit");
22
+ expect(template).toContain("{{diff}}");
23
+ });
24
+
25
+ test("loadDefault('tests') returns string containing '{{plan}}'", async () => {
26
+ const template = await loadDefault("tests");
27
+ expect(template).toContain("{{plan}}");
28
+ });
29
+
30
+ test("every default prompt contains [NEEDS CLARIFICATION]", async () => {
31
+ for (const task of ALL_TASKS) {
32
+ const template = await loadDefault(task);
33
+ expect(template).toContain("[NEEDS CLARIFICATION");
34
+ }
35
+ });
36
+
37
+ test("loadDefault for all 7 task types returns non-empty strings", async () => {
38
+ for (const task of ALL_TASKS) {
39
+ const template = await loadDefault(task);
40
+ expect(typeof template).toBe("string");
41
+ expect(template.length).toBeGreaterThan(0);
42
+ }
43
+ });
44
+
45
+ test("loadDefault for unknown task returns fallback template", async () => {
46
+ // Cast to PromptTask to simulate unknown task
47
+ const template = await loadDefault("unknown" as PromptTask);
48
+ expect(typeof template).toBe("string");
49
+ expect(template.length).toBeGreaterThan(0);
50
+ expect(template).toContain("{{task}}");
51
+ });
52
+ });
package/src/prompts/__tests__/engine.test.ts
@@ -0,0 +1,183 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+ import { mkdirSync, writeFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { hashContent } from "../../cache/keys";
5
+ import { getFeedbackDb } from "../../db/index";
6
+ import { buildSystemPrompt, getPromptStats, recordOutcome } from "../engine";
7
+
8
+ let tmpDir: string;
9
+
10
+ beforeEach(() => {
11
+ tmpDir = join(
12
+ import.meta.dir,
13
+ `tmp-engine-${Date.now()}-${Math.random().toString(36).slice(2)}`,
14
+ );
15
+ mkdirSync(tmpDir, { recursive: true });
16
+ });
17
+
18
+ afterEach(() => {
19
+ try {
20
+ const { rmSync } = require("node:fs");
21
+ rmSync(tmpDir, { recursive: true, force: true });
22
+ } catch {
23
+ // ignore
24
+ }
25
+ });
26
+
27
+ describe("buildSystemPrompt", () => {
28
+ test("includes constitution content when present", async () => {
29
+ const constitutionContent = "Always be concise.";
30
+ writeFileSync(join(tmpDir, "constitution.md"), constitutionContent);
31
+
32
+ // Use a user override with {{constitution}} to ensure test is independent
33
+ // of which default template loadDefault returns
34
+ const promptsDir = join(tmpDir, "prompts");
35
+ mkdirSync(promptsDir, { recursive: true });
36
+ writeFileSync(
37
+ join(promptsDir, "commit.md"),
38
+ "Task prompt.\n\n## Constitution\n{{constitution}}\n\n## Diff\n{{diff}}",
39
+ );
40
+
41
+ const result = await buildSystemPrompt("commit", tmpDir, {
42
+ diff: "some diff",
43
+ });
44
+
45
+ expect(result.prompt).toContain(constitutionContent);
46
+ });
47
+
48
+ test("replaces template variables", async () => {
49
+ // Use a user override with {{diff}} to ensure deterministic template
50
+ const promptsDir = join(tmpDir, "prompts");
51
+ mkdirSync(promptsDir, { recursive: true });
52
+ writeFileSync(
53
+ join(promptsDir, "commit.md"),
54
+ "Generate commit for:\n{{diff}}",
55
+ );
56
+
57
+ const result = await buildSystemPrompt("commit", tmpDir, {
58
+ diff: "my test diff content",
59
+ });
60
+
61
+ expect(result.prompt).toContain("my test diff content");
62
+ expect(result.prompt).not.toContain("{{diff}}");
63
+ });
64
+
65
+ test("returns consistent hash for same content", async () => {
66
+ // Use a user override so hash is deterministic regardless of default template
67
+ const promptsDir = join(tmpDir, "prompts");
68
+ mkdirSync(promptsDir, { recursive: true });
69
+ writeFileSync(join(promptsDir, "commit.md"), "Stable prompt: {{diff}}");
70
+
71
+ const context = { diff: "same diff" };
72
+
73
+ const result1 = await buildSystemPrompt("commit", tmpDir, context);
74
+ const result2 = await buildSystemPrompt("commit", tmpDir, context);
75
+
76
+ expect(result1.hash).toBe(result2.hash);
77
+ expect(result1.hash).toBe(hashContent(result1.prompt));
78
+ });
79
+
80
+ test("uses user override when available", async () => {
81
+ const promptsDir = join(tmpDir, "prompts");
82
+ mkdirSync(promptsDir, { recursive: true });
83
+ const customPrompt = "Custom commit prompt: {{diff}}";
84
+ writeFileSync(join(promptsDir, "commit.md"), customPrompt);
85
+
86
+ const result = await buildSystemPrompt("commit", tmpDir, {
87
+ diff: "override diff",
88
+ });
89
+
90
+ expect(result.prompt).toContain("Custom commit prompt:");
91
+ expect(result.prompt).toContain("override diff");
92
+ });
93
+ });
94
+
95
+ describe("recordOutcome", () => {
96
+ test("writes to feedback database", async () => {
97
+ const promptHash = "abc123hash";
98
+ const outcome = {
99
+ accepted: true,
100
+ command: "git commit -m 'feat: add feature'",
101
+ context: "some context",
102
+ };
103
+
104
+ recordOutcome(tmpDir, promptHash, outcome);
105
+
106
+ // verify by reading back from db
107
+ const dbResult = getFeedbackDb(tmpDir);
108
+ expect(dbResult.ok).toBe(true);
109
+ if (!dbResult.ok) return;
110
+
111
+ const { db } = dbResult.value;
112
+ const rows = db
113
+ .query("SELECT * FROM feedback WHERE prompt_hash = ?")
114
+ .all(promptHash) as Array<{
115
+ id: string;
116
+ prompt_hash: string;
117
+ command: string;
118
+ accepted: number;
119
+ context: string;
120
+ created_at: string;
121
+ }>;
122
+
123
+ expect(rows.length).toBe(1);
124
+ const row = rows[0];
125
+ expect(row).toBeDefined();
126
+ expect(row?.prompt_hash).toBe(promptHash);
127
+ expect(row?.command).toBe(outcome.command);
128
+ expect(row?.accepted).toBe(1);
129
+ expect(row?.context).toBe("some context");
130
+ expect(row?.created_at).toBeTruthy();
131
+ });
132
+
133
+ test("handles rejected outcome", async () => {
134
+ const promptHash = "rejected-hash";
135
+ const outcome = {
136
+ accepted: false,
137
+ command: "git commit -m 'bad commit'",
138
+ };
139
+
140
+ recordOutcome(tmpDir, promptHash, outcome);
141
+
142
+ const dbResult = getFeedbackDb(tmpDir);
143
+ expect(dbResult.ok).toBe(true);
144
+ if (!dbResult.ok) return;
145
+
146
+ const { db } = dbResult.value;
147
+ const rows = db
148
+ .query("SELECT * FROM feedback WHERE prompt_hash = ?")
149
+ .all(promptHash) as Array<{ accepted: number }>;
150
+
151
+ expect(rows.length).toBe(1);
152
+ expect(rows[0]?.accepted).toBe(0);
153
+ });
154
+ });
155
+
156
+ describe("getPromptStats", () => {
157
+ test("returns correct accept rates", async () => {
158
+ // record some outcomes
159
+ recordOutcome(tmpDir, "hash-A", { accepted: true, command: "cmd1" });
160
+ recordOutcome(tmpDir, "hash-A", { accepted: true, command: "cmd2" });
161
+ recordOutcome(tmpDir, "hash-A", { accepted: false, command: "cmd3" });
162
+ recordOutcome(tmpDir, "hash-B", { accepted: false, command: "cmd4" });
163
+
164
+ const stats = getPromptStats(tmpDir);
165
+
166
+ const statA = stats.find((s) => s.promptHash === "hash-A");
167
+ const statB = stats.find((s) => s.promptHash === "hash-B");
168
+
169
+ expect(statA).toBeDefined();
170
+ expect(statA?.totalUsage).toBe(3);
171
+ // 2 accepted out of 3
172
+ expect(statA?.acceptRate).toBeCloseTo(2 / 3, 5);
173
+
174
+ expect(statB).toBeDefined();
175
+ expect(statB?.totalUsage).toBe(1);
176
+ expect(statB?.acceptRate).toBe(0);
177
+ });
178
+
179
+ test("returns empty array when no feedback recorded", async () => {
180
+ const stats = getPromptStats(tmpDir);
181
+ expect(stats).toEqual([]);
182
+ });
183
+ });
package/src/prompts/__tests__/evolution-resolve.test.ts
@@ -0,0 +1,169 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+ import { mkdirSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { recordOutcome } from "../engine";
5
+ import { createCandidate, resolveABTests } from "../evolution";
6
+
7
+ let tmpDir: string;
8
+
9
+ beforeEach(() => {
10
+ tmpDir = join(
11
+ import.meta.dir,
12
+ `tmp-resolve-${Date.now()}-${Math.random().toString(36).slice(2)}`,
13
+ );
14
+ mkdirSync(tmpDir, { recursive: true });
15
+ });
16
+
17
+ afterEach(() => {
18
+ try {
19
+ const { rmSync } = require("node:fs");
20
+ rmSync(tmpDir, { recursive: true, force: true });
21
+ } catch {
22
+ // ignore
23
+ }
24
+ });
25
+
26
+ describe("resolveABTests", () => {
27
+ test("returns empty array when no candidates exist", () => {
28
+ const resolutions = resolveABTests(tmpDir);
29
+ expect(resolutions).toEqual([]);
30
+ });
31
+
32
+ test("promotes candidate that outperforms incumbent by >5%", () => {
33
+ // Create a candidate for "review"
34
+ const candidate = createCandidate(tmpDir, "review", "Better review prompt");
35
+
36
+ // Record 35 samples for the candidate hash: 30 accepted, 5 rejected (85.7% accept)
37
+ for (let i = 0; i < 30; i++) {
38
+ recordOutcome(tmpDir, candidate.hash, {
39
+ accepted: true,
40
+ command: "review",
41
+ });
42
+ }
43
+ for (let i = 0; i < 5; i++) {
44
+ recordOutcome(tmpDir, candidate.hash, {
45
+ accepted: false,
46
+ command: "review",
47
+ });
48
+ }
49
+
50
+ // Record 35 samples for the incumbent hash: 20 accepted, 15 rejected (57.1% accept)
51
+ for (let i = 0; i < 20; i++) {
52
+ recordOutcome(tmpDir, "incumbent-hash", {
53
+ accepted: true,
54
+ command: "review",
55
+ });
56
+ }
57
+ for (let i = 0; i < 15; i++) {
58
+ recordOutcome(tmpDir, "incumbent-hash", {
59
+ accepted: false,
60
+ command: "review",
61
+ });
62
+ }
63
+
64
+ const resolutions = resolveABTests(tmpDir);
65
+ expect(resolutions.length).toBe(1);
66
+ expect(resolutions[0]?.action).toBe("promoted");
67
+ expect(resolutions[0]?.task).toBe("review");
68
+ expect(resolutions[0]?.candidateAcceptRate).toBeGreaterThan(0.8);
69
+ });
70
+
71
+ test("retires candidate that underperforms incumbent by >5%", () => {
72
+ const candidate = createCandidate(tmpDir, "commit", "Worse commit prompt");
73
+
74
+ // Record 35 samples for the candidate: 10 accepted, 25 rejected (28.6% accept)
75
+ for (let i = 0; i < 10; i++) {
76
+ recordOutcome(tmpDir, candidate.hash, {
77
+ accepted: true,
78
+ command: "commit",
79
+ });
80
+ }
81
+ for (let i = 0; i < 25; i++) {
82
+ recordOutcome(tmpDir, candidate.hash, {
83
+ accepted: false,
84
+ command: "commit",
85
+ });
86
+ }
87
+
88
+ // Record 35 samples for the incumbent: 25 accepted, 10 rejected (71.4% accept)
89
+ for (let i = 0; i < 25; i++) {
90
+ recordOutcome(tmpDir, "incumbent-commit", {
91
+ accepted: true,
92
+ command: "commit",
93
+ });
94
+ }
95
+ for (let i = 0; i < 10; i++) {
96
+ recordOutcome(tmpDir, "incumbent-commit", {
97
+ accepted: false,
98
+ command: "commit",
99
+ });
100
+ }
101
+
102
+ const resolutions = resolveABTests(tmpDir);
103
+ expect(resolutions.length).toBe(1);
104
+ expect(resolutions[0]?.action).toBe("retired");
105
+ expect(resolutions[0]?.task).toBe("commit");
106
+ expect(resolutions[0]?.candidateAcceptRate).toBeLessThan(0.35);
107
+ });
108
+
109
+ test("continues testing with insufficient samples (<30)", () => {
110
+ const candidate = createCandidate(tmpDir, "fix", "Trial fix prompt");
111
+
112
+ // Only 10 samples for candidate
113
+ for (let i = 0; i < 8; i++) {
114
+ recordOutcome(tmpDir, candidate.hash, {
115
+ accepted: true,
116
+ command: "fix",
117
+ });
118
+ }
119
+ for (let i = 0; i < 2; i++) {
120
+ recordOutcome(tmpDir, candidate.hash, {
121
+ accepted: false,
122
+ command: "fix",
123
+ });
124
+ }
125
+
126
+ const resolutions = resolveABTests(tmpDir);
127
+ expect(resolutions.length).toBe(1);
128
+ expect(resolutions[0]?.action).toBe("continuing");
129
+ expect(resolutions[0]?.task).toBe("fix");
130
+ expect(resolutions[0]?.reason).toContain("sample");
131
+ });
132
+
133
+ test("continues testing when difference is within 5% margin", () => {
134
+ const candidate = createCandidate(tmpDir, "tests", "Similar tests prompt");
135
+
136
+ // Record 35 samples for candidate: 25 accepted, 10 rejected (71.4%)
137
+ for (let i = 0; i < 25; i++) {
138
+ recordOutcome(tmpDir, candidate.hash, {
139
+ accepted: true,
140
+ command: "tests",
141
+ });
142
+ }
143
+ for (let i = 0; i < 10; i++) {
144
+ recordOutcome(tmpDir, candidate.hash, {
145
+ accepted: false,
146
+ command: "tests",
147
+ });
148
+ }
149
+
150
+ // Record 35 samples for incumbent: 24 accepted, 11 rejected (68.6%)
151
+ for (let i = 0; i < 24; i++) {
152
+ recordOutcome(tmpDir, "incumbent-tests", {
153
+ accepted: true,
154
+ command: "tests",
155
+ });
156
+ }
157
+ for (let i = 0; i < 11; i++) {
158
+ recordOutcome(tmpDir, "incumbent-tests", {
159
+ accepted: false,
160
+ command: "tests",
161
+ });
162
+ }
163
+
164
+ const resolutions = resolveABTests(tmpDir);
165
+ expect(resolutions.length).toBe(1);
166
+ expect(resolutions[0]?.action).toBe("continuing");
167
+ expect(resolutions[0]?.reason).toContain("margin");
168
+ });
169
+ });
package/src/prompts/__tests__/evolution.test.ts
@@ -0,0 +1,187 @@
1
+ import { afterEach, beforeEach, describe, expect, it, test } from "bun:test";
2
+ import { mkdirSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { recordOutcome } from "../engine";
5
+ import {
6
+ abTest,
7
+ analyseFeedback,
8
+ analyseWorkflowFeedback,
9
+ analyseWorkflowRuns,
10
+ createCandidate,
11
+ promote,
12
+ retire,
13
+ } from "../evolution";
14
+
15
+ let tmpDir: string;
16
+
17
+ beforeEach(() => {
18
+ tmpDir = join(
19
+ import.meta.dir,
20
+ `tmp-evolution-${Date.now()}-${Math.random().toString(36).slice(2)}`,
21
+ );
22
+ mkdirSync(tmpDir, { recursive: true });
23
+ });
24
+
25
+ afterEach(() => {
26
+ try {
27
+ const { rmSync } = require("node:fs");
28
+ rmSync(tmpDir, { recursive: true, force: true });
29
+ } catch {
30
+ // ignore
31
+ }
32
+ });
33
+
34
+ describe("analyseFeedback", () => {
35
+ test("returns low accept rate for task with many rejections", () => {
36
+ // Record 30 rejections and 5 accepts for "review"
37
+ for (let i = 0; i < 30; i++) {
38
+ recordOutcome(tmpDir, "review-hash", {
39
+ accepted: false,
40
+ command: "review",
41
+ });
42
+ }
43
+ for (let i = 0; i < 5; i++) {
44
+ recordOutcome(tmpDir, "review-hash", {
45
+ accepted: true,
46
+ command: "review",
47
+ });
48
+ }
49
+
50
+ const analysis = analyseFeedback(tmpDir, "review");
51
+ expect(analysis.totalSamples).toBe(35);
52
+ expect(analysis.acceptRate).toBeCloseTo(5 / 35, 5);
53
+ expect(analysis.needsImprovement).toBe(true);
54
+ });
55
+
56
+ test("returns high accept rate for well-performing task", () => {
57
+ for (let i = 0; i < 20; i++) {
58
+ recordOutcome(tmpDir, "commit-hash", {
59
+ accepted: true,
60
+ command: "commit",
61
+ });
62
+ }
63
+ for (let i = 0; i < 2; i++) {
64
+ recordOutcome(tmpDir, "commit-hash", {
65
+ accepted: false,
66
+ command: "commit",
67
+ });
68
+ }
69
+
70
+ const analysis = analyseFeedback(tmpDir, "commit");
71
+ expect(analysis.acceptRate).toBeCloseTo(20 / 22, 5);
72
+ expect(analysis.needsImprovement).toBe(false);
73
+ });
74
+
75
+ test("returns empty analysis for task with no feedback", () => {
76
+ const analysis = analyseFeedback(tmpDir, "explain");
77
+ expect(analysis.totalSamples).toBe(0);
78
+ expect(analysis.acceptRate).toBe(0);
79
+ expect(analysis.needsImprovement).toBe(false);
80
+ });
81
+ });
82
+
83
+ describe("createCandidate", () => {
84
+ test("stores candidate prompt version", () => {
85
+ const candidate = createCandidate(
86
+ tmpDir,
87
+ "review",
88
+ "Improved review prompt content",
89
+ );
90
+
91
+ expect(candidate.task).toBe("review");
92
+ expect(candidate.content).toBe("Improved review prompt content");
93
+ expect(candidate.status).toBe("candidate");
94
+ expect(candidate.hash).toBeTruthy();
95
+ });
96
+ });
97
+
98
+ describe("abTest", () => {
99
+ test("returns active prompt when no candidate exists", () => {
100
+ const result = abTest(tmpDir, "commit");
101
+ expect(result.variant).toBe("active");
102
+ });
103
+
104
+ test("returns candidate ~20% of time when candidate exists", () => {
105
+ createCandidate(tmpDir, "review", "Candidate review prompt");
106
+
107
+ let candidateCount = 0;
108
+ const trials = 1000;
109
+ for (let i = 0; i < trials; i++) {
110
+ const result = abTest(tmpDir, "review");
111
+ if (result.variant === "candidate") {
112
+ candidateCount++;
113
+ }
114
+ }
115
+
116
+ // Should be roughly 20% ± tolerance
117
+ const ratio = candidateCount / trials;
118
+ expect(ratio).toBeGreaterThan(0.1);
119
+ expect(ratio).toBeLessThan(0.35);
120
+ });
121
+ });
122
+
123
+ describe("promote", () => {
124
+ test("promotes candidate to active", () => {
125
+ const candidate = createCandidate(tmpDir, "review", "Better review prompt");
126
+ const promoted = promote(tmpDir, candidate.hash);
127
+
128
+ expect(promoted).toBe(true);
129
+
130
+ // After promotion, abTest should always return active (no candidate left)
131
+ const result = abTest(tmpDir, "review");
132
+ expect(result.variant).toBe("active");
133
+ });
134
+ });
135
+
136
+ describe("retire", () => {
137
+ test("retires candidate without promoting", () => {
138
+ const candidate = createCandidate(tmpDir, "review", "Bad review prompt");
139
+ const retired = retire(tmpDir, candidate.hash);
140
+
141
+ expect(retired).toBe(true);
142
+
143
+ // No candidate should exist
144
+ const result = abTest(tmpDir, "review");
145
+ expect(result.variant).toBe("active");
146
+ });
147
+ });
148
+
149
+ describe("analyseWorkflowFeedback", () => {
150
+ it("should return an array", () => {
151
+ const result = analyseWorkflowFeedback(tmpDir);
152
+ expect(Array.isArray(result)).toBe(true);
153
+ });
154
+
155
+ it("should have step and acceptRate fields when data exists", () => {
156
+ const result = analyseWorkflowFeedback(tmpDir);
157
+ for (const entry of result) {
158
+ expect(typeof entry.step).toBe("string");
159
+ expect(typeof entry.totalSamples).toBe("number");
160
+ expect(typeof entry.acceptRate).toBe("number");
161
+ expect(typeof entry.needsImprovement).toBe("boolean");
162
+ }
163
+ });
164
+ });
165
+
166
+ describe("analyseWorkflowRuns", () => {
167
+ it("should return an array", () => {
168
+ const result = analyseWorkflowRuns(tmpDir);
169
+ expect(Array.isArray(result)).toBe(true);
170
+ });
171
+
172
+ it("should respect limit parameter", () => {
173
+ const result = analyseWorkflowRuns(tmpDir, 3);
174
+ expect(result.length).toBeLessThanOrEqual(3);
175
+ });
176
+
177
+ it("should have correct fields when data exists", () => {
178
+ const result = analyseWorkflowRuns(tmpDir);
179
+ for (const entry of result) {
180
+ expect(typeof entry.workflowId).toBe("string");
181
+ expect(typeof entry.totalSteps).toBe("number");
182
+ expect(typeof entry.passedSteps).toBe("number");
183
+ expect(typeof entry.successRate).toBe("number");
184
+ expect(typeof entry.createdAt).toBe("string");
185
+ }
186
+ });
187
+ });
package/src/prompts/__tests__/loader.test.ts
@@ -0,0 +1,105 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+ import { mkdirSync, writeFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import {
5
+ loadConstitution,
6
+ loadUserOverride,
7
+ mergePrompts,
8
+ renderTemplate,
9
+ } from "../loader";
10
+
11
+ let tmpDir: string;
12
+
13
+ beforeEach(() => {
14
+ tmpDir = join(
15
+ import.meta.dir,
16
+ `tmp-loader-${Date.now()}-${Math.random().toString(36).slice(2)}`,
17
+ );
18
+ mkdirSync(tmpDir, { recursive: true });
19
+ });
20
+
21
+ afterEach(() => {
22
+ // cleanup is best-effort
23
+ try {
24
+ const { rmSync } = require("node:fs");
25
+ rmSync(tmpDir, { recursive: true, force: true });
26
+ } catch {
27
+ // ignore
28
+ }
29
+ });
30
+
31
+ describe("loadConstitution", () => {
32
+ test("returns content when file exists", async () => {
33
+ const constitutionContent = "# My Constitution\nBe helpful.";
34
+ writeFileSync(join(tmpDir, "constitution.md"), constitutionContent);
35
+
36
+ const result = await loadConstitution(tmpDir);
37
+ expect(result).toBe(constitutionContent);
38
+ });
39
+
40
+ test("returns empty string when file does not exist", async () => {
41
+ const result = await loadConstitution(tmpDir);
42
+ expect(result).toBe("");
43
+ });
44
+ });
45
+
46
+ describe("loadUserOverride", () => {
47
+ test("returns content when file exists", async () => {
48
+ const promptsDir = join(tmpDir, "prompts");
49
+ mkdirSync(promptsDir, { recursive: true });
50
+ const overrideContent = "# Custom Review Prompt\nReview carefully.";
51
+ writeFileSync(join(promptsDir, "review.md"), overrideContent);
52
+
53
+ const result = await loadUserOverride(tmpDir, "review");
54
+ expect(result).toBe(overrideContent);
55
+ });
56
+
57
+ test("returns null when file does not exist", async () => {
58
+ const result = await loadUserOverride(tmpDir, "review");
59
+ expect(result).toBeNull();
60
+ });
61
+ });
62
+
63
+ describe("mergePrompts", () => {
64
+ test("returns default when no override", () => {
65
+ const defaultPrompt = "Default prompt content";
66
+ const result = mergePrompts(defaultPrompt, null);
67
+ expect(result).toBe(defaultPrompt);
68
+ });
69
+
70
+ test("returns override when provided (user has full control)", () => {
71
+ const defaultPrompt = "Default prompt content";
72
+ const userOverride = "User override content";
73
+ const result = mergePrompts(defaultPrompt, userOverride);
74
+ expect(result).toBe(userOverride);
75
+ });
76
+ });
77
+
78
+ describe("renderTemplate", () => {
79
+ test("replaces {{variables}} with values", () => {
80
+ const template = "Hello {{name}}, your task is {{task}}.";
81
+ const variables = { name: "Alice", task: "review" };
82
+ const result = renderTemplate(template, variables);
83
+ expect(result).toBe("Hello Alice, your task is review.");
84
+ });
85
+
86
+ test("leaves unreplaced variables as-is", () => {
87
+ const template = "Hello {{name}}, your {{missing}} is here.";
88
+ const variables = { name: "Alice" };
89
+ const result = renderTemplate(template, variables);
90
+ expect(result).toBe("Hello Alice, your {{missing}} is here.");
91
+ });
92
+
93
+ test("replaces multiple occurrences of same variable", () => {
94
+ const template = "{{task}} is the task. Do {{task}} well.";
95
+ const variables = { task: "review" };
96
+ const result = renderTemplate(template, variables);
97
+ expect(result).toBe("review is the task. Do review well.");
98
+ });
99
+
100
+ test("handles empty variables object", () => {
101
+ const template = "Hello {{name}}.";
102
+ const result = renderTemplate(template, {});
103
+ expect(result).toBe("Hello {{name}}.");
104
+ });
105
+ });