claude-mycelium 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. package/.claude/settings.local.json +14 -0
  2. package/README.md +304 -0
  3. package/dist/coordination/gradient-cache.d.ts +48 -0
  4. package/dist/coordination/gradient-cache.d.ts.map +1 -0
  5. package/dist/coordination/gradient-cache.js +145 -0
  6. package/dist/coordination/gradient-cache.js.map +1 -0
  7. package/dist/coordination/index.d.ts +10 -0
  8. package/dist/coordination/index.d.ts.map +1 -0
  9. package/dist/coordination/index.js +10 -0
  10. package/dist/coordination/index.js.map +1 -0
  11. package/dist/core/agent-executor.d.ts +31 -0
  12. package/dist/core/agent-executor.d.ts.map +1 -0
  13. package/dist/core/agent-executor.js +257 -0
  14. package/dist/core/agent-executor.js.map +1 -0
  15. package/dist/core/change-applier.d.ts +10 -0
  16. package/dist/core/change-applier.d.ts.map +1 -0
  17. package/dist/core/change-applier.js +32 -0
  18. package/dist/core/change-applier.js.map +1 -0
  19. package/dist/core/gradient.d.ts +60 -0
  20. package/dist/core/gradient.d.ts.map +1 -0
  21. package/dist/core/gradient.js +191 -0
  22. package/dist/core/gradient.js.map +1 -0
  23. package/dist/core/index.d.ts +24 -0
  24. package/dist/core/index.d.ts.map +1 -0
  25. package/dist/core/index.js +24 -0
  26. package/dist/core/index.js.map +1 -0
  27. package/dist/core/mode-selector.d.ts +44 -0
  28. package/dist/core/mode-selector.d.ts.map +1 -0
  29. package/dist/core/mode-selector.js +208 -0
  30. package/dist/core/mode-selector.js.map +1 -0
  31. package/dist/core/signals/centrality.d.ts +44 -0
  32. package/dist/core/signals/centrality.d.ts.map +1 -0
  33. package/dist/core/signals/centrality.js +264 -0
  34. package/dist/core/signals/centrality.js.map +1 -0
  35. package/dist/core/signals/churn.d.ts +41 -0
  36. package/dist/core/signals/churn.d.ts.map +1 -0
  37. package/dist/core/signals/churn.js +188 -0
  38. package/dist/core/signals/churn.js.map +1 -0
  39. package/dist/core/signals/complexity.d.ts +29 -0
  40. package/dist/core/signals/complexity.d.ts.map +1 -0
  41. package/dist/core/signals/complexity.js +169 -0
  42. package/dist/core/signals/complexity.js.map +1 -0
  43. package/dist/core/signals/debt.d.ts +27 -0
  44. package/dist/core/signals/debt.d.ts.map +1 -0
  45. package/dist/core/signals/debt.js +80 -0
  46. package/dist/core/signals/debt.js.map +1 -0
  47. package/dist/core/signals/errors.d.ts +32 -0
  48. package/dist/core/signals/errors.d.ts.map +1 -0
  49. package/dist/core/signals/errors.js +73 -0
  50. package/dist/core/signals/errors.js.map +1 -0
  51. package/dist/core/signals/index.d.ts +19 -0
  52. package/dist/core/signals/index.d.ts.map +1 -0
  53. package/dist/core/signals/index.js +19 -0
  54. package/dist/core/signals/index.js.map +1 -0
  55. package/dist/cost/cost-tracker.d.ts +90 -0
  56. package/dist/cost/cost-tracker.d.ts.map +1 -0
  57. package/dist/cost/cost-tracker.js +305 -0
  58. package/dist/cost/cost-tracker.js.map +1 -0
  59. package/dist/cost/index.d.ts +56 -0
  60. package/dist/cost/index.d.ts.map +1 -0
  61. package/dist/cost/index.js +111 -0
  62. package/dist/cost/index.js.map +1 -0
  63. package/dist/index.d.ts +35 -0
  64. package/dist/index.d.ts.map +1 -0
  65. package/dist/index.js +40 -0
  66. package/dist/index.js.map +1 -0
  67. package/dist/llm/anthropic-client.d.ts +52 -0
  68. package/dist/llm/anthropic-client.d.ts.map +1 -0
  69. package/dist/llm/anthropic-client.js +310 -0
  70. package/dist/llm/anthropic-client.js.map +1 -0
  71. package/dist/llm/index.d.ts +27 -0
  72. package/dist/llm/index.d.ts.map +1 -0
  73. package/dist/llm/index.js +34 -0
  74. package/dist/llm/index.js.map +1 -0
  75. package/dist/prompts/complexity-reducer.d.ts +7 -0
  76. package/dist/prompts/complexity-reducer.d.ts.map +1 -0
  77. package/dist/prompts/complexity-reducer.js +55 -0
  78. package/dist/prompts/complexity-reducer.js.map +1 -0
  79. package/dist/prompts/debt-payer.d.ts +7 -0
  80. package/dist/prompts/debt-payer.d.ts.map +1 -0
  81. package/dist/prompts/debt-payer.js +55 -0
  82. package/dist/prompts/debt-payer.js.map +1 -0
  83. package/dist/prompts/error-reducer.d.ts +7 -0
  84. package/dist/prompts/error-reducer.d.ts.map +1 -0
  85. package/dist/prompts/error-reducer.js +54 -0
  86. package/dist/prompts/error-reducer.js.map +1 -0
  87. package/dist/prompts/index.d.ts +22 -0
  88. package/dist/prompts/index.d.ts.map +1 -0
  89. package/dist/prompts/index.js +112 -0
  90. package/dist/prompts/index.js.map +1 -0
  91. package/dist/prompts/stabilizer.d.ts +7 -0
  92. package/dist/prompts/stabilizer.d.ts.map +1 -0
  93. package/dist/prompts/stabilizer.js +55 -0
  94. package/dist/prompts/stabilizer.js.map +1 -0
  95. package/dist/prompts/types.d.ts +14 -0
  96. package/dist/prompts/types.d.ts.map +1 -0
  97. package/dist/prompts/types.js +5 -0
  98. package/dist/prompts/types.js.map +1 -0
  99. package/dist/trace/index.d.ts +51 -0
  100. package/dist/trace/index.d.ts.map +1 -0
  101. package/dist/trace/index.js +60 -0
  102. package/dist/trace/index.js.map +1 -0
  103. package/dist/trace/trace-event.d.ts +72 -0
  104. package/dist/trace/trace-event.d.ts.map +1 -0
  105. package/dist/trace/trace-event.js +244 -0
  106. package/dist/trace/trace-event.js.map +1 -0
  107. package/dist/types/index.d.ts +206 -0
  108. package/dist/types/index.d.ts.map +1 -0
  109. package/dist/types/index.js +6 -0
  110. package/dist/types/index.js.map +1 -0
  111. package/dist/utils/ci-provider.d.ts +43 -0
  112. package/dist/utils/ci-provider.d.ts.map +1 -0
  113. package/dist/utils/ci-provider.js +130 -0
  114. package/dist/utils/ci-provider.js.map +1 -0
  115. package/dist/utils/config.d.ts +31 -0
  116. package/dist/utils/config.d.ts.map +1 -0
  117. package/dist/utils/config.js +85 -0
  118. package/dist/utils/config.js.map +1 -0
  119. package/dist/utils/error-provider.d.ts +51 -0
  120. package/dist/utils/error-provider.d.ts.map +1 -0
  121. package/dist/utils/error-provider.js +123 -0
  122. package/dist/utils/error-provider.js.map +1 -0
  123. package/dist/utils/file-utils.d.ts +18 -0
  124. package/dist/utils/file-utils.d.ts.map +1 -0
  125. package/dist/utils/file-utils.js +95 -0
  126. package/dist/utils/file-utils.js.map +1 -0
  127. package/dist/utils/index.d.ts +10 -0
  128. package/dist/utils/index.d.ts.map +1 -0
  129. package/dist/utils/index.js +10 -0
  130. package/dist/utils/index.js.map +1 -0
  131. package/dist/utils/logger.d.ts +36 -0
  132. package/dist/utils/logger.d.ts.map +1 -0
  133. package/dist/utils/logger.js +74 -0
  134. package/dist/utils/logger.js.map +1 -0
  135. package/docs/IMPLEMENTATION-STATUS.md +199 -0
  136. package/docs/PHASE-0-COMPLETE.md +252 -0
  137. package/docs/PHASE-1-COMPLETE.md +204 -0
  138. package/docs/PHASE-2-COMPLETE.md +233 -0
  139. package/docs/PHASE2_COMPLETION_CHECKLIST.md +290 -0
  140. package/docs/PHASE2_INTEGRATION_SUMMARY.md +255 -0
  141. package/docs/PHASE2_QUICK_REFERENCE.md +365 -0
  142. package/docs/PHASE2_TEST_RESULTS.md +282 -0
  143. package/docs/ROADMAP.md +746 -0
  144. package/docs/SNAPSHOT.md +376 -0
  145. package/docs/adrs/ADR-001-signal-computation.md +76 -0
  146. package/docs/adrs/ADR-002-inhibitor-signals.md +108 -0
  147. package/docs/adrs/ADR-003-llm-integration.md +156 -0
  148. package/docs/adrs/ADR-004-process-architecture.md +175 -0
  149. package/docs/adrs/ADR-005-testing-strategy.md +243 -0
  150. package/docs/pitch.md +94 -0
  151. package/docs/specs/fourth-spec.md +1973 -0
  152. package/docs/specs/initial-spec.md +2096 -0
  153. package/docs/specs/second-spec.md +2690 -0
  154. package/package.json +50 -0
  155. package/src/coordination/gradient-cache.ts +185 -0
  156. package/src/coordination/index.ts +10 -0
  157. package/src/core/agent-executor.ts +327 -0
  158. package/src/core/change-applier.ts +338 -0
  159. package/src/core/gradient.ts +258 -0
  160. package/src/core/index.ts +24 -0
  161. package/src/core/mode-selector.ts +243 -0
  162. package/src/core/signals/centrality.ts +328 -0
  163. package/src/core/signals/churn.ts +239 -0
  164. package/src/core/signals/complexity.ts +206 -0
  165. package/src/core/signals/debt.ts +111 -0
  166. package/src/core/signals/errors.ts +93 -0
  167. package/src/core/signals/index.ts +19 -0
  168. package/src/cost/cost-tracker.ts +410 -0
  169. package/src/cost/index.ts +143 -0
  170. package/src/index.ts +43 -0
  171. package/src/llm/anthropic-client.ts +415 -0
  172. package/src/llm/index.ts +43 -0
  173. package/src/prompts/complexity-reducer.ts +59 -0
  174. package/src/prompts/debt-payer.ts +59 -0
  175. package/src/prompts/error-reducer.ts +58 -0
  176. package/src/prompts/index.ts +128 -0
  177. package/src/prompts/stabilizer.ts +59 -0
  178. package/src/prompts/types.ts +15 -0
  179. package/src/trace/README.md +178 -0
  180. package/src/trace/index.ts +88 -0
  181. package/src/trace/trace-event.ts +324 -0
  182. package/src/types/index.ts +271 -0
  183. package/src/utils/ci-provider.ts +145 -0
  184. package/src/utils/config.ts +95 -0
  185. package/src/utils/error-provider.ts +138 -0
  186. package/src/utils/file-utils.ts +111 -0
  187. package/src/utils/index.ts +10 -0
  188. package/src/utils/logger.ts +94 -0
  189. package/test-8d713cc8-f4b7-403d-8153-57573172b94c.ts +3 -0
  190. package/tests/coordination/gradient-cache.test.ts +270 -0
  191. package/tests/core/agent-executor.test.ts +217 -0
  192. package/tests/core/change-applier.test.ts +336 -0
  193. package/tests/core/gradient.test.ts +263 -0
  194. package/tests/core/mode-selector.test.ts +239 -0
  195. package/tests/core/signals/centrality.test.ts +512 -0
  196. package/tests/core/signals/churn.test.ts +355 -0
  197. package/tests/core/signals/complexity.test.ts +284 -0
  198. package/tests/core/signals/debt.test.ts +437 -0
  199. package/tests/core/signals/errors.test.ts +350 -0
  200. package/tests/cost/cost-tracker.test.ts +475 -0
  201. package/tests/integration/phase2.test.ts +405 -0
  202. package/tests/llm/anthropic-client.test.ts +437 -0
  203. package/tests/prompts/prompts.test.ts +266 -0
  204. package/tests/trace/trace-event.test.ts +666 -0
  205. package/tests/utils/file-utils.test.ts +148 -0
  206. package/tsconfig.json +24 -0
  207. package/vitest.config.ts +28 -0
@@ -0,0 +1,270 @@
1
+ import { describe, it, expect, beforeEach, vi } from 'vitest';
2
+ import {
3
+ getCachedGradient,
4
+ setCachedGradient,
5
+ invalidateGradient,
6
+ invalidateGradientCache,
7
+ getGradientCacheStats,
8
+ getOrComputeGradient,
9
+ getOrComputeGradientBatch,
10
+ } from '../../src/coordination/gradient-cache';
11
+ import type { GradientScore } from '../../src/core/gradient';
12
+
13
+ describe('gradient cache', () => {
14
/**
 * Builds a GradientScore fixture for the given file and overall score.
 * All other fields are fixed constants, so fixtures differ only by
 * `file` and `score`.
 */
function createMockGradient(file: string, score: number): GradientScore {
  return {
    file,
    score,
    signals: { complexity: 0.5, churn: 0.3, error_rate: 0.2, debt: 0.4, centrality: 0.6 },
    breakdown: { baseScore: 0.4, impactMultiplier: 0.75, efficiencyPenalty: 0 },
    dominantSignal: { name: 'complexity', value: 0.5 },
  };
}
34
+
35
// Give every test a clean slate: real timers and an empty cache.
beforeEach(() => {
  // Several tests in this suite install fake timers and restore them only on
  // their final line. If one of them fails an assertion first, the fake clock
  // would otherwise stay installed for every later test.
  vi.useRealTimers();

  // Clear cache before each test
  invalidateGradientCache();
});
39
+
40
// Plain set/get round-trips through the cache; no clock manipulation involved.
describe('basic caching', () => {
  it('returns null for uncached file', () => {
    expect(getCachedGradient('test.ts')).toBeNull();
  });

  it('caches and retrieves gradient', () => {
    const stored = createMockGradient('test.ts', 0.5);

    setCachedGradient(stored);

    expect(getCachedGradient('test.ts')).toEqual(stored);
  });

  it('handles multiple files independently', () => {
    const fixtures = [
      createMockGradient('file1.ts', 0.5),
      createMockGradient('file2.ts', 0.7),
    ];

    for (const entry of fixtures) {
      setCachedGradient(entry);
    }

    // Each file must come back with its own score.
    for (const { file, score } of fixtures) {
      expect(getCachedGradient(file)?.score).toBe(score);
    }
  });

  it('overwrites existing cache entry', () => {
    // Two writes for the same file: the second one must win.
    setCachedGradient(createMockGradient('test.ts', 0.5));
    setCachedGradient(createMockGradient('test.ts', 0.8));

    const latest = getCachedGradient('test.ts');
    expect(latest?.score).toBe(0.8);
  });
});
76
+
77
// Time-based expiry of cache entries, driven by Vitest's fake clock.
// NOTE(review): the 5-minute figure comes from this suite's own comments; the TTL
// constant itself lives in gradient-cache and is not visible here — confirm.
describe('cache expiration', () => {
  it('expires cache after TTL', () => {
    // Install the fake clock before caching so the entry's timestamp and the
    // later read are taken from the same clock.
    vi.useFakeTimers();
    try {
      setCachedGradient(createMockGradient('test.ts', 0.5));

      // Mock time passing (5 minutes + 1 second)
      vi.advanceTimersByTime(5 * 60 * 1000 + 1000);

      expect(getCachedGradient('test.ts')).toBeNull();
    } finally {
      // Restore real timers even when an assertion above throws.
      vi.useRealTimers();
    }
  });

  it('does not expire before TTL', () => {
    vi.useFakeTimers();
    try {
      setCachedGradient(createMockGradient('test.ts', 0.5));

      // Mock time passing (4 minutes)
      vi.advanceTimersByTime(4 * 60 * 1000);

      const cached = getCachedGradient('test.ts');
      expect(cached).not.toBeNull();
      expect(cached?.score).toBe(0.5);
    } finally {
      // Restore real timers even when an assertion above throws.
      vi.useRealTimers();
    }
  });
});
107
+
108
// Removing a single entry versus clearing the whole cache.
describe('cache invalidation', () => {
  // Seeds the cache with the two entries the tests below rely on.
  const seedTwoFiles = (): void => {
    setCachedGradient(createMockGradient('file1.ts', 0.5));
    setCachedGradient(createMockGradient('file2.ts', 0.7));
  };

  it('invalidates specific file', () => {
    seedTwoFiles();

    invalidateGradient('file1.ts');

    // Only the targeted entry disappears.
    expect(getCachedGradient('file1.ts')).toBeNull();
    expect(getCachedGradient('file2.ts')).not.toBeNull();
  });

  it('invalidates entire cache', () => {
    seedTwoFiles();

    invalidateGradientCache();

    for (const file of ['file1.ts', 'file2.ts']) {
      expect(getCachedGradient(file)).toBeNull();
    }
  });

  it('handles invalidating non-existent file', () => {
    const invalidateMissing = () => invalidateGradient('nonexistent.ts');

    expect(invalidateMissing).not.toThrow();
  });
});
139
+
140
// Shape and ordering of the report returned by getGradientCacheStats().
describe('cache statistics', () => {
  it('returns empty stats for empty cache', () => {
    const stats = getGradientCacheStats();

    expect(stats.size).toBe(0);
    expect(stats.entries).toHaveLength(0);
  });

  it('returns stats for cached entries', () => {
    setCachedGradient(createMockGradient('file1.ts', 0.5));
    setCachedGradient(createMockGradient('file2.ts', 0.7));

    const stats = getGradientCacheStats();

    expect(stats.size).toBe(2);
    expect(stats.entries).toHaveLength(2);
  });

  it('includes age in seconds', () => {
    // Install the fake clock before caching: the entry timestamp and the stats
    // read then come from the same clock, so the age is exactly the advanced
    // interval rather than that interval plus real elapsed milliseconds.
    vi.useFakeTimers();
    try {
      setCachedGradient(createMockGradient('test.ts', 0.5));

      vi.advanceTimersByTime(30 * 1000); // 30 seconds

      const stats = getGradientCacheStats();
      expect(stats.entries[0].age).toBe(30);
    } finally {
      // Restore real timers even when an assertion above throws.
      vi.useRealTimers();
    }
  });

  it('sorts entries by age (oldest first)', () => {
    vi.useFakeTimers();
    try {
      // First entry is cached 10 seconds before the second one.
      setCachedGradient(createMockGradient('file1.ts', 0.5));
      vi.advanceTimersByTime(10 * 1000);
      setCachedGradient(createMockGradient('file2.ts', 0.7));

      const stats = getGradientCacheStats();
      expect(stats.entries[0].age).toBeGreaterThan(stats.entries[1].age);
    } finally {
      // Restore real timers even when an assertion above throws.
      vi.useRealTimers();
    }
  });
});
188
+
189
// Single-file read-through: use the cache when possible, otherwise compute and store.
describe('getOrComputeGradient', () => {
  it('returns cached gradient if available', async () => {
    const cachedEntry = createMockGradient('test.ts', 0.5);
    setCachedGradient(cachedEntry);
    const compute = vi.fn();

    const outcome = await getOrComputeGradient('test.ts', compute);

    expect(outcome).toEqual(cachedEntry);
    // A cache hit must not trigger the compute callback.
    expect(compute).not.toHaveBeenCalled();
  });

  it('computes and caches if not cached', async () => {
    const computed = createMockGradient('test.ts', 0.5);
    const compute = vi.fn().mockResolvedValue(computed);

    const outcome = await getOrComputeGradient('test.ts', compute);

    expect(outcome).toEqual(computed);
    expect(compute).toHaveBeenCalledOnce();
    // The freshly computed value is now served from the cache.
    expect(getCachedGradient('test.ts')).toEqual(computed);
  });
});
212
+
213
// Batch read-through: only files missing from the cache are handed to the compute callback.
describe('getOrComputeGradientBatch', () => {
  it('returns all cached if all available', async () => {
    setCachedGradient(createMockGradient('file1.ts', 0.5));
    setCachedGradient(createMockGradient('file2.ts', 0.7));
    const computeBatch = vi.fn();
    const requested = ['file1.ts', 'file2.ts'];

    const resolved = await getOrComputeGradientBatch(requested, computeBatch);

    expect(resolved.size).toBe(2);
    expect(computeBatch).not.toHaveBeenCalled();
  });

  it('computes only uncached files', async () => {
    // file1.ts is already cached; file2.ts has to be computed.
    setCachedGradient(createMockGradient('file1.ts', 0.5));
    const missing = createMockGradient('file2.ts', 0.7);
    const computeBatch = vi.fn().mockResolvedValue(new Map([['file2.ts', missing]]));
    const requested = ['file1.ts', 'file2.ts'];

    const resolved = await getOrComputeGradientBatch(requested, computeBatch);

    expect(resolved.size).toBe(2);
    expect(computeBatch).toHaveBeenCalledWith(['file2.ts']);
    expect(resolved.get('file1.ts')?.score).toBe(0.5);
    expect(resolved.get('file2.ts')?.score).toBe(0.7);
  });

  it('caches newly computed gradients', async () => {
    const computed = createMockGradient('test.ts', 0.5);
    const computeBatch = vi.fn().mockResolvedValue(new Map([['test.ts', computed]]));

    await getOrComputeGradientBatch(['test.ts'], computeBatch);

    expect(getCachedGradient('test.ts')).toEqual(computed);
  });

  it('handles empty file list', async () => {
    const computeBatch = vi.fn();

    const resolved = await getOrComputeGradientBatch([], computeBatch);

    expect(resolved.size).toBe(0);
    expect(computeBatch).not.toHaveBeenCalled();
  });
});
270
+ });
@@ -0,0 +1,217 @@
1
+ /**
2
+ * Agent Executor Tests
3
+ * Tests the main agent execution flow with mocked dependencies
4
+ */
5
+
6
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
7
+ import { executeAgent } from '../../src/core/agent-executor.js';
8
+ import type { Mode } from '../../src/types/index.js';
9
+ import { writeFile, readFile, unlink } from 'fs/promises';
10
+ import { randomUUID } from 'crypto';
11
+
12
+ // Test fixture file: a uniquely named TypeScript source addressed by a relative path.
// The suite's hooks write TEST_CONTENT to TEST_FILE before each test and unlink it afterwards.
// NOTE(review): a relative path resolves against the process working directory. The package
// file list includes a stray three-line test-<uuid>.ts at the package root, which looks like a
// leaked copy of this fixture — confirm, and consider creating it under a temp directory.
13
+ const TEST_FILE = `test-${randomUUID()}.ts`;
14
+ const TEST_CONTENT = `function add(a: number, b: number): number {
15
+ return a + b;
16
+ }`;
17
+
18
+ describe('Agent Executor', () => {
19
// Value of ANTHROPIC_API_KEY before the current test, so afterEach can put it back.
let originalApiKey: string | undefined;

// Per-test setup: create the fixture file and install a placeholder API key.
beforeEach(async () => {
  // Remember the caller's key first so it can be restored even if setup fails later.
  originalApiKey = process.env.ANTHROPIC_API_KEY;

  // Create test file
  await writeFile(TEST_FILE, TEST_CONTENT, 'utf-8');

  // Mock environment
  process.env.ANTHROPIC_API_KEY = 'test-key';
});

// Per-test teardown: restore the environment and remove the fixture file.
afterEach(async () => {
  // Restore the environment first so it happens even if file cleanup throws.
  if (originalApiKey === undefined) {
    delete process.env.ANTHROPIC_API_KEY;
  } else {
    process.env.ANTHROPIC_API_KEY = originalApiKey;
  }

  // Clean up test file
  try {
    await unlink(TEST_FILE);
  } catch (err) {
    // A missing file is expected and ignored. Any other failure is rethrown
    // because it would otherwise silently leave the fixture on disk.
    if ((err as NodeJS.ErrnoException)?.code !== 'ENOENT') {
      throw err;
    }
  }
});
35
+
36
// Happy-path run of executeAgent (dryRun: false) with the LLM client and CI provider mocked.
// The only test in this group is currently disabled via it.skip.
// NOTE(review): Vitest hoists vi.mock() calls to the top of the module, so the factories
// registered inside this test body are not scoped to this test. This file registers several
// factories for the same module paths — confirm which one is in effect before re-enabling.
+ describe('Full execution flow', () => {
37
+ it.skip('should execute agent and return result', async () => {
38
+ // Mock LLM response: keep the module's other exports (importOriginal) and replace only callLLM
39
+ vi.mock('../../src/llm/anthropic-client.js', async (importOriginal) => {
40
+ const actual = await importOriginal();
41
+ return {
42
+ ...actual,
43
+ callLLM: vi.fn().mockResolvedValue({
44
+ content: `Here's the improved code:
45
+
46
+ \`\`\`typescript
47
+ function add(a: number, b: number): number {
48
+ // Validate inputs
49
+ if (typeof a !== 'number' || typeof b !== 'number') {
50
+ throw new TypeError('Arguments must be numbers');
51
+ }
52
+ return a + b;
53
+ }
54
+ \`\`\``,
55
+ usage: {
56
+ inputTokens: 100,
57
+ outputTokens: 150,
58
+ },
59
+ model: 'claude-sonnet-4-20250514',
60
+ }),
61
+ };
62
+ });
63
+
64
+ // Mock CI provider so that ciProvider.run resolves to a passing result
65
+ vi.mock('../../src/utils/ci-provider.js', () => ({
66
+ ciProvider: {
67
+ run: vi.fn().mockResolvedValue({
68
+ passed: true,
69
+ output: 'All tests passed',
70
+ }),
71
+ },
72
+ }));
73
+
74
+ const result = await executeAgent(TEST_FILE, 'error_reducer', {
75
+ dryRun: false,
76
+ maxRetries: 1,
77
+ });
78
+
79
+ expect(result.success).toBe(true);
80
+ expect(result.changes).toContain(TEST_FILE);
81
+ expect(result.trace).toBeDefined();
82
+ expect(result.trace.mode).toBe('error_reducer');
83
+ expect(result.cost).toBeGreaterThan(0);
84
+ }, 30000); // 30 second timeout for LLM calls
85
+ });
86
+
87
// A dry run must leave the target file's contents untouched on disk.
describe('Dry run mode', () => {
  it('should not modify files in dry run', async () => {
    // Snapshot the fixture before the agent runs.
    const snapshot = await readFile(TEST_FILE, 'utf-8');

    await executeAgent(TEST_FILE, 'complexity_reducer', { dryRun: true });

    // Re-read from disk and compare against the snapshot.
    const current = await readFile(TEST_FILE, 'utf-8');
    expect(current).toBe(snapshot);
  });
});
99
+
100
// Failure paths of executeAgent: LLM call rejection, CI failure, and a reply without a code block.
// NOTE(review): Vitest hoists vi.mock() calls to the top of the module, so the factories below
// are not scoped to the test that contains them, and several target the same module path.
// Confirm which factory is actually in effect for each test.
+ describe('Error handling', () => {
101
// callLLM is mocked to reject once with 'API error' and then resolve once.
// NOTE(review): this mock reports tokensIn/tokensOut at the top level, whereas the other mocks
// in this file nest inputTokens/outputTokens under `usage` — verify which shape callLLM returns.
+ it('should retry on LLM failure', async () => {
102
+ vi.mock('../../src/llm/anthropic-client.js', () => ({
103
+ callLLM: vi
104
+ .fn()
105
+ .mockRejectedValueOnce(new Error('API error'))
106
+ .mockResolvedValueOnce({
107
+ content: '```typescript\nfunction test() {}\n```',
108
+ tokensIn: 50,
109
+ tokensOut: 75,
110
+ model: 'claude-sonnet-4-20250514',
111
+ stopReason: 'end_turn',
112
+ }),
113
+ }));
114
+
115
+ const result = await executeAgent(TEST_FILE, 'debt_payer', {
116
+ maxRetries: 2,
117
+ dryRun: true,
118
+ });
119
+
120
+ expect(result.success).toBe(true);
121
+ });
122
+
123
// Runs with dryRun: false against a CI mock that reports failure, expects executeAgent to
// reject, then checks that the fixture's contents on disk equal what was read beforehand.
+ it('should rollback on CI failure', async () => {
124
+ const contentBefore = await readFile(TEST_FILE, 'utf-8');
125
+
126
+ vi.mock('../../src/llm/anthropic-client.js', () => ({
127
+ callLLM: vi.fn().mockResolvedValue({
128
+ content: '```typescript\nfunction broken() { syntax error }\n```',
129
+ usage: {
130
+ inputTokens: 50,
131
+ outputTokens: 75,
132
+ },
133
+ model: 'claude-sonnet-4-20250514',
134
+ }),
135
+ }));
136
+
137
+ vi.mock('../../src/utils/ci-provider.js', () => ({
138
+ ciProvider: {
139
+ run: vi.fn().mockResolvedValue({
140
+ passed: false,
141
+ output: 'Syntax error',
142
+ }),
143
+ },
144
+ }));
145
+
146
+ await expect(
147
+ executeAgent(TEST_FILE, 'error_reducer', {
148
+ dryRun: false,
149
+ })
150
+ ).rejects.toThrow();
151
+
152
+ // File should be rolled back
153
+ const contentAfter = await readFile(TEST_FILE, 'utf-8');
154
+ expect(contentAfter).toBe(contentBefore);
155
+ });
156
+
157
// Disabled via it.skip. The mocked reply contains no fenced code block and the test expects
// a rejection whose message includes 'Failed to parse valid code changes'.
+ it.skip('should handle invalid LLM response', async () => {
158
+ vi.mock('../../src/llm/anthropic-client.js', () => ({
159
+ callLLM: vi.fn().mockResolvedValue({
160
+ content: 'No code block here!',
161
+ usage: {
162
+ inputTokens: 50,
163
+ outputTokens: 20,
164
+ },
165
+ model: 'claude-sonnet-4-20250514',
166
+ stopReason: 'end_turn',
167
+ }),
168
+ }));
169
+
170
+ await expect(
171
+ executeAgent(TEST_FILE, 'error_reducer', {
172
+ dryRun: true,
173
+ })
174
+ ).rejects.toThrow('Failed to parse valid code changes');
175
+ });
176
+ });
177
+
178
// Checks the dollar cost reported for a mocked call of 1000 input and 500 output tokens.
// The only test in this group is currently disabled via it.skip.
// NOTE(review): Vitest hoists vi.mock() calls to the top of the module, so this factory is not
// scoped to this test and competes with the other anthropic-client factories in this file.
+ describe('Cost calculation', () => {
179
+ it.skip('should calculate cost correctly', async () => {
180
+ vi.mock('../../src/llm/anthropic-client.js', () => ({
181
+ callLLM: vi.fn().mockResolvedValue({
182
+ content: '```typescript\nfunction test() {}\n```',
183
+ usage: {
184
+ inputTokens: 1000,
185
+ outputTokens: 500,
186
+ },
187
+ model: 'claude-sonnet-4-20250514',
188
+ stopReason: 'end_turn',
189
+ }),
190
+ }));
191
+
192
+ const result = await executeAgent(TEST_FILE, 'error_reducer', {
193
+ dryRun: true,
194
+ });
195
+
196
+ // Sonnet pricing: $3/MTok input, $15/MTok output (rates assumed by this test)
197
+ // (1000 * 3 + 500 * 15) / 1_000_000 = $0.0105
198
+ expect(result.cost).toBeCloseTo(0.0105, 4);
199
+ });
200
+ });
201
+
202
// The trace attached to an execution result must identify the run and carry its metrics.
describe('Trace recording', () => {
  it('should record trace with correct metrics', async () => {
    const { trace } = await executeAgent(TEST_FILE, 'error_reducer', { dryRun: true });

    // Identity of the run.
    expect(trace.file_path).toBe(TEST_FILE);
    expect(trace.mode).toBe('error_reducer');

    // Gradient readings before and after the run are non-negative.
    expect(trace.gradient_before).toBeGreaterThanOrEqual(0);
    expect(trace.gradient_after).toBeGreaterThanOrEqual(0);

    // Token usage was recorded in both directions.
    expect(trace.cost.tokens_in).toBeGreaterThan(0);
    expect(trace.cost.tokens_out).toBeGreaterThan(0);

    expect(trace.efficiency).toBeDefined();
  });
});
217
+ });