rafcode 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. package/.claude/settings.local.json +4 -1
  2. package/CLAUDE.md +59 -11
  3. package/RAF/ahslfe-config-wizard/decisions.md +34 -0
  4. package/RAF/ahslfe-config-wizard/input.md +1 -0
  5. package/RAF/ahslfe-config-wizard/outcomes/01-define-config-schema.md +38 -0
  6. package/RAF/ahslfe-config-wizard/outcomes/02-refactor-codebase-to-use-config.md +67 -0
  7. package/RAF/ahslfe-config-wizard/outcomes/03-create-config-documentation.md +37 -0
  8. package/RAF/ahslfe-config-wizard/outcomes/04-implement-raf-config-command.md +47 -0
  9. package/RAF/ahslfe-config-wizard/outcomes/05-update-claude-md.md +26 -0
  10. package/RAF/ahslfe-config-wizard/plans/01-define-config-schema.md +73 -0
  11. package/RAF/ahslfe-config-wizard/plans/02-refactor-codebase-to-use-config.md +74 -0
  12. package/RAF/ahslfe-config-wizard/plans/03-create-config-documentation.md +57 -0
  13. package/RAF/ahslfe-config-wizard/plans/04-implement-raf-config-command.md +66 -0
  14. package/RAF/ahslfe-config-wizard/plans/05-update-claude-md.md +60 -0
  15. package/RAF/ahstvo-token-tracker/decisions.md +44 -0
  16. package/RAF/ahstvo-token-tracker/input.md +3 -0
  17. package/RAF/ahstvo-token-tracker/outcomes/01-full-model-id-support.md +43 -0
  18. package/RAF/ahstvo-token-tracker/outcomes/02-name-generation-no-session.md +33 -0
  19. package/RAF/ahstvo-token-tracker/outcomes/03-unify-stream-json-execution.md +48 -0
  20. package/RAF/ahstvo-token-tracker/outcomes/04-token-tracking-cost-calculation.md +53 -0
  21. package/RAF/ahstvo-token-tracker/outcomes/05-token-cost-console-reporting.md +57 -0
  22. package/RAF/ahstvo-token-tracker/outcomes/06-runtime-verbose-toggle.md +53 -0
  23. package/RAF/ahstvo-token-tracker/outcomes/07-readme-config-docs.md +36 -0
  24. package/RAF/ahstvo-token-tracker/plans/01-full-model-id-support.md +35 -0
  25. package/RAF/ahstvo-token-tracker/plans/02-name-generation-no-session.md +36 -0
  26. package/RAF/ahstvo-token-tracker/plans/03-unify-stream-json-execution.md +44 -0
  27. package/RAF/ahstvo-token-tracker/plans/04-token-tracking-cost-calculation.md +56 -0
  28. package/RAF/ahstvo-token-tracker/plans/05-token-cost-console-reporting.md +55 -0
  29. package/RAF/ahstvo-token-tracker/plans/06-runtime-verbose-toggle.md +48 -0
  30. package/RAF/ahstvo-token-tracker/plans/07-readme-config-docs.md +44 -0
  31. package/RAF/ahtahs-token-reaper/decisions.md +37 -0
  32. package/RAF/ahtahs-token-reaper/input.md +20 -0
  33. package/RAF/ahtahs-token-reaper/outcomes/01-extend-token-tracker-data-model.md +42 -0
  34. package/RAF/ahtahs-token-reaper/outcomes/02-accumulate-usage-in-retry-loop.md +31 -0
  35. package/RAF/ahtahs-token-reaper/outcomes/03-per-attempt-display-formatting.md +60 -0
  36. package/RAF/ahtahs-token-reaper/outcomes/04-add-model-name-to-claude-call-logs.md +57 -0
  37. package/RAF/ahtahs-token-reaper/outcomes/05-handle-invalid-config-in-raf-config.md +46 -0
  38. package/RAF/ahtahs-token-reaper/outcomes/06-fix-verbose-toggle-timer-display.md +38 -0
  39. package/RAF/ahtahs-token-reaper/plans/01-extend-token-tracker-data-model.md +36 -0
  40. package/RAF/ahtahs-token-reaper/plans/02-accumulate-usage-in-retry-loop.md +36 -0
  41. package/RAF/ahtahs-token-reaper/plans/03-per-attempt-display-formatting.md +43 -0
  42. package/RAF/ahtahs-token-reaper/plans/04-add-model-name-to-claude-call-logs.md +38 -0
  43. package/RAF/ahtahs-token-reaper/plans/05-handle-invalid-config-in-raf-config.md +36 -0
  44. package/RAF/ahtahs-token-reaper/plans/06-fix-verbose-toggle-timer-display.md +40 -0
  45. package/README.md +34 -0
  46. package/dist/commands/config.d.ts +3 -0
  47. package/dist/commands/config.d.ts.map +1 -0
  48. package/dist/commands/config.js +195 -0
  49. package/dist/commands/config.js.map +1 -0
  50. package/dist/commands/do.d.ts.map +1 -1
  51. package/dist/commands/do.js +55 -7
  52. package/dist/commands/do.js.map +1 -1
  53. package/dist/commands/plan.d.ts.map +1 -1
  54. package/dist/commands/plan.js +5 -3
  55. package/dist/commands/plan.js.map +1 -1
  56. package/dist/core/claude-runner.d.ts +19 -2
  57. package/dist/core/claude-runner.d.ts.map +1 -1
  58. package/dist/core/claude-runner.js +43 -96
  59. package/dist/core/claude-runner.js.map +1 -1
  60. package/dist/core/failure-analyzer.d.ts.map +1 -1
  61. package/dist/core/failure-analyzer.js +6 -3
  62. package/dist/core/failure-analyzer.js.map +1 -1
  63. package/dist/core/git.d.ts.map +1 -1
  64. package/dist/core/git.js +10 -3
  65. package/dist/core/git.js.map +1 -1
  66. package/dist/core/pull-request.d.ts +1 -1
  67. package/dist/core/pull-request.d.ts.map +1 -1
  68. package/dist/core/pull-request.js +9 -4
  69. package/dist/core/pull-request.js.map +1 -1
  70. package/dist/index.js +2 -0
  71. package/dist/index.js.map +1 -1
  72. package/dist/parsers/stream-renderer.d.ts +16 -1
  73. package/dist/parsers/stream-renderer.d.ts.map +1 -1
  74. package/dist/parsers/stream-renderer.js +34 -4
  75. package/dist/parsers/stream-renderer.js.map +1 -1
  76. package/dist/prompts/execution.d.ts.map +1 -1
  77. package/dist/prompts/execution.js +11 -1
  78. package/dist/prompts/execution.js.map +1 -1
  79. package/dist/types/config.d.ts +95 -4
  80. package/dist/types/config.d.ts.map +1 -1
  81. package/dist/types/config.js +63 -3
  82. package/dist/types/config.js.map +1 -1
  83. package/dist/utils/config.d.ts +65 -7
  84. package/dist/utils/config.d.ts.map +1 -1
  85. package/dist/utils/config.js +297 -21
  86. package/dist/utils/config.js.map +1 -1
  87. package/dist/utils/name-generator.d.ts +3 -7
  88. package/dist/utils/name-generator.d.ts.map +1 -1
  89. package/dist/utils/name-generator.js +75 -61
  90. package/dist/utils/name-generator.js.map +1 -1
  91. package/dist/utils/terminal-symbols.d.ts +25 -0
  92. package/dist/utils/terminal-symbols.d.ts.map +1 -1
  93. package/dist/utils/terminal-symbols.js +87 -0
  94. package/dist/utils/terminal-symbols.js.map +1 -1
  95. package/dist/utils/token-tracker.d.ts +55 -0
  96. package/dist/utils/token-tracker.d.ts.map +1 -0
  97. package/dist/utils/token-tracker.js +142 -0
  98. package/dist/utils/token-tracker.js.map +1 -0
  99. package/dist/utils/validation.d.ts +5 -5
  100. package/dist/utils/validation.d.ts.map +1 -1
  101. package/dist/utils/validation.js +10 -6
  102. package/dist/utils/validation.js.map +1 -1
  103. package/dist/utils/verbose-toggle.d.ts +33 -0
  104. package/dist/utils/verbose-toggle.d.ts.map +1 -0
  105. package/dist/utils/verbose-toggle.js +94 -0
  106. package/dist/utils/verbose-toggle.js.map +1 -0
  107. package/package.json +1 -1
  108. package/src/commands/config.ts +230 -0
  109. package/src/commands/do.ts +64 -6
  110. package/src/commands/plan.ts +5 -3
  111. package/src/core/claude-runner.ts +59 -115
  112. package/src/core/failure-analyzer.ts +6 -3
  113. package/src/core/git.ts +10 -3
  114. package/src/core/pull-request.ts +9 -4
  115. package/src/index.ts +2 -0
  116. package/src/parsers/stream-renderer.ts +54 -4
  117. package/src/prompts/config-docs.md +331 -0
  118. package/src/prompts/execution.ts +13 -1
  119. package/src/types/config.ts +156 -7
  120. package/src/utils/config.ts +357 -21
  121. package/src/utils/name-generator.ts +84 -71
  122. package/src/utils/terminal-symbols.ts +103 -0
  123. package/src/utils/token-tracker.ts +177 -0
  124. package/src/utils/validation.ts +15 -10
  125. package/src/utils/verbose-toggle.ts +103 -0
  126. package/tests/unit/claude-runner.test.ts +171 -7
  127. package/tests/unit/config-command.test.ts +242 -0
  128. package/tests/unit/config.test.ts +632 -30
  129. package/tests/unit/name-generator.test.ts +99 -75
  130. package/tests/unit/pull-request.test.ts +2 -0
  131. package/tests/unit/stream-renderer.test.ts +83 -0
  132. package/tests/unit/terminal-symbols.test.ts +245 -0
  133. package/tests/unit/timer-verbose-integration.test.ts +170 -0
  134. package/tests/unit/token-tracker.test.ts +685 -0
  135. package/tests/unit/verbose-toggle.test.ts +204 -0
package/tests/unit/token-tracker.test.ts (new file)
@@ -0,0 +1,685 @@
1
+ import { TokenTracker, CostBreakdown, accumulateUsage } from '../../src/utils/token-tracker.js';
2
+ import { UsageData, PricingConfig, DEFAULT_CONFIG } from '../../src/types/config.js';
3
+
4
+ function makeUsage(overrides: Partial<UsageData> = {}): UsageData {
5
+ return {
6
+ inputTokens: 0,
7
+ outputTokens: 0,
8
+ cacheReadInputTokens: 0,
9
+ cacheCreationInputTokens: 0,
10
+ modelUsage: {},
11
+ ...overrides,
12
+ };
13
+ }
14
+
15
+ const testPricing: PricingConfig = DEFAULT_CONFIG.pricing;
16
+
17
+ describe('TokenTracker', () => {
18
+ describe('calculateCost', () => {
19
+ it('should calculate cost for opus model usage', () => {
20
+ const tracker = new TokenTracker(testPricing);
21
+ const usage = makeUsage({
22
+ inputTokens: 1_000_000,
23
+ outputTokens: 500_000,
24
+ cacheReadInputTokens: 200_000,
25
+ cacheCreationInputTokens: 100_000,
26
+ modelUsage: {
27
+ 'claude-opus-4-6': {
28
+ inputTokens: 1_000_000,
29
+ outputTokens: 500_000,
30
+ cacheReadInputTokens: 200_000,
31
+ cacheCreationInputTokens: 100_000,
32
+ },
33
+ },
34
+ });
35
+
36
+ const cost = tracker.calculateCost(usage);
37
+ expect(cost.inputCost).toBeCloseTo(15); // 1M * $15/MTok
38
+ expect(cost.outputCost).toBeCloseTo(37.5); // 0.5M * $75/MTok
39
+ expect(cost.cacheReadCost).toBeCloseTo(0.3); // 0.2M * $1.5/MTok
40
+ expect(cost.cacheCreateCost).toBeCloseTo(1.875); // 0.1M * $18.75/MTok
41
+ expect(cost.totalCost).toBeCloseTo(15 + 37.5 + 0.3 + 1.875);
42
+ });
43
+
44
+ it('should calculate cost for sonnet model usage', () => {
45
+ const tracker = new TokenTracker(testPricing);
46
+ const usage = makeUsage({
47
+ inputTokens: 1_000_000,
48
+ outputTokens: 1_000_000,
49
+ modelUsage: {
50
+ 'claude-sonnet-4-5-20250929': {
51
+ inputTokens: 1_000_000,
52
+ outputTokens: 1_000_000,
53
+ cacheReadInputTokens: 0,
54
+ cacheCreationInputTokens: 0,
55
+ },
56
+ },
57
+ });
58
+
59
+ const cost = tracker.calculateCost(usage);
60
+ expect(cost.inputCost).toBeCloseTo(3); // 1M * $3/MTok
61
+ expect(cost.outputCost).toBeCloseTo(15); // 1M * $15/MTok
62
+ expect(cost.totalCost).toBeCloseTo(18);
63
+ });
64
+
65
+ it('should calculate cost for haiku model usage', () => {
66
+ const tracker = new TokenTracker(testPricing);
67
+ const usage = makeUsage({
68
+ inputTokens: 2_000_000,
69
+ outputTokens: 1_000_000,
70
+ modelUsage: {
71
+ 'claude-haiku-4-5-20251001': {
72
+ inputTokens: 2_000_000,
73
+ outputTokens: 1_000_000,
74
+ cacheReadInputTokens: 0,
75
+ cacheCreationInputTokens: 0,
76
+ },
77
+ },
78
+ });
79
+
80
+ const cost = tracker.calculateCost(usage);
81
+ expect(cost.inputCost).toBeCloseTo(2); // 2M * $1/MTok
82
+ expect(cost.outputCost).toBeCloseTo(5); // 1M * $5/MTok
83
+ expect(cost.totalCost).toBeCloseTo(7);
84
+ });
85
+
86
+ it('should handle multi-model usage in a single task', () => {
87
+ const tracker = new TokenTracker(testPricing);
88
+ const usage = makeUsage({
89
+ inputTokens: 2_000_000,
90
+ outputTokens: 1_500_000,
91
+ modelUsage: {
92
+ 'claude-opus-4-6': {
93
+ inputTokens: 1_000_000,
94
+ outputTokens: 500_000,
95
+ cacheReadInputTokens: 0,
96
+ cacheCreationInputTokens: 0,
97
+ },
98
+ 'claude-haiku-4-5-20251001': {
99
+ inputTokens: 1_000_000,
100
+ outputTokens: 1_000_000,
101
+ cacheReadInputTokens: 0,
102
+ cacheCreationInputTokens: 0,
103
+ },
104
+ },
105
+ });
106
+
107
+ const cost = tracker.calculateCost(usage);
108
+ // Opus: 1M*$15 + 0.5M*$75 = $15 + $37.5
109
+ // Haiku: 1M*$1 + 1M*$5 = $1 + $5
110
+ expect(cost.inputCost).toBeCloseTo(16); // 15 + 1
111
+ expect(cost.outputCost).toBeCloseTo(42.5); // 37.5 + 5
112
+ expect(cost.totalCost).toBeCloseTo(58.5);
113
+ });
114
+
115
+ it('should fallback to sonnet pricing when no model breakdown', () => {
116
+ const tracker = new TokenTracker(testPricing);
117
+ const usage = makeUsage({
118
+ inputTokens: 1_000_000,
119
+ outputTokens: 1_000_000,
120
+ modelUsage: {},
121
+ });
122
+
123
+ const cost = tracker.calculateCost(usage);
124
+ expect(cost.inputCost).toBeCloseTo(3); // sonnet fallback
125
+ expect(cost.outputCost).toBeCloseTo(15);
126
+ expect(cost.totalCost).toBeCloseTo(18);
127
+ });
128
+
129
+ it('should fallback to sonnet pricing for unknown model families', () => {
130
+ const tracker = new TokenTracker(testPricing);
131
+ const usage = makeUsage({
132
+ inputTokens: 1_000_000,
133
+ outputTokens: 1_000_000,
134
+ modelUsage: {
135
+ 'claude-unknown-3-0': {
136
+ inputTokens: 1_000_000,
137
+ outputTokens: 1_000_000,
138
+ cacheReadInputTokens: 0,
139
+ cacheCreationInputTokens: 0,
140
+ },
141
+ },
142
+ });
143
+
144
+ const cost = tracker.calculateCost(usage);
145
+ expect(cost.inputCost).toBeCloseTo(3); // sonnet fallback
146
+ expect(cost.outputCost).toBeCloseTo(15);
147
+ });
148
+
149
+ it('should return zero cost for zero tokens', () => {
150
+ const tracker = new TokenTracker(testPricing);
151
+ const usage = makeUsage();
152
+ const cost = tracker.calculateCost(usage);
153
+ expect(cost.totalCost).toBe(0);
154
+ });
155
+
156
+ it('should apply cache read discount correctly', () => {
157
+ const tracker = new TokenTracker(testPricing);
158
+ const usage = makeUsage({
159
+ cacheReadInputTokens: 1_000_000,
160
+ modelUsage: {
161
+ 'claude-sonnet-4-5': {
162
+ inputTokens: 0,
163
+ outputTokens: 0,
164
+ cacheReadInputTokens: 1_000_000,
165
+ cacheCreationInputTokens: 0,
166
+ },
167
+ },
168
+ });
169
+
170
+ const cost = tracker.calculateCost(usage);
171
+ // Cache read: 1M * $0.30/MTok = $0.30 (90% off $3 input price)
172
+ expect(cost.cacheReadCost).toBeCloseTo(0.3);
173
+ expect(cost.totalCost).toBeCloseTo(0.3);
174
+ });
175
+ });
176
+
177
+ describe('addTask and accumulation', () => {
178
+ it('should accumulate usage across multiple tasks', () => {
179
+ const tracker = new TokenTracker(testPricing);
180
+
181
+ tracker.addTask('01', [makeUsage({
182
+ inputTokens: 500_000,
183
+ outputTokens: 200_000,
184
+ modelUsage: {
185
+ 'claude-opus-4-6': {
186
+ inputTokens: 500_000,
187
+ outputTokens: 200_000,
188
+ cacheReadInputTokens: 0,
189
+ cacheCreationInputTokens: 0,
190
+ },
191
+ },
192
+ })]);
193
+
194
+ tracker.addTask('02', [makeUsage({
195
+ inputTokens: 300_000,
196
+ outputTokens: 100_000,
197
+ modelUsage: {
198
+ 'claude-opus-4-6': {
199
+ inputTokens: 300_000,
200
+ outputTokens: 100_000,
201
+ cacheReadInputTokens: 0,
202
+ cacheCreationInputTokens: 0,
203
+ },
204
+ },
205
+ })]);
206
+
207
+ const totals = tracker.getTotals();
208
+ expect(totals.usage.inputTokens).toBe(800_000);
209
+ expect(totals.usage.outputTokens).toBe(300_000);
210
+ expect(totals.usage.modelUsage['claude-opus-4-6']?.inputTokens).toBe(800_000);
211
+ expect(totals.usage.modelUsage['claude-opus-4-6']?.outputTokens).toBe(300_000);
212
+ });
213
+
214
+ it('should accumulate costs across multiple tasks', () => {
215
+ const tracker = new TokenTracker(testPricing);
216
+
217
+ const entry1 = tracker.addTask('01', [makeUsage({
218
+ inputTokens: 1_000_000,
219
+ outputTokens: 1_000_000,
220
+ modelUsage: {
221
+ 'claude-sonnet-4-5': {
222
+ inputTokens: 1_000_000,
223
+ outputTokens: 1_000_000,
224
+ cacheReadInputTokens: 0,
225
+ cacheCreationInputTokens: 0,
226
+ },
227
+ },
228
+ })]);
229
+
230
+ const entry2 = tracker.addTask('02', [makeUsage({
231
+ inputTokens: 1_000_000,
232
+ outputTokens: 1_000_000,
233
+ modelUsage: {
234
+ 'claude-sonnet-4-5': {
235
+ inputTokens: 1_000_000,
236
+ outputTokens: 1_000_000,
237
+ cacheReadInputTokens: 0,
238
+ cacheCreationInputTokens: 0,
239
+ },
240
+ },
241
+ })]);
242
+
243
+ const totals = tracker.getTotals();
244
+ // Each task: $3 input + $15 output = $18
245
+ expect(entry1.cost.totalCost).toBeCloseTo(18);
246
+ expect(entry2.cost.totalCost).toBeCloseTo(18);
247
+ expect(totals.cost.totalCost).toBeCloseTo(36);
248
+ });
249
+
250
+ it('should accumulate multi-model usage across tasks', () => {
251
+ const tracker = new TokenTracker(testPricing);
252
+
253
+ tracker.addTask('01', [makeUsage({
254
+ inputTokens: 1_000_000,
255
+ outputTokens: 500_000,
256
+ modelUsage: {
257
+ 'claude-opus-4-6': {
258
+ inputTokens: 1_000_000,
259
+ outputTokens: 500_000,
260
+ cacheReadInputTokens: 0,
261
+ cacheCreationInputTokens: 0,
262
+ },
263
+ },
264
+ })]);
265
+
266
+ tracker.addTask('02', [makeUsage({
267
+ inputTokens: 500_000,
268
+ outputTokens: 200_000,
269
+ modelUsage: {
270
+ 'claude-haiku-4-5-20251001': {
271
+ inputTokens: 500_000,
272
+ outputTokens: 200_000,
273
+ cacheReadInputTokens: 0,
274
+ cacheCreationInputTokens: 0,
275
+ },
276
+ },
277
+ })]);
278
+
279
+ const totals = tracker.getTotals();
280
+ expect(totals.usage.modelUsage['claude-opus-4-6']?.inputTokens).toBe(1_000_000);
281
+ expect(totals.usage.modelUsage['claude-haiku-4-5-20251001']?.inputTokens).toBe(500_000);
282
+ });
283
+
284
+ it('should return empty totals when no tasks added', () => {
285
+ const tracker = new TokenTracker(testPricing);
286
+ const totals = tracker.getTotals();
287
+ expect(totals.usage.inputTokens).toBe(0);
288
+ expect(totals.usage.outputTokens).toBe(0);
289
+ expect(totals.cost.totalCost).toBe(0);
290
+ expect(Object.keys(totals.usage.modelUsage)).toHaveLength(0);
291
+ });
292
+
293
+ it('should return per-task entries', () => {
294
+ const tracker = new TokenTracker(testPricing);
295
+ tracker.addTask('01', [makeUsage({ inputTokens: 100 })]);
296
+ tracker.addTask('02', [makeUsage({ inputTokens: 200 })]);
297
+
298
+ const entries = tracker.getEntries();
299
+ expect(entries).toHaveLength(2);
300
+ expect(entries[0].taskId).toBe('01');
301
+ expect(entries[1].taskId).toBe('02');
302
+ });
303
+
304
+ it('addTask returns the entry with cost', () => {
305
+ const tracker = new TokenTracker(testPricing);
306
+ const entry = tracker.addTask('01', [makeUsage({
307
+ inputTokens: 1_000_000,
308
+ modelUsage: {
309
+ 'claude-opus-4-6': {
310
+ inputTokens: 1_000_000,
311
+ outputTokens: 0,
312
+ cacheReadInputTokens: 0,
313
+ cacheCreationInputTokens: 0,
314
+ },
315
+ },
316
+ })]);
317
+
318
+ expect(entry.taskId).toBe('01');
319
+ expect(entry.cost.inputCost).toBeCloseTo(15);
320
+ expect(entry.cost.totalCost).toBeCloseTo(15);
321
+ });
322
+
323
+ it('should store attempts array in entry', () => {
324
+ const tracker = new TokenTracker(testPricing);
325
+ const usage = makeUsage({ inputTokens: 100 });
326
+ const entry = tracker.addTask('01', [usage]);
327
+
328
+ expect(entry.attempts).toHaveLength(1);
329
+ expect(entry.attempts[0]).toEqual(usage);
330
+ });
331
+
332
+ it('should accumulate multiple attempts for a single task', () => {
333
+ const tracker = new TokenTracker(testPricing);
334
+ const attempt1 = makeUsage({
335
+ inputTokens: 500_000,
336
+ outputTokens: 100_000,
337
+ modelUsage: {
338
+ 'claude-opus-4-6': {
339
+ inputTokens: 500_000,
340
+ outputTokens: 100_000,
341
+ cacheReadInputTokens: 0,
342
+ cacheCreationInputTokens: 0,
343
+ },
344
+ },
345
+ });
346
+ const attempt2 = makeUsage({
347
+ inputTokens: 600_000,
348
+ outputTokens: 200_000,
349
+ modelUsage: {
350
+ 'claude-opus-4-6': {
351
+ inputTokens: 600_000,
352
+ outputTokens: 200_000,
353
+ cacheReadInputTokens: 0,
354
+ cacheCreationInputTokens: 0,
355
+ },
356
+ },
357
+ });
358
+
359
+ const entry = tracker.addTask('01', [attempt1, attempt2]);
360
+
361
+ expect(entry.usage.inputTokens).toBe(1_100_000);
362
+ expect(entry.usage.outputTokens).toBe(300_000);
363
+ expect(entry.usage.modelUsage['claude-opus-4-6']?.inputTokens).toBe(1_100_000);
364
+ expect(entry.attempts).toHaveLength(2);
365
+ });
366
+
367
+ it('should correctly accumulate multi-attempt costs', () => {
368
+ const tracker = new TokenTracker(testPricing);
369
+ const attempt1 = makeUsage({
370
+ inputTokens: 1_000_000,
371
+ modelUsage: {
372
+ 'claude-sonnet-4-5': {
373
+ inputTokens: 1_000_000,
374
+ outputTokens: 0,
375
+ cacheReadInputTokens: 0,
376
+ cacheCreationInputTokens: 0,
377
+ },
378
+ },
379
+ });
380
+ const attempt2 = makeUsage({
381
+ inputTokens: 1_000_000,
382
+ modelUsage: {
383
+ 'claude-sonnet-4-5': {
384
+ inputTokens: 1_000_000,
385
+ outputTokens: 0,
386
+ cacheReadInputTokens: 0,
387
+ cacheCreationInputTokens: 0,
388
+ },
389
+ },
390
+ });
391
+
392
+ const entry = tracker.addTask('01', [attempt1, attempt2]);
393
+
394
+ // 2M tokens * $3/MTok = $6
395
+ expect(entry.cost.inputCost).toBeCloseTo(6);
396
+ expect(entry.cost.totalCost).toBeCloseTo(6);
397
+ });
398
+ });
399
+
400
+ describe('accumulateUsage', () => {
401
+ it('should return empty usage for empty array', () => {
402
+ const result = accumulateUsage([]);
403
+ expect(result.inputTokens).toBe(0);
404
+ expect(result.outputTokens).toBe(0);
405
+ expect(result.cacheReadInputTokens).toBe(0);
406
+ expect(result.cacheCreationInputTokens).toBe(0);
407
+ expect(Object.keys(result.modelUsage)).toHaveLength(0);
408
+ });
409
+
410
+ it('should return same usage for single-element array', () => {
411
+ const usage = makeUsage({
412
+ inputTokens: 100,
413
+ outputTokens: 200,
414
+ cacheReadInputTokens: 50,
415
+ cacheCreationInputTokens: 25,
416
+ modelUsage: {
417
+ 'claude-opus-4-6': {
418
+ inputTokens: 100,
419
+ outputTokens: 200,
420
+ cacheReadInputTokens: 50,
421
+ cacheCreationInputTokens: 25,
422
+ },
423
+ },
424
+ });
425
+
426
+ const result = accumulateUsage([usage]);
427
+ expect(result.inputTokens).toBe(100);
428
+ expect(result.outputTokens).toBe(200);
429
+ expect(result.cacheReadInputTokens).toBe(50);
430
+ expect(result.cacheCreationInputTokens).toBe(25);
431
+ expect(result.modelUsage['claude-opus-4-6']?.inputTokens).toBe(100);
432
+ });
433
+
434
+ it('should sum all token fields across attempts', () => {
435
+ const attempt1 = makeUsage({
436
+ inputTokens: 100,
437
+ outputTokens: 50,
438
+ cacheReadInputTokens: 10,
439
+ cacheCreationInputTokens: 5,
440
+ });
441
+ const attempt2 = makeUsage({
442
+ inputTokens: 200,
443
+ outputTokens: 100,
444
+ cacheReadInputTokens: 20,
445
+ cacheCreationInputTokens: 10,
446
+ });
447
+
448
+ const result = accumulateUsage([attempt1, attempt2]);
449
+ expect(result.inputTokens).toBe(300);
450
+ expect(result.outputTokens).toBe(150);
451
+ expect(result.cacheReadInputTokens).toBe(30);
452
+ expect(result.cacheCreationInputTokens).toBe(15);
453
+ });
454
+
455
+ it('should merge modelUsage for same model across attempts', () => {
456
+ const attempt1 = makeUsage({
457
+ modelUsage: {
458
+ 'claude-opus-4-6': {
459
+ inputTokens: 100,
460
+ outputTokens: 50,
461
+ cacheReadInputTokens: 10,
462
+ cacheCreationInputTokens: 5,
463
+ },
464
+ },
465
+ });
466
+ const attempt2 = makeUsage({
467
+ modelUsage: {
468
+ 'claude-opus-4-6': {
469
+ inputTokens: 200,
470
+ outputTokens: 100,
471
+ cacheReadInputTokens: 20,
472
+ cacheCreationInputTokens: 10,
473
+ },
474
+ },
475
+ });
476
+
477
+ const result = accumulateUsage([attempt1, attempt2]);
478
+ expect(result.modelUsage['claude-opus-4-6']?.inputTokens).toBe(300);
479
+ expect(result.modelUsage['claude-opus-4-6']?.outputTokens).toBe(150);
480
+ expect(result.modelUsage['claude-opus-4-6']?.cacheReadInputTokens).toBe(30);
481
+ expect(result.modelUsage['claude-opus-4-6']?.cacheCreationInputTokens).toBe(15);
482
+ });
483
+
484
+ it('should handle different models across attempts', () => {
485
+ const attempt1 = makeUsage({
486
+ inputTokens: 100,
487
+ outputTokens: 50,
488
+ modelUsage: {
489
+ 'claude-opus-4-6': {
490
+ inputTokens: 100,
491
+ outputTokens: 50,
492
+ cacheReadInputTokens: 0,
493
+ cacheCreationInputTokens: 0,
494
+ },
495
+ },
496
+ });
497
+ const attempt2 = makeUsage({
498
+ inputTokens: 200,
499
+ outputTokens: 100,
500
+ modelUsage: {
501
+ 'claude-sonnet-4-5': {
502
+ inputTokens: 200,
503
+ outputTokens: 100,
504
+ cacheReadInputTokens: 0,
505
+ cacheCreationInputTokens: 0,
506
+ },
507
+ },
508
+ });
509
+
510
+ const result = accumulateUsage([attempt1, attempt2]);
511
+ expect(result.inputTokens).toBe(300);
512
+ expect(result.outputTokens).toBe(150);
513
+ expect(result.modelUsage['claude-opus-4-6']?.inputTokens).toBe(100);
514
+ expect(result.modelUsage['claude-sonnet-4-5']?.inputTokens).toBe(200);
515
+ expect(Object.keys(result.modelUsage)).toHaveLength(2);
516
+ });
517
+
518
+ it('should handle mixed model usage across attempts', () => {
519
+ const attempt1 = makeUsage({
520
+ inputTokens: 300,
521
+ outputTokens: 150,
522
+ modelUsage: {
523
+ 'claude-opus-4-6': {
524
+ inputTokens: 200,
525
+ outputTokens: 100,
526
+ cacheReadInputTokens: 0,
527
+ cacheCreationInputTokens: 0,
528
+ },
529
+ 'claude-haiku-4-5': {
530
+ inputTokens: 100,
531
+ outputTokens: 50,
532
+ cacheReadInputTokens: 0,
533
+ cacheCreationInputTokens: 0,
534
+ },
535
+ },
536
+ });
537
+ const attempt2 = makeUsage({
538
+ inputTokens: 400,
539
+ outputTokens: 200,
540
+ modelUsage: {
541
+ 'claude-opus-4-6': {
542
+ inputTokens: 100,
543
+ outputTokens: 50,
544
+ cacheReadInputTokens: 0,
545
+ cacheCreationInputTokens: 0,
546
+ },
547
+ 'claude-sonnet-4-5': {
548
+ inputTokens: 300,
549
+ outputTokens: 150,
550
+ cacheReadInputTokens: 0,
551
+ cacheCreationInputTokens: 0,
552
+ },
553
+ },
554
+ });
555
+
556
+ const result = accumulateUsage([attempt1, attempt2]);
557
+ expect(result.inputTokens).toBe(700);
558
+ expect(result.outputTokens).toBe(350);
559
+ // Opus: 200 + 100 = 300
560
+ expect(result.modelUsage['claude-opus-4-6']?.inputTokens).toBe(300);
561
+ // Haiku: only from attempt1
562
+ expect(result.modelUsage['claude-haiku-4-5']?.inputTokens).toBe(100);
563
+ // Sonnet: only from attempt2
564
+ expect(result.modelUsage['claude-sonnet-4-5']?.inputTokens).toBe(300);
565
+ });
566
+
567
+ it('should not mutate input objects', () => {
568
+ const attempt1 = makeUsage({
569
+ inputTokens: 100,
570
+ modelUsage: {
571
+ 'claude-opus-4-6': {
572
+ inputTokens: 100,
573
+ outputTokens: 0,
574
+ cacheReadInputTokens: 0,
575
+ cacheCreationInputTokens: 0,
576
+ },
577
+ },
578
+ });
579
+ const attempt2 = makeUsage({
580
+ inputTokens: 200,
581
+ modelUsage: {
582
+ 'claude-opus-4-6': {
583
+ inputTokens: 200,
584
+ outputTokens: 0,
585
+ cacheReadInputTokens: 0,
586
+ cacheCreationInputTokens: 0,
587
+ },
588
+ },
589
+ });
590
+
591
+ accumulateUsage([attempt1, attempt2]);
592
+
593
+ expect(attempt1.inputTokens).toBe(100);
594
+ expect(attempt1.modelUsage['claude-opus-4-6']?.inputTokens).toBe(100);
595
+ expect(attempt2.inputTokens).toBe(200);
596
+ });
597
+ });
598
+
599
+ describe('multi-attempt cost calculation', () => {
600
+ it('should calculate correct cost when retry uses different model', () => {
601
+ const tracker = new TokenTracker(testPricing);
602
+ // Attempt 1: Opus, Attempt 2: Sonnet (fallback)
603
+ const attempt1 = makeUsage({
604
+ inputTokens: 1_000_000,
605
+ outputTokens: 500_000,
606
+ modelUsage: {
607
+ 'claude-opus-4-6': {
608
+ inputTokens: 1_000_000,
609
+ outputTokens: 500_000,
610
+ cacheReadInputTokens: 0,
611
+ cacheCreationInputTokens: 0,
612
+ },
613
+ },
614
+ });
615
+ const attempt2 = makeUsage({
616
+ inputTokens: 1_000_000,
617
+ outputTokens: 1_000_000,
618
+ modelUsage: {
619
+ 'claude-sonnet-4-5': {
620
+ inputTokens: 1_000_000,
621
+ outputTokens: 1_000_000,
622
+ cacheReadInputTokens: 0,
623
+ cacheCreationInputTokens: 0,
624
+ },
625
+ },
626
+ });
627
+
628
+ const entry = tracker.addTask('01', [attempt1, attempt2]);
629
+
630
+ // Opus: 1M*$15 + 0.5M*$75 = $15 + $37.5 = $52.5
631
+ // Sonnet: 1M*$3 + 1M*$15 = $3 + $15 = $18
632
+ // Total: $52.5 + $18 = $70.5
633
+ expect(entry.cost.inputCost).toBeCloseTo(18); // 15 + 3
634
+ expect(entry.cost.outputCost).toBeCloseTo(52.5); // 37.5 + 15
635
+ expect(entry.cost.totalCost).toBeCloseTo(70.5);
636
+ });
637
+
638
+ it('should include all attempt usage in grand totals', () => {
639
+ const tracker = new TokenTracker(testPricing);
640
+
641
+ // Task 1: 2 attempts
642
+ tracker.addTask('01', [
643
+ makeUsage({ inputTokens: 500_000 }),
644
+ makeUsage({ inputTokens: 500_000 }),
645
+ ]);
646
+
647
+ // Task 2: 1 attempt
648
+ tracker.addTask('02', [
649
+ makeUsage({ inputTokens: 1_000_000 }),
650
+ ]);
651
+
652
+ const totals = tracker.getTotals();
653
+ expect(totals.usage.inputTokens).toBe(2_000_000);
654
+ });
655
+ });
656
+
657
+ describe('custom pricing', () => {
658
+ it('should use custom pricing config', () => {
659
+ const customPricing: PricingConfig = {
660
+ opus: { inputPerMTok: 10, outputPerMTok: 50, cacheReadPerMTok: 1, cacheCreatePerMTok: 12.5 },
661
+ sonnet: { inputPerMTok: 2, outputPerMTok: 10, cacheReadPerMTok: 0.2, cacheCreatePerMTok: 2.5 },
662
+ haiku: { inputPerMTok: 0.5, outputPerMTok: 2.5, cacheReadPerMTok: 0.05, cacheCreatePerMTok: 0.625 },
663
+ };
664
+
665
+ const tracker = new TokenTracker(customPricing);
666
+ const usage = makeUsage({
667
+ inputTokens: 1_000_000,
668
+ outputTokens: 1_000_000,
669
+ modelUsage: {
670
+ 'claude-opus-4-6': {
671
+ inputTokens: 1_000_000,
672
+ outputTokens: 1_000_000,
673
+ cacheReadInputTokens: 0,
674
+ cacheCreationInputTokens: 0,
675
+ },
676
+ },
677
+ });
678
+
679
+ const cost = tracker.calculateCost(usage);
680
+ expect(cost.inputCost).toBeCloseTo(10); // 1M * $10/MTok
681
+ expect(cost.outputCost).toBeCloseTo(50); // 1M * $50/MTok
682
+ expect(cost.totalCost).toBeCloseTo(60);
683
+ });
684
+ });
685
+ });