rafcode 2.2.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. package/CLAUDE.md +19 -4
  2. package/RAF/ahtahs-token-reaper/decisions.md +37 -0
  3. package/RAF/ahtahs-token-reaper/input.md +20 -0
  4. package/RAF/ahtahs-token-reaper/outcomes/01-extend-token-tracker-data-model.md +42 -0
  5. package/RAF/ahtahs-token-reaper/outcomes/02-accumulate-usage-in-retry-loop.md +31 -0
  6. package/RAF/ahtahs-token-reaper/outcomes/03-per-attempt-display-formatting.md +60 -0
  7. package/RAF/ahtahs-token-reaper/outcomes/04-add-model-name-to-claude-call-logs.md +57 -0
  8. package/RAF/ahtahs-token-reaper/outcomes/05-handle-invalid-config-in-raf-config.md +46 -0
  9. package/RAF/ahtahs-token-reaper/outcomes/06-fix-verbose-toggle-timer-display.md +38 -0
  10. package/RAF/ahtahs-token-reaper/plans/01-extend-token-tracker-data-model.md +36 -0
  11. package/RAF/ahtahs-token-reaper/plans/02-accumulate-usage-in-retry-loop.md +36 -0
  12. package/RAF/ahtahs-token-reaper/plans/03-per-attempt-display-formatting.md +43 -0
  13. package/RAF/ahtahs-token-reaper/plans/04-add-model-name-to-claude-call-logs.md +38 -0
  14. package/RAF/ahtahs-token-reaper/plans/05-handle-invalid-config-in-raf-config.md +36 -0
  15. package/RAF/ahtahs-token-reaper/plans/06-fix-verbose-toggle-timer-display.md +40 -0
  16. package/RAF/ahvrih-rate-forge/decisions.md +70 -0
  17. package/RAF/ahvrih-rate-forge/input.md +44 -0
  18. package/RAF/ahvrih-rate-forge/outcomes/01-remove-claude-command-config.md +58 -0
  19. package/RAF/ahvrih-rate-forge/outcomes/02-fix-mixed-attempt-cost.md +46 -0
  20. package/RAF/ahvrih-rate-forge/outcomes/03-rate-limit-estimation.md +82 -0
  21. package/RAF/ahvrih-rate-forge/outcomes/04-show-version-in-do-logs.md +45 -0
  22. package/RAF/ahvrih-rate-forge/outcomes/05-sync-main-before-worktree.md +96 -0
  23. package/RAF/ahvrih-rate-forge/outcomes/06-sync-readme-with-codebase.md +45 -0
  24. package/RAF/ahvrih-rate-forge/outcomes/07-no-session-persistence.md +26 -0
  25. package/RAF/ahvrih-rate-forge/outcomes/08-plan-execution-metadata.md +130 -0
  26. package/RAF/ahvrih-rate-forge/plans/01-remove-claude-command-config.md +36 -0
  27. package/RAF/ahvrih-rate-forge/plans/02-fix-mixed-attempt-cost.md +33 -0
  28. package/RAF/ahvrih-rate-forge/plans/03-rate-limit-estimation.md +82 -0
  29. package/RAF/ahvrih-rate-forge/plans/04-show-version-in-do-logs.md +32 -0
  30. package/RAF/ahvrih-rate-forge/plans/05-sync-main-before-worktree.md +40 -0
  31. package/RAF/ahvrih-rate-forge/plans/06-sync-readme-with-codebase.md +61 -0
  32. package/RAF/ahvrih-rate-forge/plans/07-no-session-persistence.md +28 -0
  33. package/RAF/ahvrih-rate-forge/plans/08-plan-execution-metadata.md +123 -0
  34. package/README.md +27 -7
  35. package/dist/commands/config.d.ts.map +1 -1
  36. package/dist/commands/config.js +24 -7
  37. package/dist/commands/config.js.map +1 -1
  38. package/dist/commands/do.d.ts.map +1 -1
  39. package/dist/commands/do.js +122 -27
  40. package/dist/commands/do.js.map +1 -1
  41. package/dist/commands/plan.d.ts.map +1 -1
  42. package/dist/commands/plan.js +79 -3
  43. package/dist/commands/plan.js.map +1 -1
  44. package/dist/core/claude-runner.d.ts +6 -6
  45. package/dist/core/claude-runner.d.ts.map +1 -1
  46. package/dist/core/claude-runner.js +9 -10
  47. package/dist/core/claude-runner.js.map +1 -1
  48. package/dist/core/failure-analyzer.d.ts.map +1 -1
  49. package/dist/core/failure-analyzer.js +3 -3
  50. package/dist/core/failure-analyzer.js.map +1 -1
  51. package/dist/core/pull-request.d.ts.map +1 -1
  52. package/dist/core/pull-request.js +5 -3
  53. package/dist/core/pull-request.js.map +1 -1
  54. package/dist/core/state-derivation.d.ts +5 -0
  55. package/dist/core/state-derivation.d.ts.map +1 -1
  56. package/dist/core/state-derivation.js +14 -4
  57. package/dist/core/state-derivation.js.map +1 -1
  58. package/dist/core/worktree.d.ts +32 -0
  59. package/dist/core/worktree.d.ts.map +1 -1
  60. package/dist/core/worktree.js +215 -0
  61. package/dist/core/worktree.js.map +1 -1
  62. package/dist/prompts/amend.d.ts.map +1 -1
  63. package/dist/prompts/amend.js +26 -11
  64. package/dist/prompts/amend.js.map +1 -1
  65. package/dist/prompts/planning.d.ts.map +1 -1
  66. package/dist/prompts/planning.js +26 -11
  67. package/dist/prompts/planning.js.map +1 -1
  68. package/dist/types/config.d.ts +30 -13
  69. package/dist/types/config.d.ts.map +1 -1
  70. package/dist/types/config.js +14 -10
  71. package/dist/types/config.js.map +1 -1
  72. package/dist/utils/config.d.ts +53 -4
  73. package/dist/utils/config.d.ts.map +1 -1
  74. package/dist/utils/config.js +197 -30
  75. package/dist/utils/config.js.map +1 -1
  76. package/dist/utils/frontmatter.d.ts +43 -0
  77. package/dist/utils/frontmatter.d.ts.map +1 -0
  78. package/dist/utils/frontmatter.js +85 -0
  79. package/dist/utils/frontmatter.js.map +1 -0
  80. package/dist/utils/name-generator.d.ts.map +1 -1
  81. package/dist/utils/name-generator.js +2 -3
  82. package/dist/utils/name-generator.js.map +1 -1
  83. package/dist/utils/session-parser.d.ts +44 -0
  84. package/dist/utils/session-parser.d.ts.map +1 -0
  85. package/dist/utils/session-parser.js +122 -0
  86. package/dist/utils/session-parser.js.map +1 -0
  87. package/dist/utils/terminal-symbols.d.ts +28 -5
  88. package/dist/utils/terminal-symbols.d.ts.map +1 -1
  89. package/dist/utils/terminal-symbols.js +77 -18
  90. package/dist/utils/terminal-symbols.js.map +1 -1
  91. package/dist/utils/token-tracker.d.ts +31 -1
  92. package/dist/utils/token-tracker.d.ts.map +1 -1
  93. package/dist/utils/token-tracker.js +94 -4
  94. package/dist/utils/token-tracker.js.map +1 -1
  95. package/package.json +1 -1
  96. package/src/commands/config.ts +26 -7
  97. package/src/commands/do.ts +157 -29
  98. package/src/commands/plan.ts +89 -2
  99. package/src/core/claude-runner.ts +16 -17
  100. package/src/core/failure-analyzer.ts +3 -3
  101. package/src/core/pull-request.ts +5 -3
  102. package/src/core/state-derivation.ts +20 -4
  103. package/src/core/worktree.ts +230 -0
  104. package/src/prompts/amend.ts +26 -11
  105. package/src/prompts/config-docs.md +91 -29
  106. package/src/prompts/planning.ts +26 -11
  107. package/src/types/config.ts +46 -21
  108. package/src/utils/config.ts +222 -33
  109. package/src/utils/frontmatter.ts +110 -0
  110. package/src/utils/name-generator.ts +2 -3
  111. package/src/utils/session-parser.ts +161 -0
  112. package/src/utils/terminal-symbols.ts +105 -18
  113. package/src/utils/token-tracker.ts +109 -4
  114. package/tests/unit/claude-runner-interactive.test.ts +8 -6
  115. package/tests/unit/claude-runner.test.ts +5 -66
  116. package/tests/unit/config-command.test.ts +84 -5
  117. package/tests/unit/config.test.ts +292 -45
  118. package/tests/unit/frontmatter.test.ts +182 -0
  119. package/tests/unit/post-execution-picker.test.ts +5 -0
  120. package/tests/unit/session-parser.test.ts +301 -0
  121. package/tests/unit/terminal-symbols.test.ts +263 -33
  122. package/tests/unit/timer-verbose-integration.test.ts +170 -0
  123. package/tests/unit/token-tracker.test.ts +653 -17
  124. package/tests/unit/validation.test.ts +6 -4
  125. package/tests/unit/worktree.test.ts +242 -0
@@ -1,4 +1,4 @@
1
- import { TokenTracker, CostBreakdown } from '../../src/utils/token-tracker.js';
1
+ import { TokenTracker, CostBreakdown, accumulateUsage, sumCostBreakdowns } from '../../src/utils/token-tracker.js';
2
2
  import { UsageData, PricingConfig, DEFAULT_CONFIG } from '../../src/types/config.js';
3
3
 
4
4
  function makeUsage(overrides: Partial<UsageData> = {}): UsageData {
@@ -178,7 +178,7 @@ describe('TokenTracker', () => {
178
178
  it('should accumulate usage across multiple tasks', () => {
179
179
  const tracker = new TokenTracker(testPricing);
180
180
 
181
- tracker.addTask('01', makeUsage({
181
+ tracker.addTask('01', [makeUsage({
182
182
  inputTokens: 500_000,
183
183
  outputTokens: 200_000,
184
184
  modelUsage: {
@@ -189,9 +189,9 @@ describe('TokenTracker', () => {
189
189
  cacheCreationInputTokens: 0,
190
190
  },
191
191
  },
192
- }));
192
+ })]);
193
193
 
194
- tracker.addTask('02', makeUsage({
194
+ tracker.addTask('02', [makeUsage({
195
195
  inputTokens: 300_000,
196
196
  outputTokens: 100_000,
197
197
  modelUsage: {
@@ -202,7 +202,7 @@ describe('TokenTracker', () => {
202
202
  cacheCreationInputTokens: 0,
203
203
  },
204
204
  },
205
- }));
205
+ })]);
206
206
 
207
207
  const totals = tracker.getTotals();
208
208
  expect(totals.usage.inputTokens).toBe(800_000);
@@ -214,7 +214,7 @@ describe('TokenTracker', () => {
214
214
  it('should accumulate costs across multiple tasks', () => {
215
215
  const tracker = new TokenTracker(testPricing);
216
216
 
217
- const entry1 = tracker.addTask('01', makeUsage({
217
+ const entry1 = tracker.addTask('01', [makeUsage({
218
218
  inputTokens: 1_000_000,
219
219
  outputTokens: 1_000_000,
220
220
  modelUsage: {
@@ -225,9 +225,9 @@ describe('TokenTracker', () => {
225
225
  cacheCreationInputTokens: 0,
226
226
  },
227
227
  },
228
- }));
228
+ })]);
229
229
 
230
- const entry2 = tracker.addTask('02', makeUsage({
230
+ const entry2 = tracker.addTask('02', [makeUsage({
231
231
  inputTokens: 1_000_000,
232
232
  outputTokens: 1_000_000,
233
233
  modelUsage: {
@@ -238,7 +238,7 @@ describe('TokenTracker', () => {
238
238
  cacheCreationInputTokens: 0,
239
239
  },
240
240
  },
241
- }));
241
+ })]);
242
242
 
243
243
  const totals = tracker.getTotals();
244
244
  // Each task: $3 input + $15 output = $18
@@ -250,7 +250,7 @@ describe('TokenTracker', () => {
250
250
  it('should accumulate multi-model usage across tasks', () => {
251
251
  const tracker = new TokenTracker(testPricing);
252
252
 
253
- tracker.addTask('01', makeUsage({
253
+ tracker.addTask('01', [makeUsage({
254
254
  inputTokens: 1_000_000,
255
255
  outputTokens: 500_000,
256
256
  modelUsage: {
@@ -261,9 +261,9 @@ describe('TokenTracker', () => {
261
261
  cacheCreationInputTokens: 0,
262
262
  },
263
263
  },
264
- }));
264
+ })]);
265
265
 
266
- tracker.addTask('02', makeUsage({
266
+ tracker.addTask('02', [makeUsage({
267
267
  inputTokens: 500_000,
268
268
  outputTokens: 200_000,
269
269
  modelUsage: {
@@ -274,7 +274,7 @@ describe('TokenTracker', () => {
274
274
  cacheCreationInputTokens: 0,
275
275
  },
276
276
  },
277
- }));
277
+ })]);
278
278
 
279
279
  const totals = tracker.getTotals();
280
280
  expect(totals.usage.modelUsage['claude-opus-4-6']?.inputTokens).toBe(1_000_000);
@@ -292,8 +292,8 @@ describe('TokenTracker', () => {
292
292
 
293
293
  it('should return per-task entries', () => {
294
294
  const tracker = new TokenTracker(testPricing);
295
- tracker.addTask('01', makeUsage({ inputTokens: 100 }));
296
- tracker.addTask('02', makeUsage({ inputTokens: 200 }));
295
+ tracker.addTask('01', [makeUsage({ inputTokens: 100 })]);
296
+ tracker.addTask('02', [makeUsage({ inputTokens: 200 })]);
297
297
 
298
298
  const entries = tracker.getEntries();
299
299
  expect(entries).toHaveLength(2);
@@ -303,7 +303,7 @@ describe('TokenTracker', () => {
303
303
 
304
304
  it('addTask returns the entry with cost', () => {
305
305
  const tracker = new TokenTracker(testPricing);
306
- const entry = tracker.addTask('01', makeUsage({
306
+ const entry = tracker.addTask('01', [makeUsage({
307
307
  inputTokens: 1_000_000,
308
308
  modelUsage: {
309
309
  'claude-opus-4-6': {
@@ -313,12 +313,511 @@ describe('TokenTracker', () => {
313
313
  cacheCreationInputTokens: 0,
314
314
  },
315
315
  },
316
- }));
316
+ })]);
317
317
 
318
318
  expect(entry.taskId).toBe('01');
319
319
  expect(entry.cost.inputCost).toBeCloseTo(15);
320
320
  expect(entry.cost.totalCost).toBeCloseTo(15);
321
321
  });
322
+
323
+ it('should store attempts array in entry', () => {
324
+ const tracker = new TokenTracker(testPricing);
325
+ const usage = makeUsage({ inputTokens: 100 });
326
+ const entry = tracker.addTask('01', [usage]);
327
+
328
+ expect(entry.attempts).toHaveLength(1);
329
+ expect(entry.attempts[0]).toEqual(usage);
330
+ });
331
+
332
+ it('should accumulate multiple attempts for a single task', () => {
333
+ const tracker = new TokenTracker(testPricing);
334
+ const attempt1 = makeUsage({
335
+ inputTokens: 500_000,
336
+ outputTokens: 100_000,
337
+ modelUsage: {
338
+ 'claude-opus-4-6': {
339
+ inputTokens: 500_000,
340
+ outputTokens: 100_000,
341
+ cacheReadInputTokens: 0,
342
+ cacheCreationInputTokens: 0,
343
+ },
344
+ },
345
+ });
346
+ const attempt2 = makeUsage({
347
+ inputTokens: 600_000,
348
+ outputTokens: 200_000,
349
+ modelUsage: {
350
+ 'claude-opus-4-6': {
351
+ inputTokens: 600_000,
352
+ outputTokens: 200_000,
353
+ cacheReadInputTokens: 0,
354
+ cacheCreationInputTokens: 0,
355
+ },
356
+ },
357
+ });
358
+
359
+ const entry = tracker.addTask('01', [attempt1, attempt2]);
360
+
361
+ expect(entry.usage.inputTokens).toBe(1_100_000);
362
+ expect(entry.usage.outputTokens).toBe(300_000);
363
+ expect(entry.usage.modelUsage['claude-opus-4-6']?.inputTokens).toBe(1_100_000);
364
+ expect(entry.attempts).toHaveLength(2);
365
+ });
366
+
367
+ it('should correctly accumulate multi-attempt costs', () => {
368
+ const tracker = new TokenTracker(testPricing);
369
+ const attempt1 = makeUsage({
370
+ inputTokens: 1_000_000,
371
+ modelUsage: {
372
+ 'claude-sonnet-4-5': {
373
+ inputTokens: 1_000_000,
374
+ outputTokens: 0,
375
+ cacheReadInputTokens: 0,
376
+ cacheCreationInputTokens: 0,
377
+ },
378
+ },
379
+ });
380
+ const attempt2 = makeUsage({
381
+ inputTokens: 1_000_000,
382
+ modelUsage: {
383
+ 'claude-sonnet-4-5': {
384
+ inputTokens: 1_000_000,
385
+ outputTokens: 0,
386
+ cacheReadInputTokens: 0,
387
+ cacheCreationInputTokens: 0,
388
+ },
389
+ },
390
+ });
391
+
392
+ const entry = tracker.addTask('01', [attempt1, attempt2]);
393
+
394
+ // 2M tokens * $3/MTok = $6
395
+ expect(entry.cost.inputCost).toBeCloseTo(6);
396
+ expect(entry.cost.totalCost).toBeCloseTo(6);
397
+ });
398
+ });
399
+
400
+ describe('accumulateUsage', () => {
401
+ it('should return empty usage for empty array', () => {
402
+ const result = accumulateUsage([]);
403
+ expect(result.inputTokens).toBe(0);
404
+ expect(result.outputTokens).toBe(0);
405
+ expect(result.cacheReadInputTokens).toBe(0);
406
+ expect(result.cacheCreationInputTokens).toBe(0);
407
+ expect(Object.keys(result.modelUsage)).toHaveLength(0);
408
+ });
409
+
410
+ it('should return same usage for single-element array', () => {
411
+ const usage = makeUsage({
412
+ inputTokens: 100,
413
+ outputTokens: 200,
414
+ cacheReadInputTokens: 50,
415
+ cacheCreationInputTokens: 25,
416
+ modelUsage: {
417
+ 'claude-opus-4-6': {
418
+ inputTokens: 100,
419
+ outputTokens: 200,
420
+ cacheReadInputTokens: 50,
421
+ cacheCreationInputTokens: 25,
422
+ },
423
+ },
424
+ });
425
+
426
+ const result = accumulateUsage([usage]);
427
+ expect(result.inputTokens).toBe(100);
428
+ expect(result.outputTokens).toBe(200);
429
+ expect(result.cacheReadInputTokens).toBe(50);
430
+ expect(result.cacheCreationInputTokens).toBe(25);
431
+ expect(result.modelUsage['claude-opus-4-6']?.inputTokens).toBe(100);
432
+ });
433
+
434
+ it('should sum all token fields across attempts', () => {
435
+ const attempt1 = makeUsage({
436
+ inputTokens: 100,
437
+ outputTokens: 50,
438
+ cacheReadInputTokens: 10,
439
+ cacheCreationInputTokens: 5,
440
+ });
441
+ const attempt2 = makeUsage({
442
+ inputTokens: 200,
443
+ outputTokens: 100,
444
+ cacheReadInputTokens: 20,
445
+ cacheCreationInputTokens: 10,
446
+ });
447
+
448
+ const result = accumulateUsage([attempt1, attempt2]);
449
+ expect(result.inputTokens).toBe(300);
450
+ expect(result.outputTokens).toBe(150);
451
+ expect(result.cacheReadInputTokens).toBe(30);
452
+ expect(result.cacheCreationInputTokens).toBe(15);
453
+ });
454
+
455
+ it('should merge modelUsage for same model across attempts', () => {
456
+ const attempt1 = makeUsage({
457
+ modelUsage: {
458
+ 'claude-opus-4-6': {
459
+ inputTokens: 100,
460
+ outputTokens: 50,
461
+ cacheReadInputTokens: 10,
462
+ cacheCreationInputTokens: 5,
463
+ },
464
+ },
465
+ });
466
+ const attempt2 = makeUsage({
467
+ modelUsage: {
468
+ 'claude-opus-4-6': {
469
+ inputTokens: 200,
470
+ outputTokens: 100,
471
+ cacheReadInputTokens: 20,
472
+ cacheCreationInputTokens: 10,
473
+ },
474
+ },
475
+ });
476
+
477
+ const result = accumulateUsage([attempt1, attempt2]);
478
+ expect(result.modelUsage['claude-opus-4-6']?.inputTokens).toBe(300);
479
+ expect(result.modelUsage['claude-opus-4-6']?.outputTokens).toBe(150);
480
+ expect(result.modelUsage['claude-opus-4-6']?.cacheReadInputTokens).toBe(30);
481
+ expect(result.modelUsage['claude-opus-4-6']?.cacheCreationInputTokens).toBe(15);
482
+ });
483
+
484
+ it('should handle different models across attempts', () => {
485
+ const attempt1 = makeUsage({
486
+ inputTokens: 100,
487
+ outputTokens: 50,
488
+ modelUsage: {
489
+ 'claude-opus-4-6': {
490
+ inputTokens: 100,
491
+ outputTokens: 50,
492
+ cacheReadInputTokens: 0,
493
+ cacheCreationInputTokens: 0,
494
+ },
495
+ },
496
+ });
497
+ const attempt2 = makeUsage({
498
+ inputTokens: 200,
499
+ outputTokens: 100,
500
+ modelUsage: {
501
+ 'claude-sonnet-4-5': {
502
+ inputTokens: 200,
503
+ outputTokens: 100,
504
+ cacheReadInputTokens: 0,
505
+ cacheCreationInputTokens: 0,
506
+ },
507
+ },
508
+ });
509
+
510
+ const result = accumulateUsage([attempt1, attempt2]);
511
+ expect(result.inputTokens).toBe(300);
512
+ expect(result.outputTokens).toBe(150);
513
+ expect(result.modelUsage['claude-opus-4-6']?.inputTokens).toBe(100);
514
+ expect(result.modelUsage['claude-sonnet-4-5']?.inputTokens).toBe(200);
515
+ expect(Object.keys(result.modelUsage)).toHaveLength(2);
516
+ });
517
+
518
+ it('should handle mixed model usage across attempts', () => {
519
+ const attempt1 = makeUsage({
520
+ inputTokens: 300,
521
+ outputTokens: 150,
522
+ modelUsage: {
523
+ 'claude-opus-4-6': {
524
+ inputTokens: 200,
525
+ outputTokens: 100,
526
+ cacheReadInputTokens: 0,
527
+ cacheCreationInputTokens: 0,
528
+ },
529
+ 'claude-haiku-4-5': {
530
+ inputTokens: 100,
531
+ outputTokens: 50,
532
+ cacheReadInputTokens: 0,
533
+ cacheCreationInputTokens: 0,
534
+ },
535
+ },
536
+ });
537
+ const attempt2 = makeUsage({
538
+ inputTokens: 400,
539
+ outputTokens: 200,
540
+ modelUsage: {
541
+ 'claude-opus-4-6': {
542
+ inputTokens: 100,
543
+ outputTokens: 50,
544
+ cacheReadInputTokens: 0,
545
+ cacheCreationInputTokens: 0,
546
+ },
547
+ 'claude-sonnet-4-5': {
548
+ inputTokens: 300,
549
+ outputTokens: 150,
550
+ cacheReadInputTokens: 0,
551
+ cacheCreationInputTokens: 0,
552
+ },
553
+ },
554
+ });
555
+
556
+ const result = accumulateUsage([attempt1, attempt2]);
557
+ expect(result.inputTokens).toBe(700);
558
+ expect(result.outputTokens).toBe(350);
559
+ // Opus: 200 + 100 = 300
560
+ expect(result.modelUsage['claude-opus-4-6']?.inputTokens).toBe(300);
561
+ // Haiku: only from attempt1
562
+ expect(result.modelUsage['claude-haiku-4-5']?.inputTokens).toBe(100);
563
+ // Sonnet: only from attempt2
564
+ expect(result.modelUsage['claude-sonnet-4-5']?.inputTokens).toBe(300);
565
+ });
566
+
567
+ it('should not mutate input objects', () => {
568
+ const attempt1 = makeUsage({
569
+ inputTokens: 100,
570
+ modelUsage: {
571
+ 'claude-opus-4-6': {
572
+ inputTokens: 100,
573
+ outputTokens: 0,
574
+ cacheReadInputTokens: 0,
575
+ cacheCreationInputTokens: 0,
576
+ },
577
+ },
578
+ });
579
+ const attempt2 = makeUsage({
580
+ inputTokens: 200,
581
+ modelUsage: {
582
+ 'claude-opus-4-6': {
583
+ inputTokens: 200,
584
+ outputTokens: 0,
585
+ cacheReadInputTokens: 0,
586
+ cacheCreationInputTokens: 0,
587
+ },
588
+ },
589
+ });
590
+
591
+ accumulateUsage([attempt1, attempt2]);
592
+
593
+ expect(attempt1.inputTokens).toBe(100);
594
+ expect(attempt1.modelUsage['claude-opus-4-6']?.inputTokens).toBe(100);
595
+ expect(attempt2.inputTokens).toBe(200);
596
+ });
597
+ });
598
+
599
+ describe('multi-attempt cost calculation', () => {
600
+ it('should calculate correct cost when retry uses different model', () => {
601
+ const tracker = new TokenTracker(testPricing);
602
+ // Attempt 1: Opus, Attempt 2: Sonnet (fallback)
603
+ const attempt1 = makeUsage({
604
+ inputTokens: 1_000_000,
605
+ outputTokens: 500_000,
606
+ modelUsage: {
607
+ 'claude-opus-4-6': {
608
+ inputTokens: 1_000_000,
609
+ outputTokens: 500_000,
610
+ cacheReadInputTokens: 0,
611
+ cacheCreationInputTokens: 0,
612
+ },
613
+ },
614
+ });
615
+ const attempt2 = makeUsage({
616
+ inputTokens: 1_000_000,
617
+ outputTokens: 1_000_000,
618
+ modelUsage: {
619
+ 'claude-sonnet-4-5': {
620
+ inputTokens: 1_000_000,
621
+ outputTokens: 1_000_000,
622
+ cacheReadInputTokens: 0,
623
+ cacheCreationInputTokens: 0,
624
+ },
625
+ },
626
+ });
627
+
628
+ const entry = tracker.addTask('01', [attempt1, attempt2]);
629
+
630
+ // Opus: 1M*$15 + 0.5M*$75 = $15 + $37.5 = $52.5
631
+ // Sonnet: 1M*$3 + 1M*$15 = $3 + $15 = $18
632
+ // Total: $52.5 + $18 = $70.5
633
+ expect(entry.cost.inputCost).toBeCloseTo(18); // 15 + 3
634
+ expect(entry.cost.outputCost).toBeCloseTo(52.5); // 37.5 + 15
635
+ expect(entry.cost.totalCost).toBeCloseTo(70.5);
636
+ });
637
+
638
+ it('should include all attempt usage in grand totals', () => {
639
+ const tracker = new TokenTracker(testPricing);
640
+
641
+ // Task 1: 2 attempts
642
+ tracker.addTask('01', [
643
+ makeUsage({ inputTokens: 500_000 }),
644
+ makeUsage({ inputTokens: 500_000 }),
645
+ ]);
646
+
647
+ // Task 2: 1 attempt
648
+ tracker.addTask('02', [
649
+ makeUsage({ inputTokens: 1_000_000 }),
650
+ ]);
651
+
652
+ const totals = tracker.getTotals();
653
+ expect(totals.usage.inputTokens).toBe(2_000_000);
654
+ });
655
+ });
656
+
657
+ describe('mixed-attempt cost calculation (aggregate + modelUsage)', () => {
658
+ it('should correctly price attempts with mixed modelUsage presence', () => {
659
+ const tracker = new TokenTracker(testPricing);
660
+ // Attempt 1: has modelUsage (opus)
661
+ const attempt1 = makeUsage({
662
+ inputTokens: 1_000_000,
663
+ outputTokens: 500_000,
664
+ modelUsage: {
665
+ 'claude-opus-4-6': {
666
+ inputTokens: 1_000_000,
667
+ outputTokens: 500_000,
668
+ cacheReadInputTokens: 0,
669
+ cacheCreationInputTokens: 0,
670
+ },
671
+ },
672
+ });
673
+ // Attempt 2: NO modelUsage (aggregate-only, should use sonnet fallback)
674
+ const attempt2 = makeUsage({
675
+ inputTokens: 1_000_000,
676
+ outputTokens: 1_000_000,
677
+ modelUsage: {}, // Empty - should fallback to sonnet pricing
678
+ });
679
+
680
+ const entry = tracker.addTask('01', [attempt1, attempt2]);
681
+
682
+ // Attempt 1 (Opus): 1M*$15 + 0.5M*$75 = $15 + $37.5 = $52.5
683
+ // Attempt 2 (Sonnet fallback): 1M*$3 + 1M*$15 = $3 + $15 = $18
684
+ // Total: $52.5 + $18 = $70.5
685
+ expect(entry.cost.inputCost).toBeCloseTo(18); // 15 + 3
686
+ expect(entry.cost.outputCost).toBeCloseTo(52.5); // 37.5 + 15
687
+ expect(entry.cost.totalCost).toBeCloseTo(70.5);
688
+ });
689
+
690
+ it('should not underreport cost when first attempt has no modelUsage', () => {
691
+ const tracker = new TokenTracker(testPricing);
692
+ // Attempt 1: aggregate-only (no modelUsage)
693
+ const attempt1 = makeUsage({
694
+ inputTokens: 1_000_000,
695
+ outputTokens: 1_000_000,
696
+ modelUsage: {},
697
+ });
698
+ // Attempt 2: has modelUsage
699
+ const attempt2 = makeUsage({
700
+ inputTokens: 1_000_000,
701
+ outputTokens: 500_000,
702
+ modelUsage: {
703
+ 'claude-opus-4-6': {
704
+ inputTokens: 1_000_000,
705
+ outputTokens: 500_000,
706
+ cacheReadInputTokens: 0,
707
+ cacheCreationInputTokens: 0,
708
+ },
709
+ },
710
+ });
711
+
712
+ const entry = tracker.addTask('01', [attempt1, attempt2]);
713
+
714
+ // Attempt 1 (Sonnet fallback): 1M*$3 + 1M*$15 = $18
715
+ // Attempt 2 (Opus): 1M*$15 + 0.5M*$75 = $52.5
716
+ // Total: $18 + $52.5 = $70.5
717
+ expect(entry.cost.totalCost).toBeCloseTo(70.5);
718
+ });
719
+
720
+ it('should handle all aggregate-only attempts', () => {
721
+ const tracker = new TokenTracker(testPricing);
722
+ const attempt1 = makeUsage({
723
+ inputTokens: 1_000_000,
724
+ outputTokens: 1_000_000,
725
+ modelUsage: {},
726
+ });
727
+ const attempt2 = makeUsage({
728
+ inputTokens: 1_000_000,
729
+ outputTokens: 1_000_000,
730
+ modelUsage: {},
731
+ });
732
+
733
+ const entry = tracker.addTask('01', [attempt1, attempt2]);
734
+
735
+ // Both use sonnet fallback: 2 * (1M*$3 + 1M*$15) = 2 * $18 = $36
736
+ expect(entry.cost.totalCost).toBeCloseTo(36);
737
+ });
738
+
739
+ it('should include cache costs from aggregate-only attempts', () => {
740
+ const tracker = new TokenTracker(testPricing);
741
+ // Attempt 1: has modelUsage with cache
742
+ const attempt1 = makeUsage({
743
+ inputTokens: 500_000,
744
+ outputTokens: 200_000,
745
+ cacheReadInputTokens: 100_000,
746
+ cacheCreationInputTokens: 50_000,
747
+ modelUsage: {
748
+ 'claude-opus-4-6': {
749
+ inputTokens: 500_000,
750
+ outputTokens: 200_000,
751
+ cacheReadInputTokens: 100_000,
752
+ cacheCreationInputTokens: 50_000,
753
+ },
754
+ },
755
+ });
756
+ // Attempt 2: aggregate-only with cache
757
+ const attempt2 = makeUsage({
758
+ inputTokens: 500_000,
759
+ outputTokens: 200_000,
760
+ cacheReadInputTokens: 100_000,
761
+ cacheCreationInputTokens: 50_000,
762
+ modelUsage: {},
763
+ });
764
+
765
+ const entry = tracker.addTask('01', [attempt1, attempt2]);
766
+
767
+ // Opus cache rates: $1.5/MTok read, $18.75/MTok create
768
+ // Sonnet cache rates: $0.30/MTok read, $3.75/MTok create
769
+ // Attempt 1 cache: 0.1M*$1.5 + 0.05M*$18.75 = $0.15 + $0.9375 = $1.0875
770
+ // Attempt 2 cache: 0.1M*$0.30 + 0.05M*$3.75 = $0.03 + $0.1875 = $0.2175
771
+ // Total cache: $1.0875 + $0.2175 = $1.305
772
+ expect(entry.cost.cacheReadCost).toBeCloseTo(0.15 + 0.03);
773
+ expect(entry.cost.cacheCreateCost).toBeCloseTo(0.9375 + 0.1875);
774
+ });
775
+ });
776
+
777
+ describe('sumCostBreakdowns', () => {
778
+ it('should return zero breakdown for empty array', () => {
779
+ const result = sumCostBreakdowns([]);
780
+ expect(result.inputCost).toBe(0);
781
+ expect(result.outputCost).toBe(0);
782
+ expect(result.cacheReadCost).toBe(0);
783
+ expect(result.cacheCreateCost).toBe(0);
784
+ expect(result.totalCost).toBe(0);
785
+ });
786
+
787
+ it('should return same breakdown for single element', () => {
788
+ const cost: CostBreakdown = {
789
+ inputCost: 10,
790
+ outputCost: 20,
791
+ cacheReadCost: 1,
792
+ cacheCreateCost: 2,
793
+ totalCost: 33,
794
+ };
795
+ const result = sumCostBreakdowns([cost]);
796
+ expect(result).toEqual(cost);
797
+ });
798
+
799
+ it('should sum all cost fields across breakdowns', () => {
800
+ const cost1: CostBreakdown = {
801
+ inputCost: 10,
802
+ outputCost: 20,
803
+ cacheReadCost: 1,
804
+ cacheCreateCost: 2,
805
+ totalCost: 33,
806
+ };
807
+ const cost2: CostBreakdown = {
808
+ inputCost: 5,
809
+ outputCost: 10,
810
+ cacheReadCost: 0.5,
811
+ cacheCreateCost: 1,
812
+ totalCost: 16.5,
813
+ };
814
+ const result = sumCostBreakdowns([cost1, cost2]);
815
+ expect(result.inputCost).toBe(15);
816
+ expect(result.outputCost).toBe(30);
817
+ expect(result.cacheReadCost).toBe(1.5);
818
+ expect(result.cacheCreateCost).toBe(3);
819
+ expect(result.totalCost).toBe(49.5);
820
+ });
322
821
  });
323
822
 
324
823
  describe('custom pricing', () => {
@@ -349,4 +848,141 @@ describe('TokenTracker', () => {
349
848
  expect(cost.totalCost).toBeCloseTo(60);
350
849
  });
351
850
  });
851
+
852
+ describe('rate limit estimation', () => {
853
+ it('should calculate rate limit percentage from cost', () => {
854
+ const tracker = new TokenTracker(testPricing);
855
+ // With default sonnet pricing ($3 input, $15 output), avg = $9/MTok
856
+ // Sonnet-equivalent tokens = cost / (9/1M) = cost * 1M/9
857
+ // Percentage = sonnetEquivTokens / cap * 100
858
+
859
+ // Test with $0.18 cost (should be 20000 Sonnet-equiv tokens)
860
+ // With cap of 88000, that's ~22.7%
861
+ const percentage = tracker.calculateRateLimitPercentage(0.18, 88000);
862
+ // $0.18 / ($9/1M) = 20000 Sonnet-equiv tokens
863
+ // 20000 / 88000 * 100 = ~22.7%
864
+ expect(percentage).toBeCloseTo(22.73, 1);
865
+ });
866
+
867
+ it('should return 0 for zero cost', () => {
868
+ const tracker = new TokenTracker(testPricing);
869
+ expect(tracker.calculateRateLimitPercentage(0, 88000)).toBe(0);
870
+ });
871
+
872
+ it('should respect custom sonnetTokenCap', () => {
873
+ const tracker = new TokenTracker(testPricing);
874
+ const percentageDefault = tracker.calculateRateLimitPercentage(0.09, 88000);
875
+ const percentageHigherCap = tracker.calculateRateLimitPercentage(0.09, 176000);
876
+ // Higher cap should halve the percentage
877
+ expect(percentageHigherCap).toBeCloseTo(percentageDefault / 2, 1);
878
+ });
879
+
880
+ it('should calculate cumulative rate limit across tasks', () => {
881
+ const tracker = new TokenTracker(testPricing);
882
+
883
+ // Add a task with sonnet usage: 1M in / 1M out = $3 + $15 = $18
884
+ tracker.addTask('01', [makeUsage({
885
+ inputTokens: 1_000_000,
886
+ outputTokens: 1_000_000,
887
+ modelUsage: {
888
+ 'claude-sonnet-4-5': {
889
+ inputTokens: 1_000_000,
890
+ outputTokens: 1_000_000,
891
+ cacheReadInputTokens: 0,
892
+ cacheCreationInputTokens: 0,
893
+ },
894
+ },
895
+ })]);
896
+
897
+ const percentage = tracker.getCumulativeRateLimitPercentage(88000);
898
+ // $18 / ($9/1M) = 2,000,000 Sonnet-equiv tokens
899
+ // 2,000,000 / 88,000 * 100 = ~2272.7%
900
+ expect(percentage).toBeCloseTo(2272.73, 0);
901
+ });
902
+
903
+ it('should correctly weight Opus usage higher than Sonnet', () => {
904
+ const tracker = new TokenTracker(testPricing);
905
+
906
+ // Opus task: 1M in / 1M out = $15 + $75 = $90
907
+ tracker.addTask('01', [makeUsage({
908
+ inputTokens: 1_000_000,
909
+ outputTokens: 1_000_000,
910
+ modelUsage: {
911
+ 'claude-opus-4-6': {
912
+ inputTokens: 1_000_000,
913
+ outputTokens: 1_000_000,
914
+ cacheReadInputTokens: 0,
915
+ cacheCreationInputTokens: 0,
916
+ },
917
+ },
918
+ })]);
919
+
920
+ const opusPercentage = tracker.getCumulativeRateLimitPercentage(88000);
921
+
922
+ // Sonnet equivalent of $90 = $90 / ($9/1M) = 10,000,000 tokens
923
+ // 10,000,000 / 88,000 * 100 = ~11363.6%
924
+ expect(opusPercentage).toBeCloseTo(11363.6, 0);
925
+ });
926
+
927
+ it('should correctly weight Haiku usage lower than Sonnet', () => {
928
+ const tracker = new TokenTracker(testPricing);
929
+
930
+ // Haiku task: 1M in / 1M out = $1 + $5 = $6
931
+ tracker.addTask('01', [makeUsage({
932
+ inputTokens: 1_000_000,
933
+ outputTokens: 1_000_000,
934
+ modelUsage: {
935
+ 'claude-haiku-4-5': {
936
+ inputTokens: 1_000_000,
937
+ outputTokens: 1_000_000,
938
+ cacheReadInputTokens: 0,
939
+ cacheCreationInputTokens: 0,
940
+ },
941
+ },
942
+ })]);
943
+
944
+ const haikuPercentage = tracker.getCumulativeRateLimitPercentage(88000);
945
+
946
+ // Sonnet equivalent of $6 = $6 / ($9/1M) = ~666,667 tokens
947
+ // 666,667 / 88,000 * 100 = ~757.6%
948
+ expect(haikuPercentage).toBeCloseTo(757.6, 0);
949
+ });
950
+
951
+ it('should handle multi-model tasks correctly for rate limit', () => {
952
+ const tracker = new TokenTracker(testPricing);
953
+
954
+ // Mixed task: Opus attempt ($52.5) + Sonnet attempt ($18) = $70.5
955
+ const attempt1 = makeUsage({
956
+ inputTokens: 1_000_000,
957
+ outputTokens: 500_000,
958
+ modelUsage: {
959
+ 'claude-opus-4-6': {
960
+ inputTokens: 1_000_000,
961
+ outputTokens: 500_000,
962
+ cacheReadInputTokens: 0,
963
+ cacheCreationInputTokens: 0,
964
+ },
965
+ },
966
+ });
967
+ const attempt2 = makeUsage({
968
+ inputTokens: 1_000_000,
969
+ outputTokens: 1_000_000,
970
+ modelUsage: {
971
+ 'claude-sonnet-4-5': {
972
+ inputTokens: 1_000_000,
973
+ outputTokens: 1_000_000,
974
+ cacheReadInputTokens: 0,
975
+ cacheCreationInputTokens: 0,
976
+ },
977
+ },
978
+ });
979
+
980
+ tracker.addTask('01', [attempt1, attempt2]);
981
+ const percentage = tracker.getCumulativeRateLimitPercentage(88000);
982
+
983
+ // $70.5 / ($9/1M) = 7,833,333 Sonnet-equiv tokens
984
+ // 7,833,333 / 88,000 * 100 = ~8901.5%
985
+ expect(percentage).toBeCloseTo(8901.5, 0);
986
+ });
987
+ });
352
988
  });