@diogonzafe/tokenwatch 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -25,8 +25,45 @@ function lookupInMap(model, map) {
25
25
  }
26
26
  return void 0;
27
27
  }
28
- function calculateCost(inputTokens, outputTokens, price) {
29
- return inputTokens / 1e6 * price.input + outputTokens / 1e6 * price.output;
28
/**
 * Computes the USD cost of a single request from its token counts.
 *
 * Every rate on `price` is applied per one million tokens.
 *
 * @param {number} inputTokens - Tokens billed at `price.input`.
 * @param {number} outputTokens - Tokens billed at `price.output`.
 * @param {{input: number, output: number, cachedInput?: number, cacheCreationInput?: number}} price
 *   Rate card for the model.
 * @param {number} [cachedTokens=0] - Tokens billed at the cache-read rate.
 * @param {number} [cacheCreationTokens=0] - Tokens billed at the cache-write rate.
 * @returns {number} Sum of the four cost components.
 */
function calculateCost(inputTokens, outputTokens, price, cachedTokens = 0, cacheCreationTokens = 0) {
  const perMillion = (tokens, rate) => tokens / 1e6 * rate;
  // With no dedicated cache-read rate, cached tokens are billed as regular input.
  const cacheReadRate = price.cachedInput ?? price.input;
  // With no dedicated cache-write rate, fall back to 1.25x the regular input rate.
  const cacheWriteRate = price.cacheCreationInput ?? price.input * 1.25;

  let total = perMillion(inputTokens, price.input);
  total += perMillion(cachedTokens, cacheReadRate);
  total += perMillion(cacheCreationTokens, cacheWriteRate);
  total += perMillion(outputTokens, price.output);
  return total;
}
35
+
36
+ // src/core/suggestions.ts
37
// Model-name prefixes recognised when grouping models for suggestions.
var PROVIDER_PREFIXES = ["gpt-", "claude-", "gemini-", "deepseek-"];

/**
 * Returns the first entry of PROVIDER_PREFIXES that `model` starts with.
 *
 * @param {string} model - Model identifier.
 * @returns {string | undefined} The matching prefix, or undefined when none matches.
 */
function getProviderPrefix(model) {
  for (const candidate of PROVIDER_PREFIXES) {
    if (model.startsWith(candidate)) {
      return candidate;
    }
  }
  return void 0;
}
41
/**
 * Logs a one-line suggestion when another model sharing the same provider
 * prefix would have cost less than half of `costUSD` for the same token counts.
 *
 * Candidates come from the merged price layers (custom overrides remote, which
 * overrides bundled). A candidate is ignored when it is the model itself, has a
 * different provider prefix, has no price entry, or declares a
 * `maxInputTokens` smaller than `inputTokens`.
 *
 * @param {string} model - Model that actually served the request.
 * @param {number} costUSD - Cost already computed for the request.
 * @param {number} inputTokens - Input token count of the request.
 * @param {number} outputTokens - Output token count of the request.
 * @param {{bundledPrices: object, remotePrices?: object, customPrices?: object}} layers
 *   Price maps keyed by model name.
 * @returns {void}
 */
function maybeSuggestCheaperModel(model, costUSD, inputTokens, outputTokens, layers) {
  // `!(x > 0)` also rejects NaN, which would otherwise print "NaN% cheaper".
  if (!(costUSD > 0)) return;
  const prefix = getProviderPrefix(model);
  if (!prefix) return;

  // Later spreads win, so custom prices take precedence over remote and bundled.
  const mergedMap = {
    ...layers.bundledPrices,
    ...layers.remotePrices ?? {},
    ...layers.customPrices ?? {}
  };

  let cheapestModel;
  let cheapestCost = Infinity;
  for (const [key, price] of Object.entries(mergedMap)) {
    if (key === model || !key.startsWith(prefix)) continue;
    if (!price) continue;
    // A model whose declared input limit is below this request's input size
    // could not have handled it, so it must not be offered as an alternative.
    if (price.maxInputTokens != null && inputTokens > price.maxInputTokens) continue;
    const candidateCost = calculateCost(inputTokens, outputTokens, price);
    if (candidateCost < cheapestCost) {
      cheapestCost = candidateCost;
      cheapestModel = key;
    }
  }

  // Only speak up when the saving is more than 50%.
  if (cheapestModel === void 0 || cheapestCost >= costUSD * 0.5) return;
  const savingsPct = Math.round((1 - cheapestCost / costUSD) * 100);
  console.log(
    `[tokenwatch] Suggestion: ${cheapestModel} could handle this for ~$${cheapestCost.toFixed(4)} (${savingsPct}% cheaper than ${model})`
  );
}
31
68
 
32
69
  // src/core/storage.ts
@@ -74,29 +111,51 @@ var SqliteStorage = class {
74
111
  migrate() {
75
112
  this.db.exec(`
76
113
  CREATE TABLE IF NOT EXISTS usage (
77
- id INTEGER PRIMARY KEY AUTOINCREMENT,
78
- model TEXT NOT NULL,
79
- input_tokens INTEGER NOT NULL,
80
- output_tokens INTEGER NOT NULL,
81
- cost_usd REAL NOT NULL,
82
- session_id TEXT,
83
- user_id TEXT,
84
- timestamp TEXT NOT NULL
114
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
115
+ model TEXT NOT NULL,
116
+ input_tokens INTEGER NOT NULL,
117
+ output_tokens INTEGER NOT NULL,
118
+ reasoning_tokens INTEGER NOT NULL DEFAULT 0,
119
+ cached_tokens INTEGER NOT NULL DEFAULT 0,
120
+ cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
121
+ cost_usd REAL NOT NULL,
122
+ session_id TEXT,
123
+ user_id TEXT,
124
+ feature TEXT,
125
+ timestamp TEXT NOT NULL
85
126
  )
86
127
  `);
128
+ const cols = this.db.prepare(`PRAGMA table_info(usage)`).all().map((c) => c.name);
129
+ if (!cols.includes("reasoning_tokens")) {
130
+ this.db.exec(`ALTER TABLE usage ADD COLUMN reasoning_tokens INTEGER NOT NULL DEFAULT 0`);
131
+ }
132
+ if (!cols.includes("feature")) {
133
+ this.db.exec(`ALTER TABLE usage ADD COLUMN feature TEXT`);
134
+ }
135
+ if (!cols.includes("cached_tokens")) {
136
+ this.db.exec(`ALTER TABLE usage ADD COLUMN cached_tokens INTEGER NOT NULL DEFAULT 0`);
137
+ }
138
+ if (!cols.includes("cache_creation_tokens")) {
139
+ this.db.exec(`ALTER TABLE usage ADD COLUMN cache_creation_tokens INTEGER NOT NULL DEFAULT 0`);
140
+ }
87
141
  }
88
142
  record(entry) {
89
143
  this.db.prepare(
90
144
  `INSERT INTO usage
91
- (model, input_tokens, output_tokens, cost_usd, session_id, user_id, timestamp)
92
- VALUES (?, ?, ?, ?, ?, ?, ?)`
145
+ (model, input_tokens, output_tokens, reasoning_tokens, cached_tokens, cache_creation_tokens,
146
+ cost_usd, session_id, user_id, feature, timestamp)
147
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
93
148
  ).run(
94
149
  entry.model,
95
150
  entry.inputTokens,
96
151
  entry.outputTokens,
152
+ entry.reasoningTokens ?? 0,
153
+ entry.cachedTokens ?? 0,
154
+ entry.cacheCreationTokens ?? 0,
97
155
  entry.costUSD,
98
156
  entry.sessionId ?? null,
99
157
  entry.userId ?? null,
158
+ entry.feature ?? null,
100
159
  entry.timestamp
101
160
  );
102
161
  }
@@ -106,9 +165,13 @@ var SqliteStorage = class {
106
165
  model: r.model,
107
166
  inputTokens: r.input_tokens,
108
167
  outputTokens: r.output_tokens,
168
+ ...r.reasoning_tokens > 0 && { reasoningTokens: r.reasoning_tokens },
169
+ ...r.cached_tokens > 0 && { cachedTokens: r.cached_tokens },
170
+ ...r.cache_creation_tokens > 0 && { cacheCreationTokens: r.cache_creation_tokens },
109
171
  costUSD: r.cost_usd,
110
172
  ...r.session_id != null && { sessionId: r.session_id },
111
173
  ...r.user_id != null && { userId: r.user_id },
174
+ ...r.feature != null && { feature: r.feature },
112
175
  timestamp: r.timestamp
113
176
  }));
114
177
  }
@@ -140,7 +203,7 @@ async function fetchRemotePrices(url = REMOTE_URL) {
140
203
  const data = await res.json();
141
204
  if (!data?.models) return null;
142
205
  await persistCache(data);
143
- return data.models;
206
+ return { models: data.models, updated_at: data.updated_at ?? "" };
144
207
  } catch {
145
208
  return null;
146
209
  }
@@ -152,7 +215,8 @@ async function loadCachedPrices() {
152
215
  const data = JSON.parse(raw);
153
216
  const age = Date.now() - (data._cachedAt ?? 0);
154
217
  if (age > CACHE_TTL_MS) return null;
155
- return data.models ?? null;
218
+ if (!data.models) return null;
219
+ return { models: data.models, updated_at: data.updated_at ?? "" };
156
220
  } catch {
157
221
  return null;
158
222
  }
@@ -173,87 +237,110 @@ async function getRemotePrices() {
173
237
 
174
238
  // prices.json
175
239
  var prices_default = {
176
- updated_at: "2026-04-21",
240
+ updated_at: "2026-04-22",
177
241
  source: "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json",
178
242
  models: {
179
243
  "gpt-4o": {
180
244
  input: 2.5,
181
245
  output: 10,
246
+ cachedInput: 1.25,
182
247
  maxInputTokens: 128e3
183
248
  },
184
249
  "gpt-4o-mini": {
185
250
  input: 0.15,
186
251
  output: 0.6,
252
+ cachedInput: 0.075,
187
253
  maxInputTokens: 128e3
188
254
  },
189
255
  "gpt-5": {
190
256
  input: 1.25,
191
257
  output: 10,
258
+ cachedInput: 0.125,
192
259
  maxInputTokens: 272e3
193
260
  },
194
261
  "gpt-5-mini": {
195
262
  input: 0.25,
196
263
  output: 2,
264
+ cachedInput: 0.025,
197
265
  maxInputTokens: 272e3
198
266
  },
199
267
  "gpt-5-nano": {
200
268
  input: 0.05,
201
269
  output: 0.4,
270
+ cachedInput: 5e-3,
202
271
  maxInputTokens: 272e3
203
272
  },
204
273
  "claude-opus-4-6": {
205
274
  input: 5,
206
275
  output: 25,
276
+ cachedInput: 0.5,
277
+ cacheCreationInput: 6.25,
207
278
  maxInputTokens: 1e6
208
279
  },
209
280
  "claude-sonnet-4-6": {
210
281
  input: 3,
211
282
  output: 15,
283
+ cachedInput: 0.3,
284
+ cacheCreationInput: 3.75,
212
285
  maxInputTokens: 1e6
213
286
  },
214
287
  "claude-haiku-4-5": {
215
288
  input: 1,
216
289
  output: 5,
290
+ cachedInput: 0.1,
291
+ cacheCreationInput: 1.25,
217
292
  maxInputTokens: 2e5
218
293
  },
219
294
  "gemini-2.5-pro": {
220
295
  input: 1.25,
221
296
  output: 10,
297
+ cachedInput: 0.125,
222
298
  maxInputTokens: 1048576
223
299
  },
224
300
  "gemini-2.5-flash": {
225
301
  input: 0.3,
226
302
  output: 2.5,
303
+ cachedInput: 0.03,
227
304
  maxInputTokens: 1048576
228
305
  },
229
306
  "deepseek-chat": {
230
307
  input: 0.28,
231
308
  output: 0.42,
309
+ cachedInput: 0.028,
232
310
  maxInputTokens: 131072
233
311
  },
234
312
  "deepseek-reasoner": {
235
313
  input: 0.28,
236
314
  output: 0.42,
315
+ cachedInput: 0.028,
237
316
  maxInputTokens: 131072
238
317
  },
239
318
  "claude-opus-4-5": {
240
319
  input: 5,
241
320
  output: 25,
321
+ cachedInput: 0.5,
322
+ cacheCreationInput: 6.25,
242
323
  maxInputTokens: 2e5
243
324
  },
244
325
  "claude-opus-4-7": {
245
326
  input: 5,
246
327
  output: 25,
328
+ cachedInput: 0.5,
329
+ cacheCreationInput: 6.25,
247
330
  maxInputTokens: 1e6
248
331
  },
249
332
  "claude-opus-4-1": {
250
333
  input: 15,
251
334
  output: 75,
335
+ cachedInput: 1.5,
336
+ cacheCreationInput: 18.75,
252
337
  maxInputTokens: 2e5
253
338
  },
254
339
  "claude-sonnet-4-5": {
255
340
  input: 3,
256
341
  output: 15,
342
+ cachedInput: 0.3,
343
+ cacheCreationInput: 3.75,
257
344
  maxInputTokens: 2e5
258
345
  },
259
346
  "gpt-oss-120b": {
@@ -344,36 +431,43 @@ var prices_default = {
344
431
  "gpt-4.1": {
345
432
  input: 2,
346
433
  output: 8,
434
+ cachedInput: 0.5,
347
435
  maxInputTokens: 1047576
348
436
  },
349
437
  "gpt-4.1-2025-04-14": {
350
438
  input: 2,
351
439
  output: 8,
440
+ cachedInput: 0.5,
352
441
  maxInputTokens: 1047576
353
442
  },
354
443
  "gpt-4.1-mini": {
355
444
  input: 0.4,
356
445
  output: 1.6,
446
+ cachedInput: 0.1,
357
447
  maxInputTokens: 1047576
358
448
  },
359
449
  "gpt-4.1-mini-2025-04-14": {
360
450
  input: 0.4,
361
451
  output: 1.6,
452
+ cachedInput: 0.1,
362
453
  maxInputTokens: 1047576
363
454
  },
364
455
  "gpt-4.1-nano": {
365
456
  input: 0.1,
366
457
  output: 0.4,
458
+ cachedInput: 0.025,
367
459
  maxInputTokens: 1047576
368
460
  },
369
461
  "gpt-4.1-nano-2025-04-14": {
370
462
  input: 0.1,
371
463
  output: 0.4,
464
+ cachedInput: 0.025,
372
465
  maxInputTokens: 1047576
373
466
  },
374
467
  "gpt-4.5-preview": {
375
468
  input: 75,
376
469
  output: 150,
470
+ cachedInput: 37.5,
377
471
  maxInputTokens: 128e3
378
472
  },
379
473
  "gpt-4o-2024-05-13": {
@@ -384,11 +478,13 @@ var prices_default = {
384
478
  "gpt-4o-2024-08-06": {
385
479
  input: 2.5,
386
480
  output: 10,
481
+ cachedInput: 1.25,
387
482
  maxInputTokens: 128e3
388
483
  },
389
484
  "gpt-4o-2024-11-20": {
390
485
  input: 2.5,
391
486
  output: 10,
487
+ cachedInput: 1.25,
392
488
  maxInputTokens: 128e3
393
489
  },
394
490
  "gpt-audio-2025-08-28": {
@@ -414,6 +510,7 @@ var prices_default = {
414
510
  "gpt-4o-mini-2024-07-18": {
415
511
  input: 0.15,
416
512
  output: 0.6,
513
+ cachedInput: 0.075,
417
514
  maxInputTokens: 128e3
418
515
  },
419
516
  "gpt-4o-mini-audio-preview-2024-12-17": {
@@ -424,21 +521,25 @@ var prices_default = {
424
521
  "gpt-4o-mini-realtime-preview-2024-12-17": {
425
522
  input: 0.6,
426
523
  output: 2.4,
524
+ cachedInput: 0.3,
427
525
  maxInputTokens: 128e3
428
526
  },
429
527
  "gpt-realtime-2025-08-28": {
430
528
  input: 4,
431
529
  output: 16,
530
+ cachedInput: 0.4,
432
531
  maxInputTokens: 32e3
433
532
  },
434
533
  "gpt-realtime-1.5-2026-02-23": {
435
534
  input: 4,
436
535
  output: 16,
536
+ cachedInput: 4,
437
537
  maxInputTokens: 32e3
438
538
  },
439
539
  "gpt-realtime-mini-2025-10-06": {
440
540
  input: 0.6,
441
541
  output: 2.4,
542
+ cachedInput: 0.06,
442
543
  maxInputTokens: 128e3
443
544
  },
444
545
  "gpt-4o-mini-transcribe": {
@@ -449,11 +550,13 @@ var prices_default = {
449
550
  "gpt-4o-realtime-preview-2024-10-01": {
450
551
  input: 5,
451
552
  output: 20,
553
+ cachedInput: 2.5,
452
554
  maxInputTokens: 128e3
453
555
  },
454
556
  "gpt-4o-realtime-preview-2024-12-17": {
455
557
  input: 5,
456
558
  output: 20,
559
+ cachedInput: 2.5,
457
560
  maxInputTokens: 128e3
458
561
  },
459
562
  "gpt-4o-transcribe": {
@@ -469,51 +572,61 @@ var prices_default = {
469
572
  "gpt-5.1-2025-11-13": {
470
573
  input: 1.25,
471
574
  output: 10,
575
+ cachedInput: 0.125,
472
576
  maxInputTokens: 272e3
473
577
  },
474
578
  "gpt-5.1-chat-2025-11-13": {
475
579
  input: 1.25,
476
580
  output: 10,
581
+ cachedInput: 0.125,
477
582
  maxInputTokens: 128e3
478
583
  },
479
584
  "gpt-5.1-codex-2025-11-13": {
480
585
  input: 1.25,
481
586
  output: 10,
587
+ cachedInput: 0.125,
482
588
  maxInputTokens: 272e3
483
589
  },
484
590
  "gpt-5.1-codex-mini-2025-11-13": {
485
591
  input: 0.25,
486
592
  output: 2,
593
+ cachedInput: 0.025,
487
594
  maxInputTokens: 272e3
488
595
  },
489
596
  "gpt-5-2025-08-07": {
490
597
  input: 1.25,
491
598
  output: 10,
599
+ cachedInput: 0.125,
492
600
  maxInputTokens: 272e3
493
601
  },
494
602
  "gpt-5-chat": {
495
603
  input: 1.25,
496
604
  output: 10,
605
+ cachedInput: 0.125,
497
606
  maxInputTokens: 128e3
498
607
  },
499
608
  "gpt-5-chat-latest": {
500
609
  input: 1.25,
501
610
  output: 10,
611
+ cachedInput: 0.125,
502
612
  maxInputTokens: 128e3
503
613
  },
504
614
  "gpt-5-codex": {
505
615
  input: 1.25,
506
616
  output: 10,
617
+ cachedInput: 0.125,
507
618
  maxInputTokens: 272e3
508
619
  },
509
620
  "gpt-5-mini-2025-08-07": {
510
621
  input: 0.25,
511
622
  output: 2,
623
+ cachedInput: 0.025,
512
624
  maxInputTokens: 272e3
513
625
  },
514
626
  "gpt-5-nano-2025-08-07": {
515
627
  input: 0.05,
516
628
  output: 0.4,
629
+ cachedInput: 5e-3,
517
630
  maxInputTokens: 272e3
518
631
  },
519
632
  "gpt-5-pro": {
@@ -524,61 +637,73 @@ var prices_default = {
524
637
  "gpt-5.1": {
525
638
  input: 1.25,
526
639
  output: 10,
640
+ cachedInput: 0.125,
527
641
  maxInputTokens: 272e3
528
642
  },
529
643
  "gpt-5.1-chat": {
530
644
  input: 1.25,
531
645
  output: 10,
646
+ cachedInput: 0.125,
532
647
  maxInputTokens: 128e3
533
648
  },
534
649
  "gpt-5.1-codex": {
535
650
  input: 1.25,
536
651
  output: 10,
652
+ cachedInput: 0.125,
537
653
  maxInputTokens: 272e3
538
654
  },
539
655
  "gpt-5.1-codex-max": {
540
656
  input: 1.25,
541
657
  output: 10,
658
+ cachedInput: 0.125,
542
659
  maxInputTokens: 272e3
543
660
  },
544
661
  "gpt-5.1-codex-mini": {
545
662
  input: 0.25,
546
663
  output: 2,
664
+ cachedInput: 0.025,
547
665
  maxInputTokens: 272e3
548
666
  },
549
667
  "gpt-5.2": {
550
668
  input: 1.75,
551
669
  output: 14,
670
+ cachedInput: 0.175,
552
671
  maxInputTokens: 272e3
553
672
  },
554
673
  "gpt-5.2-2025-12-11": {
555
674
  input: 1.75,
556
675
  output: 14,
676
+ cachedInput: 0.175,
557
677
  maxInputTokens: 272e3
558
678
  },
559
679
  "gpt-5.2-chat": {
560
680
  input: 1.75,
561
681
  output: 14,
682
+ cachedInput: 0.175,
562
683
  maxInputTokens: 128e3
563
684
  },
564
685
  "gpt-5.2-chat-2025-12-11": {
565
686
  input: 1.75,
566
687
  output: 14,
688
+ cachedInput: 0.175,
567
689
  maxInputTokens: 128e3
568
690
  },
569
691
  "gpt-5.2-codex": {
570
692
  input: 1.75,
571
693
  output: 14,
694
+ cachedInput: 0.175,
572
695
  maxInputTokens: 272e3
573
696
  },
574
697
  "gpt-5.3-chat": {
575
698
  input: 1.75,
576
699
  output: 14,
700
+ cachedInput: 0.175,
577
701
  maxInputTokens: 128e3
578
702
  },
579
703
  "gpt-5.3-codex": {
580
704
  input: 1.75,
581
705
  output: 14,
706
+ cachedInput: 0.175,
582
707
  maxInputTokens: 272e3
583
708
  },
584
709
  "gpt-5.2-pro": {
@@ -594,71 +719,85 @@ var prices_default = {
594
719
  "gpt-5.4": {
595
720
  input: 2.5,
596
721
  output: 15,
722
+ cachedInput: 0.25,
597
723
  maxInputTokens: 105e4
598
724
  },
599
725
  "gpt-5.4-2026-03-05": {
600
726
  input: 2.5,
601
727
  output: 15,
728
+ cachedInput: 0.25,
602
729
  maxInputTokens: 105e4
603
730
  },
604
731
  "gpt-5.4-pro": {
605
732
  input: 30,
606
733
  output: 180,
734
+ cachedInput: 3,
607
735
  maxInputTokens: 105e4
608
736
  },
609
737
  "gpt-5.4-pro-2026-03-05": {
610
738
  input: 30,
611
739
  output: 180,
740
+ cachedInput: 3,
612
741
  maxInputTokens: 105e4
613
742
  },
614
743
  "gpt-5.4-mini": {
615
744
  input: 0.75,
616
745
  output: 4.5,
746
+ cachedInput: 0.075,
617
747
  maxInputTokens: 272e3
618
748
  },
619
749
  "gpt-5.4-nano": {
620
750
  input: 0.2,
621
751
  output: 1.25,
752
+ cachedInput: 0.02,
622
753
  maxInputTokens: 272e3
623
754
  },
624
755
  "o1-2024-12-17": {
625
756
  input: 15,
626
757
  output: 60,
758
+ cachedInput: 7.5,
627
759
  maxInputTokens: 2e5
628
760
  },
629
761
  "o1-mini": {
630
762
  input: 1.21,
631
763
  output: 4.84,
764
+ cachedInput: 0.605,
632
765
  maxInputTokens: 128e3
633
766
  },
634
767
  "o1-mini-2024-09-12": {
635
768
  input: 1.1,
636
769
  output: 4.4,
770
+ cachedInput: 0.55,
637
771
  maxInputTokens: 128e3
638
772
  },
639
773
  "o1-preview": {
640
774
  input: 15,
641
775
  output: 60,
776
+ cachedInput: 7.5,
642
777
  maxInputTokens: 128e3
643
778
  },
644
779
  "o1-preview-2024-09-12": {
645
780
  input: 15,
646
781
  output: 60,
782
+ cachedInput: 7.5,
647
783
  maxInputTokens: 128e3
648
784
  },
649
785
  "o3-2025-04-16": {
650
786
  input: 2,
651
787
  output: 8,
788
+ cachedInput: 0.5,
652
789
  maxInputTokens: 2e5
653
790
  },
654
791
  "o3-mini": {
655
792
  input: 1.1,
656
793
  output: 4.4,
794
+ cachedInput: 0.55,
657
795
  maxInputTokens: 2e5
658
796
  },
659
797
  "o3-mini-2025-01-31": {
660
798
  input: 1.1,
661
799
  output: 4.4,
800
+ cachedInput: 0.55,
662
801
  maxInputTokens: 2e5
663
802
  },
664
803
  "o3-pro": {
@@ -674,11 +813,13 @@ var prices_default = {
674
813
  "o4-mini": {
675
814
  input: 1.1,
676
815
  output: 4.4,
816
+ cachedInput: 0.275,
677
817
  maxInputTokens: 2e5
678
818
  },
679
819
  "o4-mini-2025-04-16": {
680
820
  input: 1.1,
681
821
  output: 4.4,
822
+ cachedInput: 0.275,
682
823
  maxInputTokens: 2e5
683
824
  },
684
825
  "deepseek-v3.2": {
@@ -699,6 +840,7 @@ var prices_default = {
699
840
  "deepseek-v3": {
700
841
  input: 0.27,
701
842
  output: 1.1,
843
+ cachedInput: 0.07,
702
844
  maxInputTokens: 65536
703
845
  },
704
846
  "deepseek-v3-0324": {
@@ -714,76 +856,105 @@ var prices_default = {
714
856
  "claude-haiku-4-5-20251001": {
715
857
  input: 1,
716
858
  output: 5,
859
+ cachedInput: 0.1,
860
+ cacheCreationInput: 1.25,
717
861
  maxInputTokens: 2e5
718
862
  },
719
863
  "claude-3-7-sonnet-20250219": {
720
864
  input: 3,
721
865
  output: 15,
866
+ cachedInput: 0.3,
867
+ cacheCreationInput: 3.75,
722
868
  maxInputTokens: 2e5
723
869
  },
724
870
  "claude-3-haiku-20240307": {
725
871
  input: 0.25,
726
872
  output: 1.25,
873
+ cachedInput: 0.03,
874
+ cacheCreationInput: 0.3,
727
875
  maxInputTokens: 2e5
728
876
  },
729
877
  "claude-3-opus-20240229": {
730
878
  input: 15,
731
879
  output: 75,
880
+ cachedInput: 1.5,
881
+ cacheCreationInput: 18.75,
732
882
  maxInputTokens: 2e5
733
883
  },
734
884
  "claude-4-opus-20250514": {
735
885
  input: 15,
736
886
  output: 75,
887
+ cachedInput: 1.5,
888
+ cacheCreationInput: 18.75,
737
889
  maxInputTokens: 2e5
738
890
  },
739
891
  "claude-4-sonnet-20250514": {
740
892
  input: 3,
741
893
  output: 15,
894
+ cachedInput: 0.3,
895
+ cacheCreationInput: 3.75,
742
896
  maxInputTokens: 1e6
743
897
  },
744
898
  "claude-sonnet-4-5-20250929": {
745
899
  input: 3,
746
900
  output: 15,
901
+ cachedInput: 0.3,
902
+ cacheCreationInput: 3.75,
747
903
  maxInputTokens: 2e5
748
904
  },
749
905
  "claude-sonnet-4-5-20250929-v1:0": {
750
906
  input: 3,
751
907
  output: 15,
908
+ cachedInput: 0.3,
909
+ cacheCreationInput: 3.75,
752
910
  maxInputTokens: 2e5
753
911
  },
754
912
  "claude-opus-4-1-20250805": {
755
913
  input: 15,
756
914
  output: 75,
915
+ cachedInput: 1.5,
916
+ cacheCreationInput: 18.75,
757
917
  maxInputTokens: 2e5
758
918
  },
759
919
  "claude-opus-4-20250514": {
760
920
  input: 15,
761
921
  output: 75,
922
+ cachedInput: 1.5,
923
+ cacheCreationInput: 18.75,
762
924
  maxInputTokens: 2e5
763
925
  },
764
926
  "claude-opus-4-5-20251101": {
765
927
  input: 5,
766
928
  output: 25,
929
+ cachedInput: 0.5,
930
+ cacheCreationInput: 6.25,
767
931
  maxInputTokens: 2e5
768
932
  },
769
933
  "claude-opus-4-6-20260205": {
770
934
  input: 5,
771
935
  output: 25,
936
+ cachedInput: 0.5,
937
+ cacheCreationInput: 6.25,
772
938
  maxInputTokens: 1e6
773
939
  },
774
940
  "claude-opus-4-7-20260416": {
775
941
  input: 5,
776
942
  output: 25,
943
+ cachedInput: 0.5,
944
+ cacheCreationInput: 6.25,
777
945
  maxInputTokens: 1e6
778
946
  },
779
947
  "claude-sonnet-4-20250514": {
780
948
  input: 3,
781
949
  output: 15,
950
+ cachedInput: 0.3,
951
+ cacheCreationInput: 3.75,
782
952
  maxInputTokens: 1e6
783
953
  },
784
954
  "codex-mini-latest": {
785
955
  input: 1.5,
786
956
  output: 6,
957
+ cachedInput: 0.375,
787
958
  maxInputTokens: 2e5
788
959
  },
789
960
  "deepseek-ai/deepseek-r1": {
@@ -833,6 +1004,7 @@ var prices_default = {
833
1004
  "deepseek-ai/deepseek-v3.1-terminus": {
834
1005
  input: 0.27,
835
1006
  output: 1,
1007
+ cachedInput: 0.216,
836
1008
  maxInputTokens: 163840
837
1009
  },
838
1010
  "deepseek-coder": {
@@ -843,26 +1015,31 @@ var prices_default = {
843
1015
  "gemini-2.0-flash": {
844
1016
  input: 0.1,
845
1017
  output: 0.4,
1018
+ cachedInput: 0.025,
846
1019
  maxInputTokens: 1048576
847
1020
  },
848
1021
  "gemini-2.0-flash-001": {
849
1022
  input: 0.1,
850
1023
  output: 0.4,
1024
+ cachedInput: 0.025,
851
1025
  maxInputTokens: 1048576
852
1026
  },
853
1027
  "gemini-2.0-flash-lite": {
854
1028
  input: 0.075,
855
1029
  output: 0.3,
1030
+ cachedInput: 0.01875,
856
1031
  maxInputTokens: 1048576
857
1032
  },
858
1033
  "gemini-2.0-flash-lite-001": {
859
1034
  input: 0.075,
860
1035
  output: 0.3,
1036
+ cachedInput: 0.01875,
861
1037
  maxInputTokens: 1048576
862
1038
  },
863
1039
  "gemini-2.5-flash-image": {
864
1040
  input: 0.3,
865
1041
  output: 2.5,
1042
+ cachedInput: 0.03,
866
1043
  maxInputTokens: 32768
867
1044
  },
868
1045
  "gemini-3-pro-image-preview": {
@@ -878,51 +1055,61 @@ var prices_default = {
878
1055
  "gemini-3.1-flash-lite-preview": {
879
1056
  input: 0.25,
880
1057
  output: 1.5,
1058
+ cachedInput: 0.025,
881
1059
  maxInputTokens: 1048576
882
1060
  },
883
1061
  "gemini-2.5-flash-lite": {
884
1062
  input: 0.1,
885
1063
  output: 0.4,
1064
+ cachedInput: 0.01,
886
1065
  maxInputTokens: 1048576
887
1066
  },
888
1067
  "gemini-2.5-flash-lite-preview-09-2025": {
889
1068
  input: 0.1,
890
1069
  output: 0.4,
1070
+ cachedInput: 0.01,
891
1071
  maxInputTokens: 1048576
892
1072
  },
893
1073
  "gemini-2.5-flash-preview-09-2025": {
894
1074
  input: 0.3,
895
1075
  output: 2.5,
1076
+ cachedInput: 0.075,
896
1077
  maxInputTokens: 1048576
897
1078
  },
898
1079
  "gemini-live-2.5-flash-preview-native-audio-09-2025": {
899
1080
  input: 0.3,
900
1081
  output: 2,
1082
+ cachedInput: 0.075,
901
1083
  maxInputTokens: 1048576
902
1084
  },
903
1085
  "gemini-2.5-flash-lite-preview-06-17": {
904
1086
  input: 0.1,
905
1087
  output: 0.4,
1088
+ cachedInput: 0.025,
906
1089
  maxInputTokens: 1048576
907
1090
  },
908
1091
  "gemini-3-pro-preview": {
909
1092
  input: 2,
910
1093
  output: 12,
1094
+ cachedInput: 0.2,
911
1095
  maxInputTokens: 1048576
912
1096
  },
913
1097
  "gemini-3.1-pro-preview": {
914
1098
  input: 2,
915
1099
  output: 12,
1100
+ cachedInput: 0.2,
916
1101
  maxInputTokens: 1048576
917
1102
  },
918
1103
  "gemini-3.1-pro-preview-customtools": {
919
1104
  input: 2,
920
1105
  output: 12,
1106
+ cachedInput: 0.2,
921
1107
  maxInputTokens: 1048576
922
1108
  },
923
1109
  "gemini-3-flash-preview": {
924
1110
  input: 0.5,
925
1111
  output: 3,
1112
+ cachedInput: 0.05,
926
1113
  maxInputTokens: 1048576
927
1114
  },
928
1115
  "gemini-robotics-er-1.5-preview": {
@@ -938,11 +1125,13 @@ var prices_default = {
938
1125
  "gemini-flash-latest": {
939
1126
  input: 0.3,
940
1127
  output: 2.5,
1128
+ cachedInput: 0.03,
941
1129
  maxInputTokens: 1048576
942
1130
  },
943
1131
  "gemini-flash-lite-latest": {
944
1132
  input: 0.1,
945
1133
  output: 0.4,
1134
+ cachedInput: 0.01,
946
1135
  maxInputTokens: 1048576
947
1136
  },
948
1137
  "gemini-gemma-2-27b-it": {
@@ -1018,39 +1207,47 @@ var prices_default = {
1018
1207
  "gpt-4o-mini-realtime-preview": {
1019
1208
  input: 0.6,
1020
1209
  output: 2.4,
1210
+ cachedInput: 0.3,
1021
1211
  maxInputTokens: 128e3
1022
1212
  },
1023
1213
  "gpt-4o-realtime-preview": {
1024
1214
  input: 5,
1025
1215
  output: 20,
1216
+ cachedInput: 2.5,
1026
1217
  maxInputTokens: 128e3
1027
1218
  },
1028
1219
  "gpt-4o-realtime-preview-2025-06-03": {
1029
1220
  input: 5,
1030
1221
  output: 20,
1222
+ cachedInput: 2.5,
1031
1223
  maxInputTokens: 128e3
1032
1224
  },
1033
1225
  "gpt-image-1.5": {
1034
1226
  input: 5,
1035
- output: 10
1227
+ output: 10,
1228
+ cachedInput: 1.25
1036
1229
  },
1037
1230
  "gpt-image-1.5-2025-12-16": {
1038
1231
  input: 5,
1039
- output: 10
1232
+ output: 10,
1233
+ cachedInput: 1.25
1040
1234
  },
1041
1235
  "gpt-5.1-chat-latest": {
1042
1236
  input: 1.25,
1043
1237
  output: 10,
1238
+ cachedInput: 0.125,
1044
1239
  maxInputTokens: 128e3
1045
1240
  },
1046
1241
  "gpt-5.2-chat-latest": {
1047
1242
  input: 1.75,
1048
1243
  output: 14,
1244
+ cachedInput: 0.175,
1049
1245
  maxInputTokens: 128e3
1050
1246
  },
1051
1247
  "gpt-5.3-chat-latest": {
1052
1248
  input: 1.75,
1053
1249
  output: 14,
1250
+ cachedInput: 0.175,
1054
1251
  maxInputTokens: 128e3
1055
1252
  },
1056
1253
  "gpt-5-pro-2025-10-06": {
@@ -1061,11 +1258,13 @@ var prices_default = {
1061
1258
  "gpt-realtime": {
1062
1259
  input: 4,
1063
1260
  output: 16,
1261
+ cachedInput: 0.4,
1064
1262
  maxInputTokens: 32e3
1065
1263
  },
1066
1264
  "gpt-realtime-1.5": {
1067
1265
  input: 4,
1068
1266
  output: 16,
1267
+ cachedInput: 0.4,
1069
1268
  maxInputTokens: 32e3
1070
1269
  },
1071
1270
  "gpt-realtime-mini": {
@@ -1112,6 +1311,7 @@ var prices_default = {
1112
1311
  o1: {
1113
1312
  input: 15,
1114
1313
  output: 60,
1314
+ cachedInput: 7.5,
1115
1315
  maxInputTokens: 2e5
1116
1316
  },
1117
1317
  "o1-pro": {
@@ -1127,6 +1327,7 @@ var prices_default = {
1127
1327
  o3: {
1128
1328
  input: 2,
1129
1329
  output: 8,
1330
+ cachedInput: 0.5,
1130
1331
  maxInputTokens: 2e5
1131
1332
  },
1132
1333
  "gpt-oss-20b": {
@@ -1151,6 +1352,8 @@ var prices_default = {
1151
1352
  "claude-haiku-4-5@20251001": {
1152
1353
  input: 1,
1153
1354
  output: 5,
1355
+ cachedInput: 0.1,
1356
+ cacheCreationInput: 1.25,
1154
1357
  maxInputTokens: 2e5
1155
1358
  },
1156
1359
  "claude-3-5-sonnet": {
@@ -1166,6 +1369,8 @@ var prices_default = {
1166
1369
  "claude-3-7-sonnet@20250219": {
1167
1370
  input: 3,
1168
1371
  output: 15,
1372
+ cachedInput: 0.3,
1373
+ cacheCreationInput: 3.75,
1169
1374
  maxInputTokens: 2e5
1170
1375
  },
1171
1376
  "claude-3-haiku": {
@@ -1201,46 +1406,64 @@ var prices_default = {
1201
1406
  "claude-opus-4": {
1202
1407
  input: 15,
1203
1408
  output: 75,
1409
+ cachedInput: 1.5,
1410
+ cacheCreationInput: 18.75,
1204
1411
  maxInputTokens: 2e5
1205
1412
  },
1206
1413
  "claude-opus-4-1@20250805": {
1207
1414
  input: 15,
1208
1415
  output: 75,
1416
+ cachedInput: 1.5,
1417
+ cacheCreationInput: 18.75,
1209
1418
  maxInputTokens: 2e5
1210
1419
  },
1211
1420
  "claude-opus-4-5@20251101": {
1212
1421
  input: 5,
1213
1422
  output: 25,
1423
+ cachedInput: 0.5,
1424
+ cacheCreationInput: 6.25,
1214
1425
  maxInputTokens: 2e5
1215
1426
  },
1216
1427
  "claude-opus-4-6@default": {
1217
1428
  input: 5,
1218
1429
  output: 25,
1430
+ cachedInput: 0.5,
1431
+ cacheCreationInput: 6.25,
1219
1432
  maxInputTokens: 1e6
1220
1433
  },
1221
1434
  "claude-opus-4-7@default": {
1222
1435
  input: 5,
1223
1436
  output: 25,
1437
+ cachedInput: 0.5,
1438
+ cacheCreationInput: 6.25,
1224
1439
  maxInputTokens: 1e6
1225
1440
  },
1226
1441
  "claude-sonnet-4-5@20250929": {
1227
1442
  input: 3,
1228
1443
  output: 15,
1444
+ cachedInput: 0.3,
1445
+ cacheCreationInput: 3.75,
1229
1446
  maxInputTokens: 2e5
1230
1447
  },
1231
1448
  "claude-opus-4@20250514": {
1232
1449
  input: 15,
1233
1450
  output: 75,
1451
+ cachedInput: 1.5,
1452
+ cacheCreationInput: 18.75,
1234
1453
  maxInputTokens: 2e5
1235
1454
  },
1236
1455
  "claude-sonnet-4": {
1237
1456
  input: 3,
1238
1457
  output: 15,
1458
+ cachedInput: 0.3,
1459
+ cacheCreationInput: 3.75,
1239
1460
  maxInputTokens: 1e6
1240
1461
  },
1241
1462
  "claude-sonnet-4@20250514": {
1242
1463
  input: 3,
1243
1464
  output: 15,
1465
+ cachedInput: 0.3,
1466
+ cacheCreationInput: 3.75,
1244
1467
  maxInputTokens: 1e6
1245
1468
  },
1246
1469
  "deepseek-ai/deepseek-v3.1-maas": {
@@ -1290,6 +1513,7 @@ var prices_default = {
1290
1513
  "gpt-realtime-mini-2025-12-15": {
1291
1514
  input: 0.6,
1292
1515
  output: 2.4,
1516
+ cachedInput: 0.06,
1293
1517
  maxInputTokens: 128e3
1294
1518
  },
1295
1519
  "gemini-2.5-flash-native-audio-latest": {
@@ -1315,16 +1539,20 @@ var prices_default = {
1315
1539
  "gemini-pro-latest": {
1316
1540
  input: 1.25,
1317
1541
  output: 10,
1542
+ cachedInput: 0.125,
1318
1543
  maxInputTokens: 1048576
1319
1544
  },
1320
1545
  "gemini-exp-1206": {
1321
1546
  input: 0.3,
1322
1547
  output: 2.5,
1548
+ cachedInput: 0.03,
1323
1549
  maxInputTokens: 1048576
1324
1550
  },
1325
1551
  "claude-sonnet-4-6@default": {
1326
1552
  input: 3,
1327
1553
  output: 15,
1554
+ cachedInput: 0.3,
1555
+ cacheCreationInput: 3.75,
1328
1556
  maxInputTokens: 1e6
1329
1557
  }
1330
1558
  }
@@ -1332,11 +1560,19 @@ var prices_default = {
1332
1560
 
1333
1561
  // src/core/tracker.ts
1334
1562
  var bundledPrices = prices_default.models;
1563
+ var bundledUpdatedAt = prices_default.updated_at ?? "";
1335
1564
  var ModelPriceSchema = z.object({
1336
1565
  input: z.number().nonnegative(),
1337
1566
  output: z.number().nonnegative(),
1567
+ cachedInput: z.number().nonnegative().optional(),
1568
+ cacheCreationInput: z.number().nonnegative().optional(),
1338
1569
  maxInputTokens: z.number().positive().optional()
1339
1570
  });
1571
+ var BudgetConfigSchema = z.object({
1572
+ threshold: z.number().positive(),
1573
+ webhookUrl: z.string().url(),
1574
+ mode: z.enum(["once", "always"]).optional().default("once")
1575
+ });
1340
1576
  var TrackerConfigSchema = z.object({
1341
1577
  storage: z.union([z.enum(["memory", "sqlite"]), z.custom((v) => {
1342
1578
  return v !== null && typeof v === "object" && typeof v.record === "function" && typeof v.getAll === "function" && typeof v.clearAll === "function" && typeof v.clearSession === "function";
@@ -1344,7 +1580,13 @@ var TrackerConfigSchema = z.object({
1344
1580
  alertThreshold: z.number().positive().optional(),
1345
1581
  webhookUrl: z.string().url().optional(),
1346
1582
  syncPrices: z.boolean().optional().default(true),
1347
- customPrices: z.record(z.string(), ModelPriceSchema).optional()
1583
+ customPrices: z.record(z.string(), ModelPriceSchema).optional(),
1584
+ warnIfStaleAfterHours: z.number().nonnegative().optional().default(72),
1585
+ budgets: z.object({
1586
+ perUser: BudgetConfigSchema.optional(),
1587
+ perSession: BudgetConfigSchema.optional()
1588
+ }).optional(),
1589
+ suggestions: z.boolean().optional().default(false)
1348
1590
  });
1349
1591
  function createTracker(config = {}) {
1350
1592
  const parsed = TrackerConfigSchema.safeParse(config);
@@ -1358,19 +1600,45 @@ ${issues}`);
1358
1600
  alertThreshold,
1359
1601
  webhookUrl,
1360
1602
  syncPrices,
1361
- customPrices
1603
+ customPrices,
1604
+ warnIfStaleAfterHours,
1605
+ budgets,
1606
+ suggestions
1362
1607
  } = parsed.data;
1363
1608
  const storage = typeof storageOption === "object" ? storageOption : createStorage(storageOption);
1364
1609
  let remotePrices;
1610
+ let pricesUpdatedAt = bundledUpdatedAt;
1365
1611
  if (syncPrices) {
1366
1612
  getRemotePrices().then((result) => {
1367
- if (result) remotePrices = result;
1613
+ if (result) {
1614
+ remotePrices = result.models;
1615
+ pricesUpdatedAt = result.updated_at;
1616
+ }
1368
1617
  }).catch(() => {
1369
1618
  });
1370
1619
  }
1620
// Latch so the staleness check below runs at most once in this scope.
let stalenessChecked = false;
/**
 * Emits a single console warning when the price data's `updated_at` timestamp
 * is older than `warnIfStaleAfterHours`.
 *
 * Does nothing when the check already ran, when `warnIfStaleAfterHours` is
 * falsy (0 disables the warning), or when `pricesUpdatedAt` is empty or cannot
 * be parsed as a date.
 *
 * @returns {void}
 */
function maybeWarnStaleness() {
  if (stalenessChecked || !warnIfStaleAfterHours) return;
  stalenessChecked = true;
  if (!pricesUpdatedAt) return;
  const updatedMs = new Date(pricesUpdatedAt).getTime();
  // Date parsing does not throw on bad input; it yields NaN. Skip explicitly
  // instead of relying on a catch block that can never be reached.
  if (Number.isNaN(updatedMs)) return;
  const ageHours = (Date.now() - updatedMs) / (1e3 * 60 * 60);
  if (ageHours > warnIfStaleAfterHours) {
    console.warn(
      `[tokenwatch] Price data is ${Math.round(ageHours)}h old (updated_at: ${pricesUpdatedAt}). Run "tokenwatch sync" to refresh, or set warnIfStaleAfterHours: 0 to suppress.`
    );
  }
}
1371
1636
  let alertFired = false;
1637
+ const firedUserAlerts = /* @__PURE__ */ new Set();
1638
+ const firedSessionAlerts = /* @__PURE__ */ new Set();
1372
1639
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
1373
1640
  function resolveModelPrice(model) {
1641
+ maybeWarnStaleness();
1374
1642
  return resolvePrice(model, {
1375
1643
  bundledPrices,
1376
1644
  ...customPrices !== void 0 && { customPrices },
@@ -1381,8 +1649,10 @@ ${issues}`);
1381
1649
  const price = resolveModelPrice(entry.model);
1382
1650
  const costUSD = calculateCost(
1383
1651
  entry.inputTokens,
1384
- entry.outputTokens + (entry.reasoningTokens ?? 0),
1385
- price
1652
+ entry.outputTokens,
1653
+ price,
1654
+ entry.cachedTokens,
1655
+ entry.cacheCreationTokens
1386
1656
  );
1387
1657
  const full = {
1388
1658
  ...entry,
@@ -1390,32 +1660,81 @@ ${issues}`);
1390
1660
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
1391
1661
  };
1392
1662
  storage.record(full);
1393
- maybeFireAlert();
1663
+ maybeFireAlerts(full);
1664
+ if (suggestions) {
1665
+ maybeSuggestCheaperModel(entry.model, costUSD, entry.inputTokens, entry.outputTokens, {
1666
+ bundledPrices,
1667
+ ...customPrices !== void 0 && { customPrices },
1668
+ ...remotePrices !== void 0 && { remotePrices }
1669
+ });
1670
+ }
1394
1671
  }
1395
- function maybeFireAlert() {
1396
- if (!alertThreshold || !webhookUrl || alertFired) return;
1397
- alertFired = true;
1398
- Promise.resolve(storage.getAll()).then((entries) => {
1399
- const total = computeTotal(entries);
1400
- if (total < alertThreshold) {
1401
- alertFired = false;
1402
- return;
1403
- }
1404
- const payload = {
1405
- text: `[tokenwatch] Alert: total cost reached $${total.toFixed(4)} USD (threshold: $${alertThreshold})`
1406
- };
1407
- fetch(webhookUrl, {
1408
- method: "POST",
1409
- headers: { "Content-Type": "application/json" },
1410
- body: JSON.stringify(payload)
1672
/**
 * Evaluate every configured alert after an entry has been recorded.
 *
 * Three independent checks may run:
 *  - the global threshold (`alertThreshold` + `webhookUrl`), guarded by `alertFired`;
 *  - the per-user budget (`budgets.perUser`), when the entry carries a `userId`;
 *  - the per-session budget (`budgets.perSession`), when the entry carries a `sessionId`.
 *
 * All checks triggered by the same entry share a single `storage.getAll()`
 * read. The work is fire-and-forget: nothing is awaited by the caller.
 *
 * @param {object} entry - The recorded entry; only `userId` and `sessionId` are read.
 */
function maybeFireAlerts(entry) {
  // Memoized so that up to three checks reuse one storage read per entry.
  let snapshot;
  const loadEntries = () => (snapshot ??= Promise.resolve(storage.getAll()));

  if (alertThreshold && webhookUrl && !alertFired) {
    // Claim the alert optimistically so overlapping calls do not double-fire;
    // the claim is released below if the threshold turns out not to be reached.
    alertFired = true;
    loadEntries().then((entries) => {
      const total = computeTotal(entries);
      if (total < alertThreshold) {
        alertFired = false;
        return;
      }
      fireWebhook(webhookUrl, {
        text: `[tokenwatch] Alert: total cost reached $${total.toFixed(4)} USD (threshold: $${alertThreshold})`
      });
    }).catch(() => {
      alertFired = false;
    });
  }
  if (budgets?.perUser && entry.userId) {
    maybeFireScopedBudgetAlert(
      budgets.perUser,
      "user",
      entry.userId,
      firedUserAlerts,
      (e) => e.userId,
      loadEntries
    );
  }
  if (budgets?.perSession && entry.sessionId) {
    maybeFireScopedBudgetAlert(
      budgets.perSession,
      "session",
      entry.sessionId,
      firedSessionAlerts,
      (e) => e.sessionId,
      loadEntries
    );
  }
}

/**
 * Shared implementation of the per-user / per-session budget check.
 *
 * @param {object} cfg - Budget config; reads `mode`, `threshold` and `webhookUrl`.
 * @param {string} label - Scope name used in the alert text ("user" or "session").
 * @param {string} id - The user or session id being checked.
 * @param {Set<string>} firedIds - Ids that already alerted (unused when `cfg.mode === "always"`).
 * @param {(e: object) => unknown} getId - Extracts the scope id from a stored entry.
 * @param {() => Promise<object[]>} loadEntries - Returns the stored entries.
 */
function maybeFireScopedBudgetAlert(cfg, label, id, firedIds, getId, loadEntries) {
  const fireOnce = cfg.mode !== "always";
  if (fireOnce && firedIds.has(id)) return;
  // Same optimistic claim / release scheme as the global alert.
  if (fireOnce) firedIds.add(id);
  loadEntries().then((entries) => {
    const cost = entries.filter((e) => getId(e) === id).reduce((s, e) => s + e.costUSD, 0);
    if (cost >= cfg.threshold) {
      fireWebhook(cfg.webhookUrl, {
        text: `[tokenwatch] Budget alert: ${label} "${id}" reached $${cost.toFixed(4)} USD (threshold: $${cfg.threshold})`
      });
    } else if (fireOnce) {
      firedIds.delete(id);
    }
  }).catch(() => {
    if (fireOnce) firedIds.delete(id);
  });
}
1727
/**
 * POST a JSON payload to an alert webhook without blocking the caller.
 *
 * Delivery is best-effort: failures never throw into the tracking path, but
 * they are reported with `console.warn` so a misconfigured or unreachable
 * webhook does not go unnoticed.
 *
 * @param {string} url - Webhook endpoint.
 * @param {object} payload - JSON-serializable body.
 */
function fireWebhook(url, payload) {
  fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload)
  }).then((response) => {
    // `fetch` resolves for HTTP error statuses; surface those as well.
    if (response?.ok === false) {
      console.warn(`[tokenwatch] Alert webhook responded with HTTP ${response.status}.`);
    }
  }).catch((err) => {
    // The URL is deliberately left out of the message: webhook URLs often embed secrets.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`[tokenwatch] Alert webhook request failed: ${reason}`);
  });
}
1417
- async function getReport() {
1418
- const entries = await Promise.resolve(storage.getAll());
1735
+ async function getReport(options) {
1736
+ const allEntries = await Promise.resolve(storage.getAll());
1737
+ const entries = filterEntries(allEntries, options);
1419
1738
  const byModel = {};
1420
1739
  const bySession = {};
1421
1740
  const byUser = {};
@@ -1423,18 +1742,24 @@ ${issues}`);
1423
1742
  let totalInput = 0;
1424
1743
  let totalOutput = 0;
1425
1744
  let totalCost = 0;
1426
- let lastTimestamp = startedAt;
1745
+ let periodFrom = options ? entries[0]?.timestamp ?? startedAt : startedAt;
1746
+ let lastTimestamp = periodFrom;
1427
1747
  for (const e of entries) {
1428
- totalInput += e.inputTokens;
1748
+ totalInput += e.inputTokens + (e.cachedTokens ?? 0) + (e.cacheCreationTokens ?? 0);
1429
1749
  totalOutput += e.outputTokens;
1430
1750
  totalCost += e.costUSD;
1431
1751
  if (e.timestamp > lastTimestamp) lastTimestamp = e.timestamp;
1432
- const m = byModel[e.model] ??= { costUSD: 0, calls: 0, tokens: { input: 0, output: 0, reasoning: 0 } };
1752
+ const m = byModel[e.model] ??= {
1753
+ costUSD: 0,
1754
+ calls: 0,
1755
+ tokens: { input: 0, output: 0, reasoning: 0, cached: 0 }
1756
+ };
1433
1757
  m.costUSD += e.costUSD;
1434
1758
  m.calls += 1;
1435
- m.tokens.input += e.inputTokens;
1759
+ m.tokens.input += e.inputTokens + (e.cachedTokens ?? 0) + (e.cacheCreationTokens ?? 0);
1436
1760
  m.tokens.output += e.outputTokens;
1437
1761
  m.tokens.reasoning += e.reasoningTokens ?? 0;
1762
+ m.tokens.cached += e.cachedTokens ?? 0;
1438
1763
  if (e.sessionId) {
1439
1764
  const s = bySession[e.sessionId] ??= { costUSD: 0, calls: 0 };
1440
1765
  s.costUSD += e.costUSD;
@@ -1451,6 +1776,9 @@ ${issues}`);
1451
1776
  f.calls += 1;
1452
1777
  }
1453
1778
  }
1779
+ if (options && entries.length > 0) {
1780
+ periodFrom = entries[0]?.timestamp ?? periodFrom;
1781
+ }
1454
1782
  return {
1455
1783
  totalCostUSD: totalCost,
1456
1784
  totalTokens: { input: totalInput, output: totalOutput },
@@ -1458,22 +1786,66 @@ ${issues}`);
1458
1786
  bySession,
1459
1787
  byUser,
1460
1788
  byFeature,
1461
- period: { from: startedAt, to: lastTimestamp }
1789
+ period: { from: periodFrom, to: lastTimestamp },
1790
+ ...pricesUpdatedAt ? { pricesUpdatedAt } : {}
1791
+ };
1792
+ }
1793
/**
 * Project future spend from the entries recorded in a recent time window.
 *
 * The burn rate is the total cost of the in-window entries divided by the time
 * between the earliest and the latest of them. The earliest/latest entries are
 * found by comparing timestamps, so the result does not depend on the order in
 * which the storage returns entries.
 *
 * @param {{ windowHours?: number }} [options] - Look-back window; defaults to 24 hours.
 * @returns {Promise<object>} `burnRatePerHour`, `projectedDailyCostUSD` (x24),
 *   `projectedMonthlyCostUSD` (x24x30), `basedOnHours` (rounded to 2 decimals) and
 *   `basedOnPeriod`. All numbers are 0 when fewer than two entries fall in the
 *   window (period `null`) or when they span less than 0.001 hours.
 */
async function getCostForecast(options = {}) {
  const windowHours = options.windowHours ?? 24;
  const allEntries = await Promise.resolve(storage.getAll());
  const windowStart = Date.now() - windowHours * 60 * 60 * 1e3;

  let sampleCount = 0;
  let totalCost = 0;
  let earliest;
  let latest;
  for (const e of allEntries) {
    const ms = new Date(e.timestamp).getTime();
    // Written as a negated `>=` so unparseable timestamps (NaN) are dropped too.
    if (!(ms >= windowStart)) continue;
    sampleCount += 1;
    totalCost += e.costUSD;
    if (earliest === undefined || ms < earliest.ms) earliest = { ms, timestamp: e.timestamp };
    if (latest === undefined || ms >= latest.ms) latest = { ms, timestamp: e.timestamp };
  }

  const noForecast = {
    burnRatePerHour: 0,
    projectedDailyCostUSD: 0,
    projectedMonthlyCostUSD: 0,
    basedOnHours: 0
  };
  if (sampleCount < 2) {
    return { ...noForecast, basedOnPeriod: null };
  }

  const basedOnPeriod = { from: earliest.timestamp, to: latest.timestamp };
  const actualHours = (latest.ms - earliest.ms) / (1e3 * 60 * 60);
  // A near-zero span would turn any cost into an absurd hourly rate.
  if (actualHours < 1e-3) {
    return { ...noForecast, basedOnPeriod };
  }

  const burnRatePerHour = totalCost / actualHours;
  return {
    burnRatePerHour,
    projectedDailyCostUSD: burnRatePerHour * 24,
    projectedMonthlyCostUSD: burnRatePerHour * 24 * 30,
    basedOnHours: Math.round(actualHours * 100) / 100,
    basedOnPeriod
  };
}
1464
1833
  async function reset() {
1465
1834
  await Promise.resolve(storage.clearAll());
1466
1835
  alertFired = false;
1836
+ firedUserAlerts.clear();
1837
+ firedSessionAlerts.clear();
1467
1838
  }
1468
1839
  async function resetSession(sessionId) {
1469
1840
  await Promise.resolve(storage.clearSession(sessionId));
1841
+ firedSessionAlerts.delete(sessionId);
1470
1842
  }
1471
1843
  async function exportJSON() {
1472
1844
  return JSON.stringify(await getReport(), null, 2);
1473
1845
  }
1474
1846
  async function exportCSV() {
1475
1847
  const entries = await Promise.resolve(storage.getAll());
1476
- const header = "timestamp,model,inputTokens,outputTokens,reasoningTokens,costUSD,sessionId,userId,feature";
1848
+ const header = "timestamp,model,inputTokens,outputTokens,reasoningTokens,cachedTokens,cacheCreationTokens,costUSD,sessionId,userId,feature";
1477
1849
  const rows = entries.map(
1478
1850
  (e) => [
1479
1851
  csvEscape(e.timestamp),
@@ -1481,6 +1853,8 @@ ${issues}`);
1481
1853
  e.inputTokens,
1482
1854
  e.outputTokens,
1483
1855
  e.reasoningTokens ?? 0,
1856
+ e.cachedTokens ?? 0,
1857
+ e.cacheCreationTokens ?? 0,
1484
1858
  e.costUSD.toFixed(8),
1485
1859
  csvEscape(e.sessionId ?? ""),
1486
1860
  csvEscape(e.userId ?? ""),
@@ -1496,11 +1870,47 @@ ${issues}`);
1496
1870
  ...remotePrices !== void 0 && { remotePrices }
1497
1871
  }) ?? null;
1498
1872
  }
1499
- return { track, getReport, reset, resetSession, exportJSON, exportCSV, getModelInfo };
1873
+ return {
1874
+ track,
1875
+ getReport,
1876
+ getCostForecast,
1877
+ reset,
1878
+ resetSession,
1879
+ exportJSON,
1880
+ exportCSV,
1881
+ getModelInfo
1882
+ };
1500
1883
  }
1501
1884
  function computeTotal(entries) {
1502
1885
  return entries.reduce((sum, e) => sum + e.costUSD, 0);
1503
1886
  }
1887
/**
 * Convert a relative duration such as "24h" or "7d" into milliseconds.
 *
 * @param {string} last - A non-negative decimal number followed by `h` (hours)
 *   or `d` (days). Surrounding whitespace is ignored.
 * @returns {number} The duration in milliseconds.
 * @throws {Error} When `last` is not a string or does not match the format.
 */
function parseLastMs(last) {
  // Non-strings are rejected with the same validation error as malformed
  // strings, instead of crashing on `.trim()`.
  const match = typeof last === "string" ? /^(\d+(?:\.\d+)?)(h|d)$/.exec(last.trim()) : null;
  if (!match) throw new Error(`[tokenwatch] Invalid "last" value: "${String(last)}". Use e.g. "24h", "7d".`);
  const value = parseFloat(match[1]);
  return match[2] === "h" ? value * 60 * 60 * 1e3 : value * 24 * 60 * 60 * 1e3;
}
1894
/**
 * Restrict entries to a time range.
 *
 * @param {Array<{ timestamp: string }>} entries - Recorded entries.
 * @param {{ last?: string, since?: string | Date, until?: string | Date }} [options]
 *   `last` is a relative window ending now and takes precedence over `since`;
 *   `since` and `until` are inclusive bounds.
 * @returns {Array} The same array when no bound is given, otherwise a filtered copy.
 *   Entries whose own timestamp cannot be parsed are kept, because they compare
 *   false against both bounds.
 * @throws {Error} When `last`, `since` or `until` cannot be parsed. An invalid
 *   bound would otherwise become NaN and silently disable the filter.
 */
function filterEntries(entries, options) {
  if (!options) return entries;
  let sinceMs;
  let untilMs;
  if (options.last) {
    sinceMs = Date.now() - parseLastMs(options.last);
  } else if (options.since) {
    sinceMs = parseBoundaryMs(options.since, "since");
  }
  if (options.until) {
    untilMs = parseBoundaryMs(options.until, "until");
  }
  if (sinceMs === void 0 && untilMs === void 0) return entries;
  return entries.filter((e) => {
    const ts = new Date(e.timestamp).getTime();
    if (sinceMs !== void 0 && ts < sinceMs) return false;
    if (untilMs !== void 0 && ts > untilMs) return false;
    return true;
  });
}

/** Parse a `since`/`until` bound into epoch milliseconds, rejecting invalid dates. */
function parseBoundaryMs(value, optionName) {
  const ms = new Date(value).getTime();
  if (Number.isNaN(ms)) {
    throw new Error(
      `[tokenwatch] Invalid "${optionName}" value: "${String(value)}". Use a Date or an ISO 8601 string.`
    );
  }
  return ms;
}
1504
1914
  function csvEscape(value) {
1505
1915
  if (value.includes(",") || value.includes('"') || value.includes("\n")) {
1506
1916
  return `"${value.replace(/"/g, '""')}"`;
@@ -1508,6 +1918,71 @@ function csvEscape(value) {
1508
1918
  return value;
1509
1919
  }
1510
1920
 
1921
+ // src/core/lazy-tracker.ts
1922
+ var CSV_HEADER = "timestamp,model,inputTokens,outputTokens,reasoningTokens,cachedTokens,cacheCreationTokens,costUSD,sessionId,userId,feature";
1923
/**
 * Build the report returned while no tracker has been initialized: zero cost,
 * zero tokens, empty breakdowns, and a period that starts and ends at the
 * moment of the call.
 *
 * @returns {object} A fresh report object on every call.
 */
function emptyReport() {
  const timestamp = new Date().toISOString();
  const report = {
    totalCostUSD: 0,
    totalTokens: { input: 0, output: 0 }
  };
  for (const breakdown of ["byModel", "bySession", "byUser", "byFeature"]) {
    report[breakdown] = {};
  }
  report.period = { from: timestamp, to: timestamp };
  return report;
}
1935
/**
 * Build the forecast returned while no tracker has been initialized: every
 * rate and projection is 0 and there is no underlying period.
 *
 * @returns {object} A fresh forecast object on every call.
 */
function zeroForecast() {
  const forecast = {};
  const zeroedMetrics = [
    "burnRatePerHour",
    "projectedDailyCostUSD",
    "projectedMonthlyCostUSD",
    "basedOnHours"
  ];
  for (const metric of zeroedMetrics) {
    forecast[metric] = 0;
  }
  forecast.basedOnPeriod = null;
  return forecast;
}
1944
/**
 * Create a tracker facade that can be handed out before its configuration is
 * known and initialized later with `init(config)`.
 *
 * Until `init` succeeds, the facade is inert: `track` discards entries,
 * `reset`/`resetSession` do nothing, and the read methods return empty
 * placeholders (`emptyReport()`, `zeroForecast()`, `"{}"`, the CSV header
 * line, `null`). After `init`, every call is forwarded to the real tracker.
 *
 * @returns {object} The facade: `init` plus the tracker methods.
 */
function createLazyTracker() {
  let delegate = null;
  return {
    /**
     * Build the real tracker. May only succeed once; if `createTracker`
     * throws, the error propagates unchanged and `init` can be retried.
     */
    init(config) {
      if (delegate !== null) {
        throw new Error(
          "[tokenwatch] LazyTracker already initialized. init() may only be called once."
        );
      }
      delegate = createTracker(config ?? {});
    },
    track(entry) {
      delegate?.track(entry);
    },
    async getReport(options) {
      return delegate?.getReport(options) ?? emptyReport();
    },
    async getCostForecast(options) {
      return delegate?.getCostForecast(options) ?? zeroForecast();
    },
    async reset() {
      await delegate?.reset();
    },
    async resetSession(sessionId) {
      await delegate?.resetSession(sessionId);
    },
    async exportJSON() {
      return delegate?.exportJSON() ?? "{}";
    },
    async exportCSV() {
      return delegate?.exportCSV() ?? CSV_HEADER;
    },
    getModelInfo(model) {
      return delegate?.getModelInfo(model) ?? null;
    }
  };
}
1985
+
1511
1986
  // src/providers/openai.ts
1512
1987
  function extractMeta(params) {
1513
1988
  const { __sessionId, __userId, __feature, ...cleaned } = params;
@@ -1519,19 +1994,24 @@ function extractMeta(params) {
1519
1994
  };
1520
1995
  }
1521
1996
  function extractUsage(usage) {
1522
- if (!usage) return { inputTokens: 0, outputTokens: 0, reasoningTokens: 0 };
1997
+ if (!usage) return { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cachedTokens: 0 };
1998
+ const totalInput = usage.prompt_tokens ?? usage.input_tokens ?? 0;
1999
+ const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;
1523
2000
  return {
1524
- inputTokens: usage.prompt_tokens ?? usage.input_tokens ?? 0,
2001
+ // inputTokens = regular (non-cached) input; OpenAI prompt_tokens includes cached tokens
2002
+ inputTokens: totalInput - cachedTokens,
1525
2003
  outputTokens: usage.completion_tokens ?? usage.output_tokens ?? 0,
1526
- reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0
2004
+ reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0,
2005
+ cachedTokens
1527
2006
  };
1528
2007
  }
1529
- function trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature) {
2008
+ function trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens = 0) {
1530
2009
  tracker.track({
1531
2010
  model,
1532
2011
  inputTokens,
1533
- outputTokens,
2012
+ outputTokens: outputTokens + reasoningTokens,
1534
2013
  ...reasoningTokens > 0 && { reasoningTokens },
2014
+ ...cachedTokens > 0 && { cachedTokens },
1535
2015
  ...sessionId !== void 0 && { sessionId },
1536
2016
  ...userId !== void 0 && { userId },
1537
2017
  ...feature !== void 0 && { feature }
@@ -1543,13 +2023,13 @@ async function* wrapStream(stream, model, sessionId, userId, feature, tracker) {
1543
2023
  lastChunk = chunk;
1544
2024
  yield chunk;
1545
2025
  }
1546
- const { inputTokens, outputTokens, reasoningTokens } = extractUsage(lastChunk?.usage);
2026
+ const { inputTokens, outputTokens, reasoningTokens, cachedTokens } = extractUsage(lastChunk?.usage);
1547
2027
  if (!lastChunk?.usage) {
1548
2028
  console.warn(
1549
2029
  `[tokenwatch] No usage data in stream for model "${model}". Cost recorded as $0. Pass stream_options: { include_usage: true } to get accurate costs.`
1550
2030
  );
1551
2031
  }
1552
- trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature);
2032
+ trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens);
1553
2033
  }
1554
2034
  function wrapOpenAI(client, tracker) {
1555
2035
  const proxiedCompletions = new Proxy(client.chat.completions, {
@@ -1571,7 +2051,7 @@ function wrapOpenAI(client, tracker) {
1571
2051
  );
1572
2052
  }
1573
2053
  const completion = result;
1574
- const { inputTokens, outputTokens, reasoningTokens } = extractUsage(completion.usage);
2054
+ const { inputTokens, outputTokens, reasoningTokens, cachedTokens } = extractUsage(completion.usage);
1575
2055
  trackWithMeta(
1576
2056
  tracker,
1577
2057
  completion.model ?? model,
@@ -1580,7 +2060,8 @@ function wrapOpenAI(client, tracker) {
1580
2060
  reasoningTokens,
1581
2061
  sessionId,
1582
2062
  userId,
1583
- feature
2063
+ feature,
2064
+ cachedTokens
1584
2065
  );
1585
2066
  return result;
1586
2067
  };
@@ -1627,10 +2108,12 @@ function extractMeta2(params) {
1627
2108
  };
1628
2109
  }
1629
2110
  function extractUsage2(usage) {
1630
- if (!usage) return { inputTokens: 0, outputTokens: 0 };
2111
+ if (!usage) return { inputTokens: 0, outputTokens: 0, cachedTokens: 0, cacheCreationTokens: 0 };
1631
2112
  return {
1632
2113
  inputTokens: usage.input_tokens ?? 0,
1633
- outputTokens: usage.output_tokens ?? 0
2114
+ outputTokens: usage.output_tokens ?? 0,
2115
+ cachedTokens: usage.cache_read_input_tokens ?? 0,
2116
+ cacheCreationTokens: usage.cache_creation_input_tokens ?? 0
1634
2117
  };
1635
2118
  }
1636
2119
  function extractThinkingTokenApprox(content) {
@@ -1638,30 +2121,32 @@ function extractThinkingTokenApprox(content) {
1638
2121
  const chars = content.filter((b) => b.type === "thinking").reduce((sum, b) => sum + (b.thinking?.length ?? 0), 0);
1639
2122
  return chars > 0 ? Math.round(chars / 4) : 0;
1640
2123
  }
1641
/**
 * Record one Anthropic call on the tracker.
 *
 * `model`, `inputTokens` and `outputTokens` are always sent. The optional
 * token counts are attached only when greater than 0, and the metadata fields
 * only when they are not `undefined`. `outputTokens` is forwarded unchanged;
 * `reasoningTokens` is attached as a separate field and not added to it.
 */
function trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens = 0, cacheCreationTokens = 0) {
  const record = { model, inputTokens, outputTokens };
  if (reasoningTokens > 0) record.reasoningTokens = reasoningTokens;
  if (cachedTokens > 0) record.cachedTokens = cachedTokens;
  if (cacheCreationTokens > 0) record.cacheCreationTokens = cacheCreationTokens;
  if (sessionId !== void 0) record.sessionId = sessionId;
  if (userId !== void 0) record.userId = userId;
  if (feature !== void 0) record.feature = feature;
  tracker.track(record);
}
1656
2137
  async function* wrapStream2(stream, model, sessionId, userId, feature, tracker) {
1657
2138
  let inputTokens = 0;
1658
2139
  let outputTokens = 0;
2140
+ let cachedTokens = 0;
2141
+ let cacheCreationTokens = 0;
1659
2142
  let currentBlockIsThinking = false;
1660
2143
  let thinkingCharCount = 0;
1661
2144
  for await (const event of stream) {
1662
2145
  yield event;
1663
2146
  if (event.type === "message_start" && event.message?.usage) {
1664
2147
  inputTokens = event.message.usage.input_tokens ?? 0;
2148
+ cachedTokens = event.message.usage.cache_read_input_tokens ?? 0;
2149
+ cacheCreationTokens = event.message.usage.cache_creation_input_tokens ?? 0;
1665
2150
  }
1666
2151
  if (event.type === "message_delta" && event.usage) {
1667
2152
  outputTokens = event.usage.output_tokens ?? 0;
@@ -1677,7 +2162,7 @@ async function* wrapStream2(stream, model, sessionId, userId, feature, tracker)
1677
2162
  }
1678
2163
  }
1679
2164
  const reasoningTokens = thinkingCharCount > 0 ? Math.round(thinkingCharCount / 4) : 0;
1680
- trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature);
2165
+ trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens, cacheCreationTokens);
1681
2166
  }
1682
2167
  function wrapAnthropic(client, tracker) {
1683
2168
  const proxiedMessages = new Proxy(client.messages, {
@@ -1699,7 +2184,7 @@ function wrapAnthropic(client, tracker) {
1699
2184
  );
1700
2185
  }
1701
2186
  const message = result;
1702
- const { inputTokens, outputTokens } = extractUsage2(message.usage);
2187
+ const { inputTokens, outputTokens, cachedTokens, cacheCreationTokens } = extractUsage2(message.usage);
1703
2188
  const reasoningTokens = extractThinkingTokenApprox(message.content);
1704
2189
  trackWithMeta2(
1705
2190
  tracker,
@@ -1709,7 +2194,9 @@ function wrapAnthropic(client, tracker) {
1709
2194
  reasoningTokens,
1710
2195
  sessionId,
1711
2196
  userId,
1712
- feature
2197
+ feature,
2198
+ cachedTokens,
2199
+ cacheCreationTokens
1713
2200
  );
1714
2201
  return result;
1715
2202
  };
@@ -1779,6 +2266,7 @@ function wrapGemini(client, tracker) {
1779
2266
  });
1780
2267
  }
1781
2268
  export {
2269
+ createLazyTracker,
1782
2270
  createTracker,
1783
2271
  wrapAnthropic,
1784
2272
  wrapOpenAI as wrapDeepSeek,