@diogonzafe/tokenwatch 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -25,8 +25,45 @@ function lookupInMap(model, map) {
25
25
  }
26
26
  return void 0;
27
27
  }
28
- function calculateCost(inputTokens, outputTokens, price) {
29
- return inputTokens / 1e6 * price.input + outputTokens / 1e6 * price.output;
28
/**
 * Computes the USD cost of a single call from its token counts and a price entry.
 *
 * Every rate in `price` is applied per 1,000,000 tokens. Cached reads fall back
 * to the regular input rate when `price.cachedInput` is absent; cache writes
 * fall back to 1.25x the regular input rate when `price.cacheCreationInput`
 * is absent.
 *
 * @param {number} inputTokens - Tokens billed at `price.input`.
 * @param {number} outputTokens - Tokens billed at `price.output`.
 * @param {{input: number, output: number, cachedInput?: number, cacheCreationInput?: number}} price
 * @param {number} [cachedTokens=0] - Tokens billed at the cached-read rate.
 * @param {number} [cacheCreationTokens=0] - Tokens billed at the cache-write rate.
 * @returns {number} Sum of the regular-input, cached-read, cache-write and output components.
 */
function calculateCost(inputTokens, outputTokens, price, cachedTokens = 0, cacheCreationTokens = 0) {
  const TOKENS_PER_UNIT = 1e6;
  const cachedRate = price.cachedInput ?? price.input;
  const creationRate = price.cacheCreationInput ?? price.input * 1.25;

  // Accumulate in the same order as the four-term sum so floating-point results match.
  let total = inputTokens / TOKENS_PER_UNIT * price.input;
  total += cachedTokens / TOKENS_PER_UNIT * cachedRate;
  total += cacheCreationTokens / TOKENS_PER_UNIT * creationRate;
  total += outputTokens / TOKENS_PER_UNIT * price.output;
  return total;
}
35
+
36
+ // src/core/suggestions.ts
37
// Model-name prefixes used to group models into provider families.
var PROVIDER_PREFIXES = ["gpt-", "claude-", "gemini-", "deepseek-"];

/**
 * Returns the first entry of PROVIDER_PREFIXES that `model` starts with.
 *
 * @param {string} model - Model identifier.
 * @returns {string | undefined} The matching prefix, or `undefined` when none matches.
 */
function getProviderPrefix(model) {
  for (const candidate of PROVIDER_PREFIXES) {
    if (model.startsWith(candidate)) {
      return candidate;
    }
  }
  return undefined;
}
41
/**
 * Logs a hint when another model with the same provider prefix would have
 * handled this call for less than half of what it actually cost.
 *
 * Candidates are taken from the merged price layers (bundled, then remote,
 * then custom — later layers override earlier ones). Each candidate is priced
 * with the same token breakdown as the real call, so that calls dominated by
 * cached tokens are not compared against a candidate cost that ignores them.
 *
 * @param {string} model - Model that handled the call.
 * @param {number} costUSD - Actual cost of the call in USD.
 * @param {number} inputTokens - Input tokens billed at the regular input rate.
 * @param {number} outputTokens - Output tokens.
 * @param {{bundledPrices: object, remotePrices?: object, customPrices?: object}} layers
 *   Price maps keyed by model name.
 * @param {number} [cachedTokens=0] - Cached-read tokens of the call.
 * @param {number} [cacheCreationTokens=0] - Cache-write tokens of the call.
 * @returns {void} Only side effect is a single `console.log` when a suggestion applies.
 */
function maybeSuggestCheaperModel(model, costUSD, inputTokens, outputTokens, layers, cachedTokens = 0, cacheCreationTokens = 0) {
  // Reject NaN/Infinity as well as non-positive costs; a plain `<= 0` check
  // lets NaN through and would print "NaN% cheaper".
  if (!Number.isFinite(costUSD) || costUSD <= 0) return;
  const prefix = getProviderPrefix(model);
  if (!prefix) return;
  const mergedMap = {
    ...layers.bundledPrices,
    ...layers.remotePrices ?? {},
    ...layers.customPrices ?? {}
  };
  let cheapestModel;
  let cheapestCost = Infinity;
  for (const [key, price] of Object.entries(mergedMap)) {
    if (key === model || !key.startsWith(prefix) || !price) continue;
    const candidateCost = calculateCost(
      inputTokens,
      outputTokens,
      price,
      cachedTokens,
      cacheCreationTokens
    );
    if (candidateCost < cheapestCost) {
      cheapestCost = candidateCost;
      cheapestModel = key;
    }
  }
  // Only speak up when the saving is more than 50%.
  if (cheapestModel === void 0 || cheapestCost >= costUSD * 0.5) return;
  const savingsPct = Math.round((1 - cheapestCost / costUSD) * 100);
  console.log(
    `[tokenwatch] Suggestion: ${cheapestModel} could handle this for ~$${cheapestCost.toFixed(4)} (${savingsPct}% cheaper than ${model})`
  );
}
31
68
 
32
69
  // src/core/storage.ts
@@ -74,16 +111,18 @@ var SqliteStorage = class {
74
111
  migrate() {
75
112
  this.db.exec(`
76
113
  CREATE TABLE IF NOT EXISTS usage (
77
- id INTEGER PRIMARY KEY AUTOINCREMENT,
78
- model TEXT NOT NULL,
79
- input_tokens INTEGER NOT NULL,
80
- output_tokens INTEGER NOT NULL,
81
- reasoning_tokens INTEGER NOT NULL DEFAULT 0,
82
- cost_usd REAL NOT NULL,
83
- session_id TEXT,
84
- user_id TEXT,
85
- feature TEXT,
86
- timestamp TEXT NOT NULL
114
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
115
+ model TEXT NOT NULL,
116
+ input_tokens INTEGER NOT NULL,
117
+ output_tokens INTEGER NOT NULL,
118
+ reasoning_tokens INTEGER NOT NULL DEFAULT 0,
119
+ cached_tokens INTEGER NOT NULL DEFAULT 0,
120
+ cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
121
+ cost_usd REAL NOT NULL,
122
+ session_id TEXT,
123
+ user_id TEXT,
124
+ feature TEXT,
125
+ timestamp TEXT NOT NULL
87
126
  )
88
127
  `);
89
128
  const cols = this.db.prepare(`PRAGMA table_info(usage)`).all().map((c) => c.name);
@@ -93,17 +132,26 @@ var SqliteStorage = class {
93
132
  if (!cols.includes("feature")) {
94
133
  this.db.exec(`ALTER TABLE usage ADD COLUMN feature TEXT`);
95
134
  }
135
+ if (!cols.includes("cached_tokens")) {
136
+ this.db.exec(`ALTER TABLE usage ADD COLUMN cached_tokens INTEGER NOT NULL DEFAULT 0`);
137
+ }
138
+ if (!cols.includes("cache_creation_tokens")) {
139
+ this.db.exec(`ALTER TABLE usage ADD COLUMN cache_creation_tokens INTEGER NOT NULL DEFAULT 0`);
140
+ }
96
141
  }
97
142
  record(entry) {
98
143
  this.db.prepare(
99
144
  `INSERT INTO usage
100
- (model, input_tokens, output_tokens, reasoning_tokens, cost_usd, session_id, user_id, feature, timestamp)
101
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
145
+ (model, input_tokens, output_tokens, reasoning_tokens, cached_tokens, cache_creation_tokens,
146
+ cost_usd, session_id, user_id, feature, timestamp)
147
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
102
148
  ).run(
103
149
  entry.model,
104
150
  entry.inputTokens,
105
151
  entry.outputTokens,
106
152
  entry.reasoningTokens ?? 0,
153
+ entry.cachedTokens ?? 0,
154
+ entry.cacheCreationTokens ?? 0,
107
155
  entry.costUSD,
108
156
  entry.sessionId ?? null,
109
157
  entry.userId ?? null,
@@ -118,6 +166,8 @@ var SqliteStorage = class {
118
166
  inputTokens: r.input_tokens,
119
167
  outputTokens: r.output_tokens,
120
168
  ...r.reasoning_tokens > 0 && { reasoningTokens: r.reasoning_tokens },
169
+ ...r.cached_tokens > 0 && { cachedTokens: r.cached_tokens },
170
+ ...r.cache_creation_tokens > 0 && { cacheCreationTokens: r.cache_creation_tokens },
121
171
  costUSD: r.cost_usd,
122
172
  ...r.session_id != null && { sessionId: r.session_id },
123
173
  ...r.user_id != null && { userId: r.user_id },
@@ -153,7 +203,7 @@ async function fetchRemotePrices(url = REMOTE_URL) {
153
203
  const data = await res.json();
154
204
  if (!data?.models) return null;
155
205
  await persistCache(data);
156
- return data.models;
206
+ return { models: data.models, updated_at: data.updated_at ?? "" };
157
207
  } catch {
158
208
  return null;
159
209
  }
@@ -165,7 +215,8 @@ async function loadCachedPrices() {
165
215
  const data = JSON.parse(raw);
166
216
  const age = Date.now() - (data._cachedAt ?? 0);
167
217
  if (age > CACHE_TTL_MS) return null;
168
- return data.models ?? null;
218
+ if (!data.models) return null;
219
+ return { models: data.models, updated_at: data.updated_at ?? "" };
169
220
  } catch {
170
221
  return null;
171
222
  }
@@ -186,87 +237,110 @@ async function getRemotePrices() {
186
237
 
187
238
  // prices.json
188
239
  var prices_default = {
189
- updated_at: "2026-04-21",
240
+ updated_at: "2026-04-22",
190
241
  source: "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json",
191
242
  models: {
192
243
  "gpt-4o": {
193
244
  input: 2.5,
194
245
  output: 10,
246
+ cachedInput: 1.25,
195
247
  maxInputTokens: 128e3
196
248
  },
197
249
  "gpt-4o-mini": {
198
250
  input: 0.15,
199
251
  output: 0.6,
252
+ cachedInput: 0.075,
200
253
  maxInputTokens: 128e3
201
254
  },
202
255
  "gpt-5": {
203
256
  input: 1.25,
204
257
  output: 10,
258
+ cachedInput: 0.125,
205
259
  maxInputTokens: 272e3
206
260
  },
207
261
  "gpt-5-mini": {
208
262
  input: 0.25,
209
263
  output: 2,
264
+ cachedInput: 0.025,
210
265
  maxInputTokens: 272e3
211
266
  },
212
267
  "gpt-5-nano": {
213
268
  input: 0.05,
214
269
  output: 0.4,
270
+ cachedInput: 5e-3,
215
271
  maxInputTokens: 272e3
216
272
  },
217
273
  "claude-opus-4-6": {
218
274
  input: 5,
219
275
  output: 25,
276
+ cachedInput: 0.5,
277
+ cacheCreationInput: 6.25,
220
278
  maxInputTokens: 1e6
221
279
  },
222
280
  "claude-sonnet-4-6": {
223
281
  input: 3,
224
282
  output: 15,
283
+ cachedInput: 0.3,
284
+ cacheCreationInput: 3.75,
225
285
  maxInputTokens: 1e6
226
286
  },
227
287
  "claude-haiku-4-5": {
228
288
  input: 1,
229
289
  output: 5,
290
+ cachedInput: 0.1,
291
+ cacheCreationInput: 1.25,
230
292
  maxInputTokens: 2e5
231
293
  },
232
294
  "gemini-2.5-pro": {
233
295
  input: 1.25,
234
296
  output: 10,
297
+ cachedInput: 0.125,
235
298
  maxInputTokens: 1048576
236
299
  },
237
300
  "gemini-2.5-flash": {
238
301
  input: 0.3,
239
302
  output: 2.5,
303
+ cachedInput: 0.03,
240
304
  maxInputTokens: 1048576
241
305
  },
242
306
  "deepseek-chat": {
243
307
  input: 0.28,
244
308
  output: 0.42,
309
+ cachedInput: 0.028,
245
310
  maxInputTokens: 131072
246
311
  },
247
312
  "deepseek-reasoner": {
248
313
  input: 0.28,
249
314
  output: 0.42,
315
+ cachedInput: 0.028,
250
316
  maxInputTokens: 131072
251
317
  },
252
318
  "claude-opus-4-5": {
253
319
  input: 5,
254
320
  output: 25,
321
+ cachedInput: 0.5,
322
+ cacheCreationInput: 6.25,
255
323
  maxInputTokens: 2e5
256
324
  },
257
325
  "claude-opus-4-7": {
258
326
  input: 5,
259
327
  output: 25,
328
+ cachedInput: 0.5,
329
+ cacheCreationInput: 6.25,
260
330
  maxInputTokens: 1e6
261
331
  },
262
332
  "claude-opus-4-1": {
263
333
  input: 15,
264
334
  output: 75,
335
+ cachedInput: 1.5,
336
+ cacheCreationInput: 18.75,
265
337
  maxInputTokens: 2e5
266
338
  },
267
339
  "claude-sonnet-4-5": {
268
340
  input: 3,
269
341
  output: 15,
342
+ cachedInput: 0.3,
343
+ cacheCreationInput: 3.75,
270
344
  maxInputTokens: 2e5
271
345
  },
272
346
  "gpt-oss-120b": {
@@ -357,36 +431,43 @@ var prices_default = {
357
431
  "gpt-4.1": {
358
432
  input: 2,
359
433
  output: 8,
434
+ cachedInput: 0.5,
360
435
  maxInputTokens: 1047576
361
436
  },
362
437
  "gpt-4.1-2025-04-14": {
363
438
  input: 2,
364
439
  output: 8,
440
+ cachedInput: 0.5,
365
441
  maxInputTokens: 1047576
366
442
  },
367
443
  "gpt-4.1-mini": {
368
444
  input: 0.4,
369
445
  output: 1.6,
446
+ cachedInput: 0.1,
370
447
  maxInputTokens: 1047576
371
448
  },
372
449
  "gpt-4.1-mini-2025-04-14": {
373
450
  input: 0.4,
374
451
  output: 1.6,
452
+ cachedInput: 0.1,
375
453
  maxInputTokens: 1047576
376
454
  },
377
455
  "gpt-4.1-nano": {
378
456
  input: 0.1,
379
457
  output: 0.4,
458
+ cachedInput: 0.025,
380
459
  maxInputTokens: 1047576
381
460
  },
382
461
  "gpt-4.1-nano-2025-04-14": {
383
462
  input: 0.1,
384
463
  output: 0.4,
464
+ cachedInput: 0.025,
385
465
  maxInputTokens: 1047576
386
466
  },
387
467
  "gpt-4.5-preview": {
388
468
  input: 75,
389
469
  output: 150,
470
+ cachedInput: 37.5,
390
471
  maxInputTokens: 128e3
391
472
  },
392
473
  "gpt-4o-2024-05-13": {
@@ -397,11 +478,13 @@ var prices_default = {
397
478
  "gpt-4o-2024-08-06": {
398
479
  input: 2.5,
399
480
  output: 10,
481
+ cachedInput: 1.25,
400
482
  maxInputTokens: 128e3
401
483
  },
402
484
  "gpt-4o-2024-11-20": {
403
485
  input: 2.5,
404
486
  output: 10,
487
+ cachedInput: 1.25,
405
488
  maxInputTokens: 128e3
406
489
  },
407
490
  "gpt-audio-2025-08-28": {
@@ -427,6 +510,7 @@ var prices_default = {
427
510
  "gpt-4o-mini-2024-07-18": {
428
511
  input: 0.15,
429
512
  output: 0.6,
513
+ cachedInput: 0.075,
430
514
  maxInputTokens: 128e3
431
515
  },
432
516
  "gpt-4o-mini-audio-preview-2024-12-17": {
@@ -437,21 +521,25 @@ var prices_default = {
437
521
  "gpt-4o-mini-realtime-preview-2024-12-17": {
438
522
  input: 0.6,
439
523
  output: 2.4,
524
+ cachedInput: 0.3,
440
525
  maxInputTokens: 128e3
441
526
  },
442
527
  "gpt-realtime-2025-08-28": {
443
528
  input: 4,
444
529
  output: 16,
530
+ cachedInput: 0.4,
445
531
  maxInputTokens: 32e3
446
532
  },
447
533
  "gpt-realtime-1.5-2026-02-23": {
448
534
  input: 4,
449
535
  output: 16,
536
+ cachedInput: 4,
450
537
  maxInputTokens: 32e3
451
538
  },
452
539
  "gpt-realtime-mini-2025-10-06": {
453
540
  input: 0.6,
454
541
  output: 2.4,
542
+ cachedInput: 0.06,
455
543
  maxInputTokens: 128e3
456
544
  },
457
545
  "gpt-4o-mini-transcribe": {
@@ -462,11 +550,13 @@ var prices_default = {
462
550
  "gpt-4o-realtime-preview-2024-10-01": {
463
551
  input: 5,
464
552
  output: 20,
553
+ cachedInput: 2.5,
465
554
  maxInputTokens: 128e3
466
555
  },
467
556
  "gpt-4o-realtime-preview-2024-12-17": {
468
557
  input: 5,
469
558
  output: 20,
559
+ cachedInput: 2.5,
470
560
  maxInputTokens: 128e3
471
561
  },
472
562
  "gpt-4o-transcribe": {
@@ -482,51 +572,61 @@ var prices_default = {
482
572
  "gpt-5.1-2025-11-13": {
483
573
  input: 1.25,
484
574
  output: 10,
575
+ cachedInput: 0.125,
485
576
  maxInputTokens: 272e3
486
577
  },
487
578
  "gpt-5.1-chat-2025-11-13": {
488
579
  input: 1.25,
489
580
  output: 10,
581
+ cachedInput: 0.125,
490
582
  maxInputTokens: 128e3
491
583
  },
492
584
  "gpt-5.1-codex-2025-11-13": {
493
585
  input: 1.25,
494
586
  output: 10,
587
+ cachedInput: 0.125,
495
588
  maxInputTokens: 272e3
496
589
  },
497
590
  "gpt-5.1-codex-mini-2025-11-13": {
498
591
  input: 0.25,
499
592
  output: 2,
593
+ cachedInput: 0.025,
500
594
  maxInputTokens: 272e3
501
595
  },
502
596
  "gpt-5-2025-08-07": {
503
597
  input: 1.25,
504
598
  output: 10,
599
+ cachedInput: 0.125,
505
600
  maxInputTokens: 272e3
506
601
  },
507
602
  "gpt-5-chat": {
508
603
  input: 1.25,
509
604
  output: 10,
605
+ cachedInput: 0.125,
510
606
  maxInputTokens: 128e3
511
607
  },
512
608
  "gpt-5-chat-latest": {
513
609
  input: 1.25,
514
610
  output: 10,
611
+ cachedInput: 0.125,
515
612
  maxInputTokens: 128e3
516
613
  },
517
614
  "gpt-5-codex": {
518
615
  input: 1.25,
519
616
  output: 10,
617
+ cachedInput: 0.125,
520
618
  maxInputTokens: 272e3
521
619
  },
522
620
  "gpt-5-mini-2025-08-07": {
523
621
  input: 0.25,
524
622
  output: 2,
623
+ cachedInput: 0.025,
525
624
  maxInputTokens: 272e3
526
625
  },
527
626
  "gpt-5-nano-2025-08-07": {
528
627
  input: 0.05,
529
628
  output: 0.4,
629
+ cachedInput: 5e-3,
530
630
  maxInputTokens: 272e3
531
631
  },
532
632
  "gpt-5-pro": {
@@ -537,61 +637,73 @@ var prices_default = {
537
637
  "gpt-5.1": {
538
638
  input: 1.25,
539
639
  output: 10,
640
+ cachedInput: 0.125,
540
641
  maxInputTokens: 272e3
541
642
  },
542
643
  "gpt-5.1-chat": {
543
644
  input: 1.25,
544
645
  output: 10,
646
+ cachedInput: 0.125,
545
647
  maxInputTokens: 128e3
546
648
  },
547
649
  "gpt-5.1-codex": {
548
650
  input: 1.25,
549
651
  output: 10,
652
+ cachedInput: 0.125,
550
653
  maxInputTokens: 272e3
551
654
  },
552
655
  "gpt-5.1-codex-max": {
553
656
  input: 1.25,
554
657
  output: 10,
658
+ cachedInput: 0.125,
555
659
  maxInputTokens: 272e3
556
660
  },
557
661
  "gpt-5.1-codex-mini": {
558
662
  input: 0.25,
559
663
  output: 2,
664
+ cachedInput: 0.025,
560
665
  maxInputTokens: 272e3
561
666
  },
562
667
  "gpt-5.2": {
563
668
  input: 1.75,
564
669
  output: 14,
670
+ cachedInput: 0.175,
565
671
  maxInputTokens: 272e3
566
672
  },
567
673
  "gpt-5.2-2025-12-11": {
568
674
  input: 1.75,
569
675
  output: 14,
676
+ cachedInput: 0.175,
570
677
  maxInputTokens: 272e3
571
678
  },
572
679
  "gpt-5.2-chat": {
573
680
  input: 1.75,
574
681
  output: 14,
682
+ cachedInput: 0.175,
575
683
  maxInputTokens: 128e3
576
684
  },
577
685
  "gpt-5.2-chat-2025-12-11": {
578
686
  input: 1.75,
579
687
  output: 14,
688
+ cachedInput: 0.175,
580
689
  maxInputTokens: 128e3
581
690
  },
582
691
  "gpt-5.2-codex": {
583
692
  input: 1.75,
584
693
  output: 14,
694
+ cachedInput: 0.175,
585
695
  maxInputTokens: 272e3
586
696
  },
587
697
  "gpt-5.3-chat": {
588
698
  input: 1.75,
589
699
  output: 14,
700
+ cachedInput: 0.175,
590
701
  maxInputTokens: 128e3
591
702
  },
592
703
  "gpt-5.3-codex": {
593
704
  input: 1.75,
594
705
  output: 14,
706
+ cachedInput: 0.175,
595
707
  maxInputTokens: 272e3
596
708
  },
597
709
  "gpt-5.2-pro": {
@@ -607,71 +719,85 @@ var prices_default = {
607
719
  "gpt-5.4": {
608
720
  input: 2.5,
609
721
  output: 15,
722
+ cachedInput: 0.25,
610
723
  maxInputTokens: 105e4
611
724
  },
612
725
  "gpt-5.4-2026-03-05": {
613
726
  input: 2.5,
614
727
  output: 15,
728
+ cachedInput: 0.25,
615
729
  maxInputTokens: 105e4
616
730
  },
617
731
  "gpt-5.4-pro": {
618
732
  input: 30,
619
733
  output: 180,
734
+ cachedInput: 3,
620
735
  maxInputTokens: 105e4
621
736
  },
622
737
  "gpt-5.4-pro-2026-03-05": {
623
738
  input: 30,
624
739
  output: 180,
740
+ cachedInput: 3,
625
741
  maxInputTokens: 105e4
626
742
  },
627
743
  "gpt-5.4-mini": {
628
744
  input: 0.75,
629
745
  output: 4.5,
746
+ cachedInput: 0.075,
630
747
  maxInputTokens: 272e3
631
748
  },
632
749
  "gpt-5.4-nano": {
633
750
  input: 0.2,
634
751
  output: 1.25,
752
+ cachedInput: 0.02,
635
753
  maxInputTokens: 272e3
636
754
  },
637
755
  "o1-2024-12-17": {
638
756
  input: 15,
639
757
  output: 60,
758
+ cachedInput: 7.5,
640
759
  maxInputTokens: 2e5
641
760
  },
642
761
  "o1-mini": {
643
762
  input: 1.21,
644
763
  output: 4.84,
764
+ cachedInput: 0.605,
645
765
  maxInputTokens: 128e3
646
766
  },
647
767
  "o1-mini-2024-09-12": {
648
768
  input: 1.1,
649
769
  output: 4.4,
770
+ cachedInput: 0.55,
650
771
  maxInputTokens: 128e3
651
772
  },
652
773
  "o1-preview": {
653
774
  input: 15,
654
775
  output: 60,
776
+ cachedInput: 7.5,
655
777
  maxInputTokens: 128e3
656
778
  },
657
779
  "o1-preview-2024-09-12": {
658
780
  input: 15,
659
781
  output: 60,
782
+ cachedInput: 7.5,
660
783
  maxInputTokens: 128e3
661
784
  },
662
785
  "o3-2025-04-16": {
663
786
  input: 2,
664
787
  output: 8,
788
+ cachedInput: 0.5,
665
789
  maxInputTokens: 2e5
666
790
  },
667
791
  "o3-mini": {
668
792
  input: 1.1,
669
793
  output: 4.4,
794
+ cachedInput: 0.55,
670
795
  maxInputTokens: 2e5
671
796
  },
672
797
  "o3-mini-2025-01-31": {
673
798
  input: 1.1,
674
799
  output: 4.4,
800
+ cachedInput: 0.55,
675
801
  maxInputTokens: 2e5
676
802
  },
677
803
  "o3-pro": {
@@ -687,11 +813,13 @@ var prices_default = {
687
813
  "o4-mini": {
688
814
  input: 1.1,
689
815
  output: 4.4,
816
+ cachedInput: 0.275,
690
817
  maxInputTokens: 2e5
691
818
  },
692
819
  "o4-mini-2025-04-16": {
693
820
  input: 1.1,
694
821
  output: 4.4,
822
+ cachedInput: 0.275,
695
823
  maxInputTokens: 2e5
696
824
  },
697
825
  "deepseek-v3.2": {
@@ -712,6 +840,7 @@ var prices_default = {
712
840
  "deepseek-v3": {
713
841
  input: 0.27,
714
842
  output: 1.1,
843
+ cachedInput: 0.07,
715
844
  maxInputTokens: 65536
716
845
  },
717
846
  "deepseek-v3-0324": {
@@ -727,76 +856,105 @@ var prices_default = {
727
856
  "claude-haiku-4-5-20251001": {
728
857
  input: 1,
729
858
  output: 5,
859
+ cachedInput: 0.1,
860
+ cacheCreationInput: 1.25,
730
861
  maxInputTokens: 2e5
731
862
  },
732
863
  "claude-3-7-sonnet-20250219": {
733
864
  input: 3,
734
865
  output: 15,
866
+ cachedInput: 0.3,
867
+ cacheCreationInput: 3.75,
735
868
  maxInputTokens: 2e5
736
869
  },
737
870
  "claude-3-haiku-20240307": {
738
871
  input: 0.25,
739
872
  output: 1.25,
873
+ cachedInput: 0.03,
874
+ cacheCreationInput: 0.3,
740
875
  maxInputTokens: 2e5
741
876
  },
742
877
  "claude-3-opus-20240229": {
743
878
  input: 15,
744
879
  output: 75,
880
+ cachedInput: 1.5,
881
+ cacheCreationInput: 18.75,
745
882
  maxInputTokens: 2e5
746
883
  },
747
884
  "claude-4-opus-20250514": {
748
885
  input: 15,
749
886
  output: 75,
887
+ cachedInput: 1.5,
888
+ cacheCreationInput: 18.75,
750
889
  maxInputTokens: 2e5
751
890
  },
752
891
  "claude-4-sonnet-20250514": {
753
892
  input: 3,
754
893
  output: 15,
894
+ cachedInput: 0.3,
895
+ cacheCreationInput: 3.75,
755
896
  maxInputTokens: 1e6
756
897
  },
757
898
  "claude-sonnet-4-5-20250929": {
758
899
  input: 3,
759
900
  output: 15,
901
+ cachedInput: 0.3,
902
+ cacheCreationInput: 3.75,
760
903
  maxInputTokens: 2e5
761
904
  },
762
905
  "claude-sonnet-4-5-20250929-v1:0": {
763
906
  input: 3,
764
907
  output: 15,
908
+ cachedInput: 0.3,
909
+ cacheCreationInput: 3.75,
765
910
  maxInputTokens: 2e5
766
911
  },
767
912
  "claude-opus-4-1-20250805": {
768
913
  input: 15,
769
914
  output: 75,
915
+ cachedInput: 1.5,
916
+ cacheCreationInput: 18.75,
770
917
  maxInputTokens: 2e5
771
918
  },
772
919
  "claude-opus-4-20250514": {
773
920
  input: 15,
774
921
  output: 75,
922
+ cachedInput: 1.5,
923
+ cacheCreationInput: 18.75,
775
924
  maxInputTokens: 2e5
776
925
  },
777
926
  "claude-opus-4-5-20251101": {
778
927
  input: 5,
779
928
  output: 25,
929
+ cachedInput: 0.5,
930
+ cacheCreationInput: 6.25,
780
931
  maxInputTokens: 2e5
781
932
  },
782
933
  "claude-opus-4-6-20260205": {
783
934
  input: 5,
784
935
  output: 25,
936
+ cachedInput: 0.5,
937
+ cacheCreationInput: 6.25,
785
938
  maxInputTokens: 1e6
786
939
  },
787
940
  "claude-opus-4-7-20260416": {
788
941
  input: 5,
789
942
  output: 25,
943
+ cachedInput: 0.5,
944
+ cacheCreationInput: 6.25,
790
945
  maxInputTokens: 1e6
791
946
  },
792
947
  "claude-sonnet-4-20250514": {
793
948
  input: 3,
794
949
  output: 15,
950
+ cachedInput: 0.3,
951
+ cacheCreationInput: 3.75,
795
952
  maxInputTokens: 1e6
796
953
  },
797
954
  "codex-mini-latest": {
798
955
  input: 1.5,
799
956
  output: 6,
957
+ cachedInput: 0.375,
800
958
  maxInputTokens: 2e5
801
959
  },
802
960
  "deepseek-ai/deepseek-r1": {
@@ -846,6 +1004,7 @@ var prices_default = {
846
1004
  "deepseek-ai/deepseek-v3.1-terminus": {
847
1005
  input: 0.27,
848
1006
  output: 1,
1007
+ cachedInput: 0.216,
849
1008
  maxInputTokens: 163840
850
1009
  },
851
1010
  "deepseek-coder": {
@@ -856,26 +1015,31 @@ var prices_default = {
856
1015
  "gemini-2.0-flash": {
857
1016
  input: 0.1,
858
1017
  output: 0.4,
1018
+ cachedInput: 0.025,
859
1019
  maxInputTokens: 1048576
860
1020
  },
861
1021
  "gemini-2.0-flash-001": {
862
1022
  input: 0.1,
863
1023
  output: 0.4,
1024
+ cachedInput: 0.025,
864
1025
  maxInputTokens: 1048576
865
1026
  },
866
1027
  "gemini-2.0-flash-lite": {
867
1028
  input: 0.075,
868
1029
  output: 0.3,
1030
+ cachedInput: 0.01875,
869
1031
  maxInputTokens: 1048576
870
1032
  },
871
1033
  "gemini-2.0-flash-lite-001": {
872
1034
  input: 0.075,
873
1035
  output: 0.3,
1036
+ cachedInput: 0.01875,
874
1037
  maxInputTokens: 1048576
875
1038
  },
876
1039
  "gemini-2.5-flash-image": {
877
1040
  input: 0.3,
878
1041
  output: 2.5,
1042
+ cachedInput: 0.03,
879
1043
  maxInputTokens: 32768
880
1044
  },
881
1045
  "gemini-3-pro-image-preview": {
@@ -891,51 +1055,61 @@ var prices_default = {
891
1055
  "gemini-3.1-flash-lite-preview": {
892
1056
  input: 0.25,
893
1057
  output: 1.5,
1058
+ cachedInput: 0.025,
894
1059
  maxInputTokens: 1048576
895
1060
  },
896
1061
  "gemini-2.5-flash-lite": {
897
1062
  input: 0.1,
898
1063
  output: 0.4,
1064
+ cachedInput: 0.01,
899
1065
  maxInputTokens: 1048576
900
1066
  },
901
1067
  "gemini-2.5-flash-lite-preview-09-2025": {
902
1068
  input: 0.1,
903
1069
  output: 0.4,
1070
+ cachedInput: 0.01,
904
1071
  maxInputTokens: 1048576
905
1072
  },
906
1073
  "gemini-2.5-flash-preview-09-2025": {
907
1074
  input: 0.3,
908
1075
  output: 2.5,
1076
+ cachedInput: 0.075,
909
1077
  maxInputTokens: 1048576
910
1078
  },
911
1079
  "gemini-live-2.5-flash-preview-native-audio-09-2025": {
912
1080
  input: 0.3,
913
1081
  output: 2,
1082
+ cachedInput: 0.075,
914
1083
  maxInputTokens: 1048576
915
1084
  },
916
1085
  "gemini-2.5-flash-lite-preview-06-17": {
917
1086
  input: 0.1,
918
1087
  output: 0.4,
1088
+ cachedInput: 0.025,
919
1089
  maxInputTokens: 1048576
920
1090
  },
921
1091
  "gemini-3-pro-preview": {
922
1092
  input: 2,
923
1093
  output: 12,
1094
+ cachedInput: 0.2,
924
1095
  maxInputTokens: 1048576
925
1096
  },
926
1097
  "gemini-3.1-pro-preview": {
927
1098
  input: 2,
928
1099
  output: 12,
1100
+ cachedInput: 0.2,
929
1101
  maxInputTokens: 1048576
930
1102
  },
931
1103
  "gemini-3.1-pro-preview-customtools": {
932
1104
  input: 2,
933
1105
  output: 12,
1106
+ cachedInput: 0.2,
934
1107
  maxInputTokens: 1048576
935
1108
  },
936
1109
  "gemini-3-flash-preview": {
937
1110
  input: 0.5,
938
1111
  output: 3,
1112
+ cachedInput: 0.05,
939
1113
  maxInputTokens: 1048576
940
1114
  },
941
1115
  "gemini-robotics-er-1.5-preview": {
@@ -951,11 +1125,13 @@ var prices_default = {
951
1125
  "gemini-flash-latest": {
952
1126
  input: 0.3,
953
1127
  output: 2.5,
1128
+ cachedInput: 0.03,
954
1129
  maxInputTokens: 1048576
955
1130
  },
956
1131
  "gemini-flash-lite-latest": {
957
1132
  input: 0.1,
958
1133
  output: 0.4,
1134
+ cachedInput: 0.01,
959
1135
  maxInputTokens: 1048576
960
1136
  },
961
1137
  "gemini-gemma-2-27b-it": {
@@ -1031,39 +1207,47 @@ var prices_default = {
1031
1207
  "gpt-4o-mini-realtime-preview": {
1032
1208
  input: 0.6,
1033
1209
  output: 2.4,
1210
+ cachedInput: 0.3,
1034
1211
  maxInputTokens: 128e3
1035
1212
  },
1036
1213
  "gpt-4o-realtime-preview": {
1037
1214
  input: 5,
1038
1215
  output: 20,
1216
+ cachedInput: 2.5,
1039
1217
  maxInputTokens: 128e3
1040
1218
  },
1041
1219
  "gpt-4o-realtime-preview-2025-06-03": {
1042
1220
  input: 5,
1043
1221
  output: 20,
1222
+ cachedInput: 2.5,
1044
1223
  maxInputTokens: 128e3
1045
1224
  },
1046
1225
  "gpt-image-1.5": {
1047
1226
  input: 5,
1048
- output: 10
1227
+ output: 10,
1228
+ cachedInput: 1.25
1049
1229
  },
1050
1230
  "gpt-image-1.5-2025-12-16": {
1051
1231
  input: 5,
1052
- output: 10
1232
+ output: 10,
1233
+ cachedInput: 1.25
1053
1234
  },
1054
1235
  "gpt-5.1-chat-latest": {
1055
1236
  input: 1.25,
1056
1237
  output: 10,
1238
+ cachedInput: 0.125,
1057
1239
  maxInputTokens: 128e3
1058
1240
  },
1059
1241
  "gpt-5.2-chat-latest": {
1060
1242
  input: 1.75,
1061
1243
  output: 14,
1244
+ cachedInput: 0.175,
1062
1245
  maxInputTokens: 128e3
1063
1246
  },
1064
1247
  "gpt-5.3-chat-latest": {
1065
1248
  input: 1.75,
1066
1249
  output: 14,
1250
+ cachedInput: 0.175,
1067
1251
  maxInputTokens: 128e3
1068
1252
  },
1069
1253
  "gpt-5-pro-2025-10-06": {
@@ -1074,11 +1258,13 @@ var prices_default = {
1074
1258
  "gpt-realtime": {
1075
1259
  input: 4,
1076
1260
  output: 16,
1261
+ cachedInput: 0.4,
1077
1262
  maxInputTokens: 32e3
1078
1263
  },
1079
1264
  "gpt-realtime-1.5": {
1080
1265
  input: 4,
1081
1266
  output: 16,
1267
+ cachedInput: 0.4,
1082
1268
  maxInputTokens: 32e3
1083
1269
  },
1084
1270
  "gpt-realtime-mini": {
@@ -1125,6 +1311,7 @@ var prices_default = {
1125
1311
  o1: {
1126
1312
  input: 15,
1127
1313
  output: 60,
1314
+ cachedInput: 7.5,
1128
1315
  maxInputTokens: 2e5
1129
1316
  },
1130
1317
  "o1-pro": {
@@ -1140,6 +1327,7 @@ var prices_default = {
1140
1327
  o3: {
1141
1328
  input: 2,
1142
1329
  output: 8,
1330
+ cachedInput: 0.5,
1143
1331
  maxInputTokens: 2e5
1144
1332
  },
1145
1333
  "gpt-oss-20b": {
@@ -1164,6 +1352,8 @@ var prices_default = {
1164
1352
  "claude-haiku-4-5@20251001": {
1165
1353
  input: 1,
1166
1354
  output: 5,
1355
+ cachedInput: 0.1,
1356
+ cacheCreationInput: 1.25,
1167
1357
  maxInputTokens: 2e5
1168
1358
  },
1169
1359
  "claude-3-5-sonnet": {
@@ -1179,6 +1369,8 @@ var prices_default = {
1179
1369
  "claude-3-7-sonnet@20250219": {
1180
1370
  input: 3,
1181
1371
  output: 15,
1372
+ cachedInput: 0.3,
1373
+ cacheCreationInput: 3.75,
1182
1374
  maxInputTokens: 2e5
1183
1375
  },
1184
1376
  "claude-3-haiku": {
@@ -1214,46 +1406,64 @@ var prices_default = {
1214
1406
  "claude-opus-4": {
1215
1407
  input: 15,
1216
1408
  output: 75,
1409
+ cachedInput: 1.5,
1410
+ cacheCreationInput: 18.75,
1217
1411
  maxInputTokens: 2e5
1218
1412
  },
1219
1413
  "claude-opus-4-1@20250805": {
1220
1414
  input: 15,
1221
1415
  output: 75,
1416
+ cachedInput: 1.5,
1417
+ cacheCreationInput: 18.75,
1222
1418
  maxInputTokens: 2e5
1223
1419
  },
1224
1420
  "claude-opus-4-5@20251101": {
1225
1421
  input: 5,
1226
1422
  output: 25,
1423
+ cachedInput: 0.5,
1424
+ cacheCreationInput: 6.25,
1227
1425
  maxInputTokens: 2e5
1228
1426
  },
1229
1427
  "claude-opus-4-6@default": {
1230
1428
  input: 5,
1231
1429
  output: 25,
1430
+ cachedInput: 0.5,
1431
+ cacheCreationInput: 6.25,
1232
1432
  maxInputTokens: 1e6
1233
1433
  },
1234
1434
  "claude-opus-4-7@default": {
1235
1435
  input: 5,
1236
1436
  output: 25,
1437
+ cachedInput: 0.5,
1438
+ cacheCreationInput: 6.25,
1237
1439
  maxInputTokens: 1e6
1238
1440
  },
1239
1441
  "claude-sonnet-4-5@20250929": {
1240
1442
  input: 3,
1241
1443
  output: 15,
1444
+ cachedInput: 0.3,
1445
+ cacheCreationInput: 3.75,
1242
1446
  maxInputTokens: 2e5
1243
1447
  },
1244
1448
  "claude-opus-4@20250514": {
1245
1449
  input: 15,
1246
1450
  output: 75,
1451
+ cachedInput: 1.5,
1452
+ cacheCreationInput: 18.75,
1247
1453
  maxInputTokens: 2e5
1248
1454
  },
1249
1455
  "claude-sonnet-4": {
1250
1456
  input: 3,
1251
1457
  output: 15,
1458
+ cachedInput: 0.3,
1459
+ cacheCreationInput: 3.75,
1252
1460
  maxInputTokens: 1e6
1253
1461
  },
1254
1462
  "claude-sonnet-4@20250514": {
1255
1463
  input: 3,
1256
1464
  output: 15,
1465
+ cachedInput: 0.3,
1466
+ cacheCreationInput: 3.75,
1257
1467
  maxInputTokens: 1e6
1258
1468
  },
1259
1469
  "deepseek-ai/deepseek-v3.1-maas": {
@@ -1303,6 +1513,7 @@ var prices_default = {
1303
1513
  "gpt-realtime-mini-2025-12-15": {
1304
1514
  input: 0.6,
1305
1515
  output: 2.4,
1516
+ cachedInput: 0.06,
1306
1517
  maxInputTokens: 128e3
1307
1518
  },
1308
1519
  "gemini-2.5-flash-native-audio-latest": {
@@ -1328,16 +1539,20 @@ var prices_default = {
1328
1539
  "gemini-pro-latest": {
1329
1540
  input: 1.25,
1330
1541
  output: 10,
1542
+ cachedInput: 0.125,
1331
1543
  maxInputTokens: 1048576
1332
1544
  },
1333
1545
  "gemini-exp-1206": {
1334
1546
  input: 0.3,
1335
1547
  output: 2.5,
1548
+ cachedInput: 0.03,
1336
1549
  maxInputTokens: 1048576
1337
1550
  },
1338
1551
  "claude-sonnet-4-6@default": {
1339
1552
  input: 3,
1340
1553
  output: 15,
1554
+ cachedInput: 0.3,
1555
+ cacheCreationInput: 3.75,
1341
1556
  maxInputTokens: 1e6
1342
1557
  }
1343
1558
  }
@@ -1345,11 +1560,19 @@ var prices_default = {
1345
1560
 
1346
1561
  // src/core/tracker.ts
1347
1562
  var bundledPrices = prices_default.models;
1563
+ var bundledUpdatedAt = prices_default.updated_at ?? "";
1348
1564
  // Validation schema for a single model's price entry.
  // `input` and `output` are required non-negative numbers; `cachedInput` and
  // `cacheCreationInput` are optional non-negative numbers; `maxInputTokens`
  // is an optional positive number.
  // NOTE(review): the rates look like USD per 1M tokens (calculateCost divides
  // token counts by 1e6 before multiplying) — confirm against the price source.
  var ModelPriceSchema = z.object({
1349
1565
  input: z.number().nonnegative(),
1350
1566
  output: z.number().nonnegative(),
1567
+ cachedInput: z.number().nonnegative().optional(),
1568
+ cacheCreationInput: z.number().nonnegative().optional(),
1351
1569
  maxInputTokens: z.number().positive().optional()
1352
1570
  });
1571
// Validation schema for one budget rule: a positive numeric `threshold`, a
// `webhookUrl` that must be a URL string, and a `mode` of "once" or "always"
// that defaults to "once" when omitted.
// NOTE(review): presumably `mode` controls whether the webhook fires a single
// time or on every call past the threshold — verify against the alert code.
+ var BudgetConfigSchema = z.object({
1572
+ threshold: z.number().positive(),
1573
+ webhookUrl: z.string().url(),
1574
+ mode: z.enum(["once", "always"]).optional().default("once")
1575
+ });
1353
1576
  var TrackerConfigSchema = z.object({
1354
1577
  storage: z.union([z.enum(["memory", "sqlite"]), z.custom((v) => {
1355
1578
  return v !== null && typeof v === "object" && typeof v.record === "function" && typeof v.getAll === "function" && typeof v.clearAll === "function" && typeof v.clearSession === "function";
@@ -1357,7 +1580,13 @@ var TrackerConfigSchema = z.object({
1357
1580
  alertThreshold: z.number().positive().optional(),
1358
1581
  webhookUrl: z.string().url().optional(),
1359
1582
  syncPrices: z.boolean().optional().default(true),
1360
- customPrices: z.record(z.string(), ModelPriceSchema).optional()
1583
+ customPrices: z.record(z.string(), ModelPriceSchema).optional(),
1584
+ warnIfStaleAfterHours: z.number().nonnegative().optional().default(72),
1585
+ budgets: z.object({
1586
+ perUser: BudgetConfigSchema.optional(),
1587
+ perSession: BudgetConfigSchema.optional()
1588
+ }).optional(),
1589
+ suggestions: z.boolean().optional().default(false)
1361
1590
  });
1362
1591
  function createTracker(config = {}) {
1363
1592
  const parsed = TrackerConfigSchema.safeParse(config);
@@ -1371,19 +1600,45 @@ ${issues}`);
1371
1600
  alertThreshold,
1372
1601
  webhookUrl,
1373
1602
  syncPrices,
1374
- customPrices
1603
+ customPrices,
1604
+ warnIfStaleAfterHours,
1605
+ budgets,
1606
+ suggestions
1375
1607
  } = parsed.data;
1376
1608
  const storage = typeof storageOption === "object" ? storageOption : createStorage(storageOption);
1377
1609
  let remotePrices;
1610
+ let pricesUpdatedAt = bundledUpdatedAt;
1378
1611
  if (syncPrices) {
1379
1612
  getRemotePrices().then((result) => {
1380
- if (result) remotePrices = result;
1613
+ if (result) {
1614
+ remotePrices = result.models;
1615
+ pricesUpdatedAt = result.updated_at;
1616
+ }
1381
1617
  }).catch(() => {
1382
1618
  });
1383
1619
  }
1620
+ let stalenessChecked = false;
1621
+ function maybeWarnStaleness() {
1622
+ if (stalenessChecked || !warnIfStaleAfterHours) return;
1623
+ stalenessChecked = true;
1624
+ if (!pricesUpdatedAt) return;
1625
+ try {
1626
+ const updatedMs = new Date(pricesUpdatedAt).getTime();
1627
+ const ageHours = (Date.now() - updatedMs) / (1e3 * 60 * 60);
1628
+ if (ageHours > warnIfStaleAfterHours) {
1629
+ console.warn(
1630
+ `[tokenwatch] Price data is ${Math.round(ageHours)}h old (updated_at: ${pricesUpdatedAt}). Run "tokenwatch sync" to refresh, or set warnIfStaleAfterHours: 0 to suppress.`
1631
+ );
1632
+ }
1633
+ } catch {
1634
+ }
1635
+ }
1384
1636
  let alertFired = false;
1637
+ const firedUserAlerts = /* @__PURE__ */ new Set();
1638
+ const firedSessionAlerts = /* @__PURE__ */ new Set();
1385
1639
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
1386
1640
  function resolveModelPrice(model) {
1641
+ maybeWarnStaleness();
1387
1642
  return resolvePrice(model, {
1388
1643
  bundledPrices,
1389
1644
  ...customPrices !== void 0 && { customPrices },
@@ -1392,39 +1647,94 @@ ${issues}`);
1392
1647
  }
1393
1648
  function track(entry) {
1394
1649
  const price = resolveModelPrice(entry.model);
1395
- const costUSD = calculateCost(entry.inputTokens, entry.outputTokens, price);
1650
+ const costUSD = calculateCost(
1651
+ entry.inputTokens,
1652
+ entry.outputTokens,
1653
+ price,
1654
+ entry.cachedTokens,
1655
+ entry.cacheCreationTokens
1656
+ );
1396
1657
  const full = {
1397
1658
  ...entry,
1398
1659
  costUSD,
1399
1660
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
1400
1661
  };
1401
1662
  storage.record(full);
1402
- maybeFireAlert();
1663
+ maybeFireAlerts(full);
1664
+ if (suggestions) {
1665
+ maybeSuggestCheaperModel(entry.model, costUSD, entry.inputTokens, entry.outputTokens, {
1666
+ bundledPrices,
1667
+ ...customPrices !== void 0 && { customPrices },
1668
+ ...remotePrices !== void 0 && { remotePrices }
1669
+ });
1670
+ }
1403
1671
  }
1404
- function maybeFireAlert() {
1405
- if (!alertThreshold || !webhookUrl || alertFired) return;
1406
- alertFired = true;
1407
- Promise.resolve(storage.getAll()).then((entries) => {
1408
- const total = computeTotal(entries);
1409
- if (total < alertThreshold) {
1410
- alertFired = false;
1411
- return;
1412
- }
1413
- const payload = {
1414
- text: `[tokenwatch] Alert: total cost reached $${total.toFixed(4)} USD (threshold: $${alertThreshold})`
1415
- };
1416
- fetch(webhookUrl, {
1417
- method: "POST",
1418
- headers: { "Content-Type": "application/json" },
1419
- body: JSON.stringify(payload)
1672
+ function maybeFireAlerts(entry) {
1673
+ if (alertThreshold && webhookUrl && !alertFired) {
1674
+ alertFired = true;
1675
+ Promise.resolve(storage.getAll()).then((entries) => {
1676
+ const total = computeTotal(entries);
1677
+ if (total < alertThreshold) {
1678
+ alertFired = false;
1679
+ return;
1680
+ }
1681
+ fireWebhook(webhookUrl, {
1682
+ text: `[tokenwatch] Alert: total cost reached $${total.toFixed(4)} USD (threshold: $${alertThreshold})`
1683
+ });
1420
1684
  }).catch(() => {
1685
+ alertFired = false;
1421
1686
  });
1687
+ }
1688
+ if (budgets?.perUser && entry.userId) {
1689
+ const cfg = budgets.perUser;
1690
+ const uid = entry.userId;
1691
+ if (cfg.mode === "always" || !firedUserAlerts.has(uid)) {
1692
+ if (cfg.mode !== "always") firedUserAlerts.add(uid);
1693
+ Promise.resolve(storage.getAll()).then((entries) => {
1694
+ const userCost = entries.filter((e) => e.userId === uid).reduce((s, e) => s + e.costUSD, 0);
1695
+ if (userCost >= cfg.threshold) {
1696
+ fireWebhook(cfg.webhookUrl, {
1697
+ text: `[tokenwatch] Budget alert: user "${uid}" reached $${userCost.toFixed(4)} USD (threshold: $${cfg.threshold})`
1698
+ });
1699
+ } else {
1700
+ if (cfg.mode !== "always") firedUserAlerts.delete(uid);
1701
+ }
1702
+ }).catch(() => {
1703
+ if (cfg.mode !== "always") firedUserAlerts.delete(uid);
1704
+ });
1705
+ }
1706
+ }
1707
+ if (budgets?.perSession && entry.sessionId) {
1708
+ const cfg = budgets.perSession;
1709
+ const sid = entry.sessionId;
1710
+ if (cfg.mode === "always" || !firedSessionAlerts.has(sid)) {
1711
+ if (cfg.mode !== "always") firedSessionAlerts.add(sid);
1712
+ Promise.resolve(storage.getAll()).then((entries) => {
1713
+ const sessionCost = entries.filter((e) => e.sessionId === sid).reduce((s, e) => s + e.costUSD, 0);
1714
+ if (sessionCost >= cfg.threshold) {
1715
+ fireWebhook(cfg.webhookUrl, {
1716
+ text: `[tokenwatch] Budget alert: session "${sid}" reached $${sessionCost.toFixed(4)} USD (threshold: $${cfg.threshold})`
1717
+ });
1718
+ } else {
1719
+ if (cfg.mode !== "always") firedSessionAlerts.delete(sid);
1720
+ }
1721
+ }).catch(() => {
1722
+ if (cfg.mode !== "always") firedSessionAlerts.delete(sid);
1723
+ });
1724
+ }
1725
+ }
1726
+ }
1727
+ function fireWebhook(url, payload) {
1728
+ fetch(url, {
1729
+ method: "POST",
1730
+ headers: { "Content-Type": "application/json" },
1731
+ body: JSON.stringify(payload)
1422
1732
  }).catch(() => {
1423
- alertFired = false;
1424
1733
  });
1425
1734
  }
1426
- async function getReport() {
1427
- const entries = await Promise.resolve(storage.getAll());
1735
+ async function getReport(options) {
1736
+ const allEntries = await Promise.resolve(storage.getAll());
1737
+ const entries = filterEntries(allEntries, options);
1428
1738
  const byModel = {};
1429
1739
  const bySession = {};
1430
1740
  const byUser = {};
@@ -1432,18 +1742,24 @@ ${issues}`);
1432
1742
  let totalInput = 0;
1433
1743
  let totalOutput = 0;
1434
1744
  let totalCost = 0;
1435
- let lastTimestamp = startedAt;
1745
+ let periodFrom = options ? entries[0]?.timestamp ?? startedAt : startedAt;
1746
+ let lastTimestamp = periodFrom;
1436
1747
  for (const e of entries) {
1437
- totalInput += e.inputTokens;
1748
+ totalInput += e.inputTokens + (e.cachedTokens ?? 0) + (e.cacheCreationTokens ?? 0);
1438
1749
  totalOutput += e.outputTokens;
1439
1750
  totalCost += e.costUSD;
1440
1751
  if (e.timestamp > lastTimestamp) lastTimestamp = e.timestamp;
1441
- const m = byModel[e.model] ??= { costUSD: 0, calls: 0, tokens: { input: 0, output: 0, reasoning: 0 } };
1752
+ const m = byModel[e.model] ??= {
1753
+ costUSD: 0,
1754
+ calls: 0,
1755
+ tokens: { input: 0, output: 0, reasoning: 0, cached: 0 }
1756
+ };
1442
1757
  m.costUSD += e.costUSD;
1443
1758
  m.calls += 1;
1444
- m.tokens.input += e.inputTokens;
1759
+ m.tokens.input += e.inputTokens + (e.cachedTokens ?? 0) + (e.cacheCreationTokens ?? 0);
1445
1760
  m.tokens.output += e.outputTokens;
1446
1761
  m.tokens.reasoning += e.reasoningTokens ?? 0;
1762
+ m.tokens.cached += e.cachedTokens ?? 0;
1447
1763
  if (e.sessionId) {
1448
1764
  const s = bySession[e.sessionId] ??= { costUSD: 0, calls: 0 };
1449
1765
  s.costUSD += e.costUSD;
@@ -1460,6 +1776,9 @@ ${issues}`);
1460
1776
  f.calls += 1;
1461
1777
  }
1462
1778
  }
1779
+ if (options && entries.length > 0) {
1780
+ periodFrom = entries[0]?.timestamp ?? periodFrom;
1781
+ }
1463
1782
  return {
1464
1783
  totalCostUSD: totalCost,
1465
1784
  totalTokens: { input: totalInput, output: totalOutput },
@@ -1467,22 +1786,66 @@ ${issues}`);
1467
1786
  bySession,
1468
1787
  byUser,
1469
1788
  byFeature,
1470
- period: { from: startedAt, to: lastTimestamp }
1789
+ period: { from: periodFrom, to: lastTimestamp },
1790
+ ...pricesUpdatedAt ? { pricesUpdatedAt } : {}
1791
+ };
1792
+ }
1793
+ async function getCostForecast(options = {}) {
1794
+ const windowHours = options.windowHours ?? 24;
1795
+ const allEntries = await Promise.resolve(storage.getAll());
1796
+ const now = Date.now();
1797
+ const windowStart = now - windowHours * 60 * 60 * 1e3;
1798
+ const windowEntries = allEntries.filter(
1799
+ (e) => new Date(e.timestamp).getTime() >= windowStart
1800
+ );
1801
+ if (windowEntries.length < 2) {
1802
+ return {
1803
+ burnRatePerHour: 0,
1804
+ projectedDailyCostUSD: 0,
1805
+ projectedMonthlyCostUSD: 0,
1806
+ basedOnHours: 0,
1807
+ basedOnPeriod: null
1808
+ };
1809
+ }
1810
+ const first = windowEntries[0]?.timestamp ?? "";
1811
+ const last = windowEntries[windowEntries.length - 1]?.timestamp ?? "";
1812
+ const actualMs = new Date(last).getTime() - new Date(first).getTime();
1813
+ const actualHours = actualMs / (1e3 * 60 * 60);
1814
+ if (actualHours < 1e-3) {
1815
+ return {
1816
+ burnRatePerHour: 0,
1817
+ projectedDailyCostUSD: 0,
1818
+ projectedMonthlyCostUSD: 0,
1819
+ basedOnHours: 0,
1820
+ basedOnPeriod: { from: first, to: last }
1821
+ };
1822
+ }
1823
+ const totalCost = windowEntries.reduce((s, e) => s + e.costUSD, 0);
1824
+ const burnRatePerHour = totalCost / actualHours;
1825
+ return {
1826
+ burnRatePerHour,
1827
+ projectedDailyCostUSD: burnRatePerHour * 24,
1828
+ projectedMonthlyCostUSD: burnRatePerHour * 24 * 30,
1829
+ basedOnHours: Math.round(actualHours * 100) / 100,
1830
+ basedOnPeriod: { from: first, to: last }
1471
1831
  };
1472
1832
  }
1473
1833
  async function reset() {
1474
1834
  await Promise.resolve(storage.clearAll());
1475
1835
  alertFired = false;
1836
+ firedUserAlerts.clear();
1837
+ firedSessionAlerts.clear();
1476
1838
  }
1477
1839
  async function resetSession(sessionId) {
1478
1840
  await Promise.resolve(storage.clearSession(sessionId));
1841
+ firedSessionAlerts.delete(sessionId);
1479
1842
  }
1480
1843
  async function exportJSON() {
1481
1844
  return JSON.stringify(await getReport(), null, 2);
1482
1845
  }
1483
1846
  async function exportCSV() {
1484
1847
  const entries = await Promise.resolve(storage.getAll());
1485
- const header = "timestamp,model,inputTokens,outputTokens,reasoningTokens,costUSD,sessionId,userId,feature";
1848
+ const header = "timestamp,model,inputTokens,outputTokens,reasoningTokens,cachedTokens,cacheCreationTokens,costUSD,sessionId,userId,feature";
1486
1849
  const rows = entries.map(
1487
1850
  (e) => [
1488
1851
  csvEscape(e.timestamp),
@@ -1490,6 +1853,8 @@ ${issues}`);
1490
1853
  e.inputTokens,
1491
1854
  e.outputTokens,
1492
1855
  e.reasoningTokens ?? 0,
1856
+ e.cachedTokens ?? 0,
1857
+ e.cacheCreationTokens ?? 0,
1493
1858
  e.costUSD.toFixed(8),
1494
1859
  csvEscape(e.sessionId ?? ""),
1495
1860
  csvEscape(e.userId ?? ""),
@@ -1505,11 +1870,47 @@ ${issues}`);
1505
1870
  ...remotePrices !== void 0 && { remotePrices }
1506
1871
  }) ?? null;
1507
1872
  }
1508
- return { track, getReport, reset, resetSession, exportJSON, exportCSV, getModelInfo };
1873
+ return {
1874
+ track,
1875
+ getReport,
1876
+ getCostForecast,
1877
+ reset,
1878
+ resetSession,
1879
+ exportJSON,
1880
+ exportCSV,
1881
+ getModelInfo
1882
+ };
1509
1883
  }
1510
1884
  function computeTotal(entries) {
1511
1885
  return entries.reduce((sum, e) => sum + e.costUSD, 0);
1512
1886
  }
1887
/**
 * Converts a relative window such as "24h" or "7d" into milliseconds.
 *
 * Accepted form: a non-negative decimal number immediately followed by `h`
 * (hours) or `d` (days); surrounding whitespace is ignored.
 *
 * @param {string} last Relative duration, e.g. "24h", "1.5h", "7d".
 * @returns {number} Duration in milliseconds.
 * @throws {Error} When `last` is not a string or does not match the accepted form.
 */
function parseLastMs(last) {
  // Guard the type first so a non-string (e.g. the number 24) produces the
  // descriptive error below instead of a TypeError from `.trim()`.
  const match = typeof last === "string" ? /^(\d+(?:\.\d+)?)(h|d)$/.exec(last.trim()) : null;
  if (!match) throw new Error(`[tokenwatch] Invalid "last" value: "${String(last)}". Use e.g. "24h", "7d".`);
  const value = parseFloat(match[1]);
  return match[2] === "h" ? value * 60 * 60 * 1e3 : value * 24 * 60 * 60 * 1e3;
}
1894
/**
 * Restricts `entries` to an optional time range.
 *
 * `options.last` (a relative window parsed by `parseLastMs`) takes precedence
 * over `options.since`; `options.until` is applied independently. Both bounds
 * are inclusive. When no options or no bounds are given, the input array is
 * returned as-is (same reference).
 *
 * @param {Array<{ timestamp: string }>} entries Recorded usage entries.
 * @param {{ last?: string, since?: string, until?: string }} [options]
 * @returns {Array<{ timestamp: string }>} Entries within the requested range.
 * @throws {Error} When `last`, `since` or `until` cannot be parsed.
 */
function filterEntries(entries, options) {
  if (!options) return entries;

  // An unparseable date becomes NaN, and every comparison against NaN is
  // false, which would silently disable the filter. Fail loudly instead,
  // the same way an invalid `last` value does.
  const toMs = (value, label) => {
    const ms = new Date(value).getTime();
    if (Number.isNaN(ms)) {
      throw new Error(`[tokenwatch] Invalid "${label}" value: "${String(value)}". Expected a parseable date.`);
    }
    return ms;
  };

  let sinceMs;
  let untilMs;
  if (options.last) {
    sinceMs = Date.now() - parseLastMs(options.last);
  } else if (options.since) {
    sinceMs = toMs(options.since, "since");
  }
  if (options.until) {
    untilMs = toMs(options.until, "until");
  }
  if (sinceMs === undefined && untilMs === undefined) return entries;

  return entries.filter((e) => {
    const ts = new Date(e.timestamp).getTime();
    if (sinceMs !== undefined && ts < sinceMs) return false;
    if (untilMs !== undefined && ts > untilMs) return false;
    return true;
  });
}
1513
1914
  function csvEscape(value) {
1514
1915
  if (value.includes(",") || value.includes('"') || value.includes("\n")) {
1515
1916
  return `"${value.replace(/"/g, '""')}"`;
@@ -1517,6 +1918,71 @@ function csvEscape(value) {
1517
1918
  return value;
1518
1919
  }
1519
1920
 
1921
+ // src/core/lazy-tracker.ts
1922
+ var CSV_HEADER = "timestamp,model,inputTokens,outputTokens,reasoningTokens,cachedTokens,cacheCreationTokens,costUSD,sessionId,userId,feature";
1923
/**
 * Builds the report returned when nothing has been tracked: zero totals, empty
 * per-model/session/user/feature maps, and a period that starts and ends at
 * the moment of the call.
 *
 * @returns {object} A fresh report object on every call.
 */
function emptyReport() {
  const timestamp = new Date().toISOString();
  const report = {
    totalCostUSD: 0,
    totalTokens: { input: 0, output: 0 }
  };
  for (const bucket of ["byModel", "bySession", "byUser", "byFeature"]) {
    report[bucket] = {};
  }
  report.period = { from: timestamp, to: timestamp };
  return report;
}
1935
/**
 * Builds a forecast in which every numeric figure is 0 and no basis period is
 * reported.
 *
 * @returns {object} A fresh forecast object on every call.
 */
function zeroForecast() {
  const numericFields = [
    "burnRatePerHour",
    "projectedDailyCostUSD",
    "projectedMonthlyCostUSD",
    "basedOnHours"
  ];
  const forecast = Object.fromEntries(numericFields.map((field) => [field, 0]));
  forecast.basedOnPeriod = null;
  return forecast;
}
1944
/**
 * Creates a tracker facade that can be handed out before its configuration is
 * known and initialized later with `init(config)`.
 *
 * Until `init` succeeds there is no underlying tracker, so:
 *   - `track`, `reset` and `resetSession` do nothing;
 *   - `getReport` resolves to `emptyReport()`;
 *   - `getCostForecast` resolves to `zeroForecast()`;
 *   - `exportJSON` resolves to "{}" and `exportCSV` to the bare CSV header;
 *   - `getModelInfo` returns `null`.
 * After `init`, every method forwards to the tracker built by `createTracker`.
 *
 * @returns {object} The lazy tracker facade.
 */
function createLazyTracker() {
  let delegate = null;
  return {
    /**
     * Builds the underlying tracker. May only succeed once.
     *
     * If `createTracker` throws, the error propagates unchanged and the facade
     * stays uninitialized, so `init` can be called again with a fixed config.
     *
     * @param {object} [config] Tracker configuration; defaults to `{}`.
     * @throws {Error} When the facade has already been initialized.
     */
    init(config) {
      if (delegate !== null) {
        throw new Error(
          "[tokenwatch] LazyTracker already initialized. init() may only be called once."
        );
      }
      delegate = createTracker(config ?? {});
    },
    track(entry) {
      delegate?.track(entry);
    },
    async getReport(options) {
      return delegate?.getReport(options) ?? emptyReport();
    },
    async getCostForecast(options) {
      return delegate?.getCostForecast(options) ?? zeroForecast();
    },
    async reset() {
      await delegate?.reset();
    },
    async resetSession(sessionId) {
      await delegate?.resetSession(sessionId);
    },
    async exportJSON() {
      return delegate?.exportJSON() ?? "{}";
    },
    async exportCSV() {
      return delegate?.exportCSV() ?? CSV_HEADER;
    },
    getModelInfo(model) {
      return delegate?.getModelInfo(model) ?? null;
    }
  };
}
1985
+
1520
1986
  // src/providers/openai.ts
1521
1987
  function extractMeta(params) {
1522
1988
  const { __sessionId, __userId, __feature, ...cleaned } = params;
@@ -1528,19 +1994,24 @@ function extractMeta(params) {
1528
1994
  };
1529
1995
  }
1530
1996
  function extractUsage(usage) {
1531
- if (!usage) return { inputTokens: 0, outputTokens: 0, reasoningTokens: 0 };
1997
+ if (!usage) return { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cachedTokens: 0 };
1998
+ const totalInput = usage.prompt_tokens ?? usage.input_tokens ?? 0;
1999
+ const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;
1532
2000
  return {
1533
- inputTokens: usage.prompt_tokens ?? usage.input_tokens ?? 0,
2001
+ // inputTokens = regular (non-cached) input; OpenAI prompt_tokens includes cached tokens
2002
+ inputTokens: totalInput - cachedTokens,
1534
2003
  outputTokens: usage.completion_tokens ?? usage.output_tokens ?? 0,
1535
- reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0
2004
+ reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0,
2005
+ cachedTokens
1536
2006
  };
1537
2007
  }
1538
- function trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature) {
2008
+ function trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens = 0) {
1539
2009
  tracker.track({
1540
2010
  model,
1541
2011
  inputTokens,
1542
2012
  outputTokens: outputTokens + reasoningTokens,
1543
2013
  ...reasoningTokens > 0 && { reasoningTokens },
2014
+ ...cachedTokens > 0 && { cachedTokens },
1544
2015
  ...sessionId !== void 0 && { sessionId },
1545
2016
  ...userId !== void 0 && { userId },
1546
2017
  ...feature !== void 0 && { feature }
@@ -1552,13 +2023,13 @@ async function* wrapStream(stream, model, sessionId, userId, feature, tracker) {
1552
2023
  lastChunk = chunk;
1553
2024
  yield chunk;
1554
2025
  }
1555
- const { inputTokens, outputTokens, reasoningTokens } = extractUsage(lastChunk?.usage);
2026
+ const { inputTokens, outputTokens, reasoningTokens, cachedTokens } = extractUsage(lastChunk?.usage);
1556
2027
  if (!lastChunk?.usage) {
1557
2028
  console.warn(
1558
2029
  `[tokenwatch] No usage data in stream for model "${model}". Cost recorded as $0. Pass stream_options: { include_usage: true } to get accurate costs.`
1559
2030
  );
1560
2031
  }
1561
- trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature);
2032
+ trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens);
1562
2033
  }
1563
2034
  function wrapOpenAI(client, tracker) {
1564
2035
  const proxiedCompletions = new Proxy(client.chat.completions, {
@@ -1580,7 +2051,7 @@ function wrapOpenAI(client, tracker) {
1580
2051
  );
1581
2052
  }
1582
2053
  const completion = result;
1583
- const { inputTokens, outputTokens, reasoningTokens } = extractUsage(completion.usage);
2054
+ const { inputTokens, outputTokens, reasoningTokens, cachedTokens } = extractUsage(completion.usage);
1584
2055
  trackWithMeta(
1585
2056
  tracker,
1586
2057
  completion.model ?? model,
@@ -1589,7 +2060,8 @@ function wrapOpenAI(client, tracker) {
1589
2060
  reasoningTokens,
1590
2061
  sessionId,
1591
2062
  userId,
1592
- feature
2063
+ feature,
2064
+ cachedTokens
1593
2065
  );
1594
2066
  return result;
1595
2067
  };
@@ -1636,10 +2108,12 @@ function extractMeta2(params) {
1636
2108
  };
1637
2109
  }
1638
2110
  function extractUsage2(usage) {
1639
- if (!usage) return { inputTokens: 0, outputTokens: 0 };
2111
+ if (!usage) return { inputTokens: 0, outputTokens: 0, cachedTokens: 0, cacheCreationTokens: 0 };
1640
2112
  return {
1641
2113
  inputTokens: usage.input_tokens ?? 0,
1642
- outputTokens: usage.output_tokens ?? 0
2114
+ outputTokens: usage.output_tokens ?? 0,
2115
+ cachedTokens: usage.cache_read_input_tokens ?? 0,
2116
+ cacheCreationTokens: usage.cache_creation_input_tokens ?? 0
1643
2117
  };
1644
2118
  }
1645
2119
  function extractThinkingTokenApprox(content) {
@@ -1647,12 +2121,14 @@ function extractThinkingTokenApprox(content) {
1647
2121
  const chars = content.filter((b) => b.type === "thinking").reduce((sum, b) => sum + (b.thinking?.length ?? 0), 0);
1648
2122
  return chars > 0 ? Math.round(chars / 4) : 0;
1649
2123
  }
1650
- function trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature) {
2124
+ function trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens = 0, cacheCreationTokens = 0) {
1651
2125
  tracker.track({
1652
2126
  model,
1653
2127
  inputTokens,
1654
2128
  outputTokens,
1655
2129
  ...reasoningTokens > 0 && { reasoningTokens },
2130
+ ...cachedTokens > 0 && { cachedTokens },
2131
+ ...cacheCreationTokens > 0 && { cacheCreationTokens },
1656
2132
  ...sessionId !== void 0 && { sessionId },
1657
2133
  ...userId !== void 0 && { userId },
1658
2134
  ...feature !== void 0 && { feature }
@@ -1661,12 +2137,16 @@ function trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningToke
1661
2137
  async function* wrapStream2(stream, model, sessionId, userId, feature, tracker) {
1662
2138
  let inputTokens = 0;
1663
2139
  let outputTokens = 0;
2140
+ let cachedTokens = 0;
2141
+ let cacheCreationTokens = 0;
1664
2142
  let currentBlockIsThinking = false;
1665
2143
  let thinkingCharCount = 0;
1666
2144
  for await (const event of stream) {
1667
2145
  yield event;
1668
2146
  if (event.type === "message_start" && event.message?.usage) {
1669
2147
  inputTokens = event.message.usage.input_tokens ?? 0;
2148
+ cachedTokens = event.message.usage.cache_read_input_tokens ?? 0;
2149
+ cacheCreationTokens = event.message.usage.cache_creation_input_tokens ?? 0;
1670
2150
  }
1671
2151
  if (event.type === "message_delta" && event.usage) {
1672
2152
  outputTokens = event.usage.output_tokens ?? 0;
@@ -1682,7 +2162,7 @@ async function* wrapStream2(stream, model, sessionId, userId, feature, tracker)
1682
2162
  }
1683
2163
  }
1684
2164
  const reasoningTokens = thinkingCharCount > 0 ? Math.round(thinkingCharCount / 4) : 0;
1685
- trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature);
2165
+ trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens, cacheCreationTokens);
1686
2166
  }
1687
2167
  function wrapAnthropic(client, tracker) {
1688
2168
  const proxiedMessages = new Proxy(client.messages, {
@@ -1704,7 +2184,7 @@ function wrapAnthropic(client, tracker) {
1704
2184
  );
1705
2185
  }
1706
2186
  const message = result;
1707
- const { inputTokens, outputTokens } = extractUsage2(message.usage);
2187
+ const { inputTokens, outputTokens, cachedTokens, cacheCreationTokens } = extractUsage2(message.usage);
1708
2188
  const reasoningTokens = extractThinkingTokenApprox(message.content);
1709
2189
  trackWithMeta2(
1710
2190
  tracker,
@@ -1714,7 +2194,9 @@ function wrapAnthropic(client, tracker) {
1714
2194
  reasoningTokens,
1715
2195
  sessionId,
1716
2196
  userId,
1717
- feature
2197
+ feature,
2198
+ cachedTokens,
2199
+ cacheCreationTokens
1718
2200
  );
1719
2201
  return result;
1720
2202
  };
@@ -1784,6 +2266,7 @@ function wrapGemini(client, tracker) {
1784
2266
  });
1785
2267
  }
1786
2268
  export {
2269
+ createLazyTracker,
1787
2270
  createTracker,
1788
2271
  wrapAnthropic,
1789
2272
  wrapOpenAI as wrapDeepSeek,