@diogonzafe/tokenwatch 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -20,6 +20,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var src_exports = {};
22
22
  __export(src_exports, {
23
+ createLazyTracker: () => createLazyTracker,
23
24
  createTracker: () => createTracker,
24
25
  wrapAnthropic: () => wrapAnthropic,
25
26
  wrapDeepSeek: () => wrapOpenAI,
@@ -55,8 +56,45 @@ function lookupInMap(model, map) {
55
56
  }
56
57
  return void 0;
57
58
  }
58
- function calculateCost(inputTokens, outputTokens, price) {
59
- return inputTokens / 1e6 * price.input + outputTokens / 1e6 * price.output;
59
+ function calculateCost(inputTokens, outputTokens, price, cachedTokens = 0, cacheCreationTokens = 0) {
60
+ const regularInputCost = inputTokens / 1e6 * price.input;
61
+ const cachedReadCost = cachedTokens / 1e6 * (price.cachedInput ?? price.input);
62
+ const cacheCreationCost = cacheCreationTokens / 1e6 * (price.cacheCreationInput ?? price.input * 1.25);
63
+ const outputCost = outputTokens / 1e6 * price.output;
64
+ return regularInputCost + cachedReadCost + cacheCreationCost + outputCost;
65
+ }
66
+
67
+ // src/core/suggestions.ts
68
+ var PROVIDER_PREFIXES = ["gpt-", "claude-", "gemini-", "deepseek-"];
69
+ function getProviderPrefix(model) {
70
+ return PROVIDER_PREFIXES.find((p) => model.startsWith(p));
71
+ }
72
+ function maybeSuggestCheaperModel(model, costUSD, inputTokens, outputTokens, layers) {
73
+ if (costUSD <= 0) return;
74
+ const prefix = getProviderPrefix(model);
75
+ if (!prefix) return;
76
+ const mergedMap = {
77
+ ...layers.bundledPrices,
78
+ ...layers.remotePrices ?? {},
79
+ ...layers.customPrices ?? {}
80
+ };
81
+ let cheapestModel;
82
+ let cheapestCost = Infinity;
83
+ for (const key of Object.keys(mergedMap)) {
84
+ if (key === model || !key.startsWith(prefix)) continue;
85
+ const price = mergedMap[key];
86
+ if (!price) continue;
87
+ const candidateCost = calculateCost(inputTokens, outputTokens, price);
88
+ if (candidateCost < cheapestCost) {
89
+ cheapestCost = candidateCost;
90
+ cheapestModel = key;
91
+ }
92
+ }
93
+ if (cheapestModel === void 0 || cheapestCost >= costUSD * 0.5) return;
94
+ const savingsPct = Math.round((1 - cheapestCost / costUSD) * 100);
95
+ console.log(
96
+ `[tokenwatch] Suggestion: ${cheapestModel} could handle this for ~$${cheapestCost.toFixed(4)} (${savingsPct}% cheaper than ${model})`
97
+ );
60
98
  }
61
99
 
62
100
  // src/core/storage.ts
@@ -105,16 +143,18 @@ var SqliteStorage = class {
105
143
  migrate() {
106
144
  this.db.exec(`
107
145
  CREATE TABLE IF NOT EXISTS usage (
108
- id INTEGER PRIMARY KEY AUTOINCREMENT,
109
- model TEXT NOT NULL,
110
- input_tokens INTEGER NOT NULL,
111
- output_tokens INTEGER NOT NULL,
112
- reasoning_tokens INTEGER NOT NULL DEFAULT 0,
113
- cost_usd REAL NOT NULL,
114
- session_id TEXT,
115
- user_id TEXT,
116
- feature TEXT,
117
- timestamp TEXT NOT NULL
146
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
147
+ model TEXT NOT NULL,
148
+ input_tokens INTEGER NOT NULL,
149
+ output_tokens INTEGER NOT NULL,
150
+ reasoning_tokens INTEGER NOT NULL DEFAULT 0,
151
+ cached_tokens INTEGER NOT NULL DEFAULT 0,
152
+ cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
153
+ cost_usd REAL NOT NULL,
154
+ session_id TEXT,
155
+ user_id TEXT,
156
+ feature TEXT,
157
+ timestamp TEXT NOT NULL
118
158
  )
119
159
  `);
120
160
  const cols = this.db.prepare(`PRAGMA table_info(usage)`).all().map((c) => c.name);
@@ -124,17 +164,26 @@ var SqliteStorage = class {
124
164
  if (!cols.includes("feature")) {
125
165
  this.db.exec(`ALTER TABLE usage ADD COLUMN feature TEXT`);
126
166
  }
167
+ if (!cols.includes("cached_tokens")) {
168
+ this.db.exec(`ALTER TABLE usage ADD COLUMN cached_tokens INTEGER NOT NULL DEFAULT 0`);
169
+ }
170
+ if (!cols.includes("cache_creation_tokens")) {
171
+ this.db.exec(`ALTER TABLE usage ADD COLUMN cache_creation_tokens INTEGER NOT NULL DEFAULT 0`);
172
+ }
127
173
  }
128
174
  record(entry) {
129
175
  this.db.prepare(
130
176
  `INSERT INTO usage
131
- (model, input_tokens, output_tokens, reasoning_tokens, cost_usd, session_id, user_id, feature, timestamp)
132
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
177
+ (model, input_tokens, output_tokens, reasoning_tokens, cached_tokens, cache_creation_tokens,
178
+ cost_usd, session_id, user_id, feature, timestamp)
179
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
133
180
  ).run(
134
181
  entry.model,
135
182
  entry.inputTokens,
136
183
  entry.outputTokens,
137
184
  entry.reasoningTokens ?? 0,
185
+ entry.cachedTokens ?? 0,
186
+ entry.cacheCreationTokens ?? 0,
138
187
  entry.costUSD,
139
188
  entry.sessionId ?? null,
140
189
  entry.userId ?? null,
@@ -149,6 +198,8 @@ var SqliteStorage = class {
149
198
  inputTokens: r.input_tokens,
150
199
  outputTokens: r.output_tokens,
151
200
  ...r.reasoning_tokens > 0 && { reasoningTokens: r.reasoning_tokens },
201
+ ...r.cached_tokens > 0 && { cachedTokens: r.cached_tokens },
202
+ ...r.cache_creation_tokens > 0 && { cacheCreationTokens: r.cache_creation_tokens },
152
203
  costUSD: r.cost_usd,
153
204
  ...r.session_id != null && { sessionId: r.session_id },
154
205
  ...r.user_id != null && { userId: r.user_id },
@@ -184,7 +235,7 @@ async function fetchRemotePrices(url = REMOTE_URL) {
184
235
  const data = await res.json();
185
236
  if (!data?.models) return null;
186
237
  await persistCache(data);
187
- return data.models;
238
+ return { models: data.models, updated_at: data.updated_at ?? "" };
188
239
  } catch {
189
240
  return null;
190
241
  }
@@ -196,7 +247,8 @@ async function loadCachedPrices() {
196
247
  const data = JSON.parse(raw);
197
248
  const age = Date.now() - (data._cachedAt ?? 0);
198
249
  if (age > CACHE_TTL_MS) return null;
199
- return data.models ?? null;
250
+ if (!data.models) return null;
251
+ return { models: data.models, updated_at: data.updated_at ?? "" };
200
252
  } catch {
201
253
  return null;
202
254
  }
@@ -217,87 +269,110 @@ async function getRemotePrices() {
217
269
 
218
270
  // prices.json
219
271
  var prices_default = {
220
- updated_at: "2026-04-21",
272
+ updated_at: "2026-04-22",
221
273
  source: "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json",
222
274
  models: {
223
275
  "gpt-4o": {
224
276
  input: 2.5,
225
277
  output: 10,
278
+ cachedInput: 1.25,
226
279
  maxInputTokens: 128e3
227
280
  },
228
281
  "gpt-4o-mini": {
229
282
  input: 0.15,
230
283
  output: 0.6,
284
+ cachedInput: 0.075,
231
285
  maxInputTokens: 128e3
232
286
  },
233
287
  "gpt-5": {
234
288
  input: 1.25,
235
289
  output: 10,
290
+ cachedInput: 0.125,
236
291
  maxInputTokens: 272e3
237
292
  },
238
293
  "gpt-5-mini": {
239
294
  input: 0.25,
240
295
  output: 2,
296
+ cachedInput: 0.025,
241
297
  maxInputTokens: 272e3
242
298
  },
243
299
  "gpt-5-nano": {
244
300
  input: 0.05,
245
301
  output: 0.4,
302
+ cachedInput: 5e-3,
246
303
  maxInputTokens: 272e3
247
304
  },
248
305
  "claude-opus-4-6": {
249
306
  input: 5,
250
307
  output: 25,
308
+ cachedInput: 0.5,
309
+ cacheCreationInput: 6.25,
251
310
  maxInputTokens: 1e6
252
311
  },
253
312
  "claude-sonnet-4-6": {
254
313
  input: 3,
255
314
  output: 15,
315
+ cachedInput: 0.3,
316
+ cacheCreationInput: 3.75,
256
317
  maxInputTokens: 1e6
257
318
  },
258
319
  "claude-haiku-4-5": {
259
320
  input: 1,
260
321
  output: 5,
322
+ cachedInput: 0.1,
323
+ cacheCreationInput: 1.25,
261
324
  maxInputTokens: 2e5
262
325
  },
263
326
  "gemini-2.5-pro": {
264
327
  input: 1.25,
265
328
  output: 10,
329
+ cachedInput: 0.125,
266
330
  maxInputTokens: 1048576
267
331
  },
268
332
  "gemini-2.5-flash": {
269
333
  input: 0.3,
270
334
  output: 2.5,
335
+ cachedInput: 0.03,
271
336
  maxInputTokens: 1048576
272
337
  },
273
338
  "deepseek-chat": {
274
339
  input: 0.28,
275
340
  output: 0.42,
341
+ cachedInput: 0.028,
276
342
  maxInputTokens: 131072
277
343
  },
278
344
  "deepseek-reasoner": {
279
345
  input: 0.28,
280
346
  output: 0.42,
347
+ cachedInput: 0.028,
281
348
  maxInputTokens: 131072
282
349
  },
283
350
  "claude-opus-4-5": {
284
351
  input: 5,
285
352
  output: 25,
353
+ cachedInput: 0.5,
354
+ cacheCreationInput: 6.25,
286
355
  maxInputTokens: 2e5
287
356
  },
288
357
  "claude-opus-4-7": {
289
358
  input: 5,
290
359
  output: 25,
360
+ cachedInput: 0.5,
361
+ cacheCreationInput: 6.25,
291
362
  maxInputTokens: 1e6
292
363
  },
293
364
  "claude-opus-4-1": {
294
365
  input: 15,
295
366
  output: 75,
367
+ cachedInput: 1.5,
368
+ cacheCreationInput: 18.75,
296
369
  maxInputTokens: 2e5
297
370
  },
298
371
  "claude-sonnet-4-5": {
299
372
  input: 3,
300
373
  output: 15,
374
+ cachedInput: 0.3,
375
+ cacheCreationInput: 3.75,
301
376
  maxInputTokens: 2e5
302
377
  },
303
378
  "gpt-oss-120b": {
@@ -388,36 +463,43 @@ var prices_default = {
388
463
  "gpt-4.1": {
389
464
  input: 2,
390
465
  output: 8,
466
+ cachedInput: 0.5,
391
467
  maxInputTokens: 1047576
392
468
  },
393
469
  "gpt-4.1-2025-04-14": {
394
470
  input: 2,
395
471
  output: 8,
472
+ cachedInput: 0.5,
396
473
  maxInputTokens: 1047576
397
474
  },
398
475
  "gpt-4.1-mini": {
399
476
  input: 0.4,
400
477
  output: 1.6,
478
+ cachedInput: 0.1,
401
479
  maxInputTokens: 1047576
402
480
  },
403
481
  "gpt-4.1-mini-2025-04-14": {
404
482
  input: 0.4,
405
483
  output: 1.6,
484
+ cachedInput: 0.1,
406
485
  maxInputTokens: 1047576
407
486
  },
408
487
  "gpt-4.1-nano": {
409
488
  input: 0.1,
410
489
  output: 0.4,
490
+ cachedInput: 0.025,
411
491
  maxInputTokens: 1047576
412
492
  },
413
493
  "gpt-4.1-nano-2025-04-14": {
414
494
  input: 0.1,
415
495
  output: 0.4,
496
+ cachedInput: 0.025,
416
497
  maxInputTokens: 1047576
417
498
  },
418
499
  "gpt-4.5-preview": {
419
500
  input: 75,
420
501
  output: 150,
502
+ cachedInput: 37.5,
421
503
  maxInputTokens: 128e3
422
504
  },
423
505
  "gpt-4o-2024-05-13": {
@@ -428,11 +510,13 @@ var prices_default = {
428
510
  "gpt-4o-2024-08-06": {
429
511
  input: 2.5,
430
512
  output: 10,
513
+ cachedInput: 1.25,
431
514
  maxInputTokens: 128e3
432
515
  },
433
516
  "gpt-4o-2024-11-20": {
434
517
  input: 2.5,
435
518
  output: 10,
519
+ cachedInput: 1.25,
436
520
  maxInputTokens: 128e3
437
521
  },
438
522
  "gpt-audio-2025-08-28": {
@@ -458,6 +542,7 @@ var prices_default = {
458
542
  "gpt-4o-mini-2024-07-18": {
459
543
  input: 0.15,
460
544
  output: 0.6,
545
+ cachedInput: 0.075,
461
546
  maxInputTokens: 128e3
462
547
  },
463
548
  "gpt-4o-mini-audio-preview-2024-12-17": {
@@ -468,21 +553,25 @@ var prices_default = {
468
553
  "gpt-4o-mini-realtime-preview-2024-12-17": {
469
554
  input: 0.6,
470
555
  output: 2.4,
556
+ cachedInput: 0.3,
471
557
  maxInputTokens: 128e3
472
558
  },
473
559
  "gpt-realtime-2025-08-28": {
474
560
  input: 4,
475
561
  output: 16,
562
+ cachedInput: 0.4,
476
563
  maxInputTokens: 32e3
477
564
  },
478
565
  "gpt-realtime-1.5-2026-02-23": {
479
566
  input: 4,
480
567
  output: 16,
568
+ cachedInput: 4,
481
569
  maxInputTokens: 32e3
482
570
  },
483
571
  "gpt-realtime-mini-2025-10-06": {
484
572
  input: 0.6,
485
573
  output: 2.4,
574
+ cachedInput: 0.06,
486
575
  maxInputTokens: 128e3
487
576
  },
488
577
  "gpt-4o-mini-transcribe": {
@@ -493,11 +582,13 @@ var prices_default = {
493
582
  "gpt-4o-realtime-preview-2024-10-01": {
494
583
  input: 5,
495
584
  output: 20,
585
+ cachedInput: 2.5,
496
586
  maxInputTokens: 128e3
497
587
  },
498
588
  "gpt-4o-realtime-preview-2024-12-17": {
499
589
  input: 5,
500
590
  output: 20,
591
+ cachedInput: 2.5,
501
592
  maxInputTokens: 128e3
502
593
  },
503
594
  "gpt-4o-transcribe": {
@@ -513,51 +604,61 @@ var prices_default = {
513
604
  "gpt-5.1-2025-11-13": {
514
605
  input: 1.25,
515
606
  output: 10,
607
+ cachedInput: 0.125,
516
608
  maxInputTokens: 272e3
517
609
  },
518
610
  "gpt-5.1-chat-2025-11-13": {
519
611
  input: 1.25,
520
612
  output: 10,
613
+ cachedInput: 0.125,
521
614
  maxInputTokens: 128e3
522
615
  },
523
616
  "gpt-5.1-codex-2025-11-13": {
524
617
  input: 1.25,
525
618
  output: 10,
619
+ cachedInput: 0.125,
526
620
  maxInputTokens: 272e3
527
621
  },
528
622
  "gpt-5.1-codex-mini-2025-11-13": {
529
623
  input: 0.25,
530
624
  output: 2,
625
+ cachedInput: 0.025,
531
626
  maxInputTokens: 272e3
532
627
  },
533
628
  "gpt-5-2025-08-07": {
534
629
  input: 1.25,
535
630
  output: 10,
631
+ cachedInput: 0.125,
536
632
  maxInputTokens: 272e3
537
633
  },
538
634
  "gpt-5-chat": {
539
635
  input: 1.25,
540
636
  output: 10,
637
+ cachedInput: 0.125,
541
638
  maxInputTokens: 128e3
542
639
  },
543
640
  "gpt-5-chat-latest": {
544
641
  input: 1.25,
545
642
  output: 10,
643
+ cachedInput: 0.125,
546
644
  maxInputTokens: 128e3
547
645
  },
548
646
  "gpt-5-codex": {
549
647
  input: 1.25,
550
648
  output: 10,
649
+ cachedInput: 0.125,
551
650
  maxInputTokens: 272e3
552
651
  },
553
652
  "gpt-5-mini-2025-08-07": {
554
653
  input: 0.25,
555
654
  output: 2,
655
+ cachedInput: 0.025,
556
656
  maxInputTokens: 272e3
557
657
  },
558
658
  "gpt-5-nano-2025-08-07": {
559
659
  input: 0.05,
560
660
  output: 0.4,
661
+ cachedInput: 5e-3,
561
662
  maxInputTokens: 272e3
562
663
  },
563
664
  "gpt-5-pro": {
@@ -568,61 +669,73 @@ var prices_default = {
568
669
  "gpt-5.1": {
569
670
  input: 1.25,
570
671
  output: 10,
672
+ cachedInput: 0.125,
571
673
  maxInputTokens: 272e3
572
674
  },
573
675
  "gpt-5.1-chat": {
574
676
  input: 1.25,
575
677
  output: 10,
678
+ cachedInput: 0.125,
576
679
  maxInputTokens: 128e3
577
680
  },
578
681
  "gpt-5.1-codex": {
579
682
  input: 1.25,
580
683
  output: 10,
684
+ cachedInput: 0.125,
581
685
  maxInputTokens: 272e3
582
686
  },
583
687
  "gpt-5.1-codex-max": {
584
688
  input: 1.25,
585
689
  output: 10,
690
+ cachedInput: 0.125,
586
691
  maxInputTokens: 272e3
587
692
  },
588
693
  "gpt-5.1-codex-mini": {
589
694
  input: 0.25,
590
695
  output: 2,
696
+ cachedInput: 0.025,
591
697
  maxInputTokens: 272e3
592
698
  },
593
699
  "gpt-5.2": {
594
700
  input: 1.75,
595
701
  output: 14,
702
+ cachedInput: 0.175,
596
703
  maxInputTokens: 272e3
597
704
  },
598
705
  "gpt-5.2-2025-12-11": {
599
706
  input: 1.75,
600
707
  output: 14,
708
+ cachedInput: 0.175,
601
709
  maxInputTokens: 272e3
602
710
  },
603
711
  "gpt-5.2-chat": {
604
712
  input: 1.75,
605
713
  output: 14,
714
+ cachedInput: 0.175,
606
715
  maxInputTokens: 128e3
607
716
  },
608
717
  "gpt-5.2-chat-2025-12-11": {
609
718
  input: 1.75,
610
719
  output: 14,
720
+ cachedInput: 0.175,
611
721
  maxInputTokens: 128e3
612
722
  },
613
723
  "gpt-5.2-codex": {
614
724
  input: 1.75,
615
725
  output: 14,
726
+ cachedInput: 0.175,
616
727
  maxInputTokens: 272e3
617
728
  },
618
729
  "gpt-5.3-chat": {
619
730
  input: 1.75,
620
731
  output: 14,
732
+ cachedInput: 0.175,
621
733
  maxInputTokens: 128e3
622
734
  },
623
735
  "gpt-5.3-codex": {
624
736
  input: 1.75,
625
737
  output: 14,
738
+ cachedInput: 0.175,
626
739
  maxInputTokens: 272e3
627
740
  },
628
741
  "gpt-5.2-pro": {
@@ -638,71 +751,85 @@ var prices_default = {
638
751
  "gpt-5.4": {
639
752
  input: 2.5,
640
753
  output: 15,
754
+ cachedInput: 0.25,
641
755
  maxInputTokens: 105e4
642
756
  },
643
757
  "gpt-5.4-2026-03-05": {
644
758
  input: 2.5,
645
759
  output: 15,
760
+ cachedInput: 0.25,
646
761
  maxInputTokens: 105e4
647
762
  },
648
763
  "gpt-5.4-pro": {
649
764
  input: 30,
650
765
  output: 180,
766
+ cachedInput: 3,
651
767
  maxInputTokens: 105e4
652
768
  },
653
769
  "gpt-5.4-pro-2026-03-05": {
654
770
  input: 30,
655
771
  output: 180,
772
+ cachedInput: 3,
656
773
  maxInputTokens: 105e4
657
774
  },
658
775
  "gpt-5.4-mini": {
659
776
  input: 0.75,
660
777
  output: 4.5,
778
+ cachedInput: 0.075,
661
779
  maxInputTokens: 272e3
662
780
  },
663
781
  "gpt-5.4-nano": {
664
782
  input: 0.2,
665
783
  output: 1.25,
784
+ cachedInput: 0.02,
666
785
  maxInputTokens: 272e3
667
786
  },
668
787
  "o1-2024-12-17": {
669
788
  input: 15,
670
789
  output: 60,
790
+ cachedInput: 7.5,
671
791
  maxInputTokens: 2e5
672
792
  },
673
793
  "o1-mini": {
674
794
  input: 1.21,
675
795
  output: 4.84,
796
+ cachedInput: 0.605,
676
797
  maxInputTokens: 128e3
677
798
  },
678
799
  "o1-mini-2024-09-12": {
679
800
  input: 1.1,
680
801
  output: 4.4,
802
+ cachedInput: 0.55,
681
803
  maxInputTokens: 128e3
682
804
  },
683
805
  "o1-preview": {
684
806
  input: 15,
685
807
  output: 60,
808
+ cachedInput: 7.5,
686
809
  maxInputTokens: 128e3
687
810
  },
688
811
  "o1-preview-2024-09-12": {
689
812
  input: 15,
690
813
  output: 60,
814
+ cachedInput: 7.5,
691
815
  maxInputTokens: 128e3
692
816
  },
693
817
  "o3-2025-04-16": {
694
818
  input: 2,
695
819
  output: 8,
820
+ cachedInput: 0.5,
696
821
  maxInputTokens: 2e5
697
822
  },
698
823
  "o3-mini": {
699
824
  input: 1.1,
700
825
  output: 4.4,
826
+ cachedInput: 0.55,
701
827
  maxInputTokens: 2e5
702
828
  },
703
829
  "o3-mini-2025-01-31": {
704
830
  input: 1.1,
705
831
  output: 4.4,
832
+ cachedInput: 0.55,
706
833
  maxInputTokens: 2e5
707
834
  },
708
835
  "o3-pro": {
@@ -718,11 +845,13 @@ var prices_default = {
718
845
  "o4-mini": {
719
846
  input: 1.1,
720
847
  output: 4.4,
848
+ cachedInput: 0.275,
721
849
  maxInputTokens: 2e5
722
850
  },
723
851
  "o4-mini-2025-04-16": {
724
852
  input: 1.1,
725
853
  output: 4.4,
854
+ cachedInput: 0.275,
726
855
  maxInputTokens: 2e5
727
856
  },
728
857
  "deepseek-v3.2": {
@@ -743,6 +872,7 @@ var prices_default = {
743
872
  "deepseek-v3": {
744
873
  input: 0.27,
745
874
  output: 1.1,
875
+ cachedInput: 0.07,
746
876
  maxInputTokens: 65536
747
877
  },
748
878
  "deepseek-v3-0324": {
@@ -758,76 +888,105 @@ var prices_default = {
758
888
  "claude-haiku-4-5-20251001": {
759
889
  input: 1,
760
890
  output: 5,
891
+ cachedInput: 0.1,
892
+ cacheCreationInput: 1.25,
761
893
  maxInputTokens: 2e5
762
894
  },
763
895
  "claude-3-7-sonnet-20250219": {
764
896
  input: 3,
765
897
  output: 15,
898
+ cachedInput: 0.3,
899
+ cacheCreationInput: 3.75,
766
900
  maxInputTokens: 2e5
767
901
  },
768
902
  "claude-3-haiku-20240307": {
769
903
  input: 0.25,
770
904
  output: 1.25,
905
+ cachedInput: 0.03,
906
+ cacheCreationInput: 0.3,
771
907
  maxInputTokens: 2e5
772
908
  },
773
909
  "claude-3-opus-20240229": {
774
910
  input: 15,
775
911
  output: 75,
912
+ cachedInput: 1.5,
913
+ cacheCreationInput: 18.75,
776
914
  maxInputTokens: 2e5
777
915
  },
778
916
  "claude-4-opus-20250514": {
779
917
  input: 15,
780
918
  output: 75,
919
+ cachedInput: 1.5,
920
+ cacheCreationInput: 18.75,
781
921
  maxInputTokens: 2e5
782
922
  },
783
923
  "claude-4-sonnet-20250514": {
784
924
  input: 3,
785
925
  output: 15,
926
+ cachedInput: 0.3,
927
+ cacheCreationInput: 3.75,
786
928
  maxInputTokens: 1e6
787
929
  },
788
930
  "claude-sonnet-4-5-20250929": {
789
931
  input: 3,
790
932
  output: 15,
933
+ cachedInput: 0.3,
934
+ cacheCreationInput: 3.75,
791
935
  maxInputTokens: 2e5
792
936
  },
793
937
  "claude-sonnet-4-5-20250929-v1:0": {
794
938
  input: 3,
795
939
  output: 15,
940
+ cachedInput: 0.3,
941
+ cacheCreationInput: 3.75,
796
942
  maxInputTokens: 2e5
797
943
  },
798
944
  "claude-opus-4-1-20250805": {
799
945
  input: 15,
800
946
  output: 75,
947
+ cachedInput: 1.5,
948
+ cacheCreationInput: 18.75,
801
949
  maxInputTokens: 2e5
802
950
  },
803
951
  "claude-opus-4-20250514": {
804
952
  input: 15,
805
953
  output: 75,
954
+ cachedInput: 1.5,
955
+ cacheCreationInput: 18.75,
806
956
  maxInputTokens: 2e5
807
957
  },
808
958
  "claude-opus-4-5-20251101": {
809
959
  input: 5,
810
960
  output: 25,
961
+ cachedInput: 0.5,
962
+ cacheCreationInput: 6.25,
811
963
  maxInputTokens: 2e5
812
964
  },
813
965
  "claude-opus-4-6-20260205": {
814
966
  input: 5,
815
967
  output: 25,
968
+ cachedInput: 0.5,
969
+ cacheCreationInput: 6.25,
816
970
  maxInputTokens: 1e6
817
971
  },
818
972
  "claude-opus-4-7-20260416": {
819
973
  input: 5,
820
974
  output: 25,
975
+ cachedInput: 0.5,
976
+ cacheCreationInput: 6.25,
821
977
  maxInputTokens: 1e6
822
978
  },
823
979
  "claude-sonnet-4-20250514": {
824
980
  input: 3,
825
981
  output: 15,
982
+ cachedInput: 0.3,
983
+ cacheCreationInput: 3.75,
826
984
  maxInputTokens: 1e6
827
985
  },
828
986
  "codex-mini-latest": {
829
987
  input: 1.5,
830
988
  output: 6,
989
+ cachedInput: 0.375,
831
990
  maxInputTokens: 2e5
832
991
  },
833
992
  "deepseek-ai/deepseek-r1": {
@@ -877,6 +1036,7 @@ var prices_default = {
877
1036
  "deepseek-ai/deepseek-v3.1-terminus": {
878
1037
  input: 0.27,
879
1038
  output: 1,
1039
+ cachedInput: 0.216,
880
1040
  maxInputTokens: 163840
881
1041
  },
882
1042
  "deepseek-coder": {
@@ -887,26 +1047,31 @@ var prices_default = {
887
1047
  "gemini-2.0-flash": {
888
1048
  input: 0.1,
889
1049
  output: 0.4,
1050
+ cachedInput: 0.025,
890
1051
  maxInputTokens: 1048576
891
1052
  },
892
1053
  "gemini-2.0-flash-001": {
893
1054
  input: 0.1,
894
1055
  output: 0.4,
1056
+ cachedInput: 0.025,
895
1057
  maxInputTokens: 1048576
896
1058
  },
897
1059
  "gemini-2.0-flash-lite": {
898
1060
  input: 0.075,
899
1061
  output: 0.3,
1062
+ cachedInput: 0.01875,
900
1063
  maxInputTokens: 1048576
901
1064
  },
902
1065
  "gemini-2.0-flash-lite-001": {
903
1066
  input: 0.075,
904
1067
  output: 0.3,
1068
+ cachedInput: 0.01875,
905
1069
  maxInputTokens: 1048576
906
1070
  },
907
1071
  "gemini-2.5-flash-image": {
908
1072
  input: 0.3,
909
1073
  output: 2.5,
1074
+ cachedInput: 0.03,
910
1075
  maxInputTokens: 32768
911
1076
  },
912
1077
  "gemini-3-pro-image-preview": {
@@ -922,51 +1087,61 @@ var prices_default = {
922
1087
  "gemini-3.1-flash-lite-preview": {
923
1088
  input: 0.25,
924
1089
  output: 1.5,
1090
+ cachedInput: 0.025,
925
1091
  maxInputTokens: 1048576
926
1092
  },
927
1093
  "gemini-2.5-flash-lite": {
928
1094
  input: 0.1,
929
1095
  output: 0.4,
1096
+ cachedInput: 0.01,
930
1097
  maxInputTokens: 1048576
931
1098
  },
932
1099
  "gemini-2.5-flash-lite-preview-09-2025": {
933
1100
  input: 0.1,
934
1101
  output: 0.4,
1102
+ cachedInput: 0.01,
935
1103
  maxInputTokens: 1048576
936
1104
  },
937
1105
  "gemini-2.5-flash-preview-09-2025": {
938
1106
  input: 0.3,
939
1107
  output: 2.5,
1108
+ cachedInput: 0.075,
940
1109
  maxInputTokens: 1048576
941
1110
  },
942
1111
  "gemini-live-2.5-flash-preview-native-audio-09-2025": {
943
1112
  input: 0.3,
944
1113
  output: 2,
1114
+ cachedInput: 0.075,
945
1115
  maxInputTokens: 1048576
946
1116
  },
947
1117
  "gemini-2.5-flash-lite-preview-06-17": {
948
1118
  input: 0.1,
949
1119
  output: 0.4,
1120
+ cachedInput: 0.025,
950
1121
  maxInputTokens: 1048576
951
1122
  },
952
1123
  "gemini-3-pro-preview": {
953
1124
  input: 2,
954
1125
  output: 12,
1126
+ cachedInput: 0.2,
955
1127
  maxInputTokens: 1048576
956
1128
  },
957
1129
  "gemini-3.1-pro-preview": {
958
1130
  input: 2,
959
1131
  output: 12,
1132
+ cachedInput: 0.2,
960
1133
  maxInputTokens: 1048576
961
1134
  },
962
1135
  "gemini-3.1-pro-preview-customtools": {
963
1136
  input: 2,
964
1137
  output: 12,
1138
+ cachedInput: 0.2,
965
1139
  maxInputTokens: 1048576
966
1140
  },
967
1141
  "gemini-3-flash-preview": {
968
1142
  input: 0.5,
969
1143
  output: 3,
1144
+ cachedInput: 0.05,
970
1145
  maxInputTokens: 1048576
971
1146
  },
972
1147
  "gemini-robotics-er-1.5-preview": {
@@ -982,11 +1157,13 @@ var prices_default = {
982
1157
  "gemini-flash-latest": {
983
1158
  input: 0.3,
984
1159
  output: 2.5,
1160
+ cachedInput: 0.03,
985
1161
  maxInputTokens: 1048576
986
1162
  },
987
1163
  "gemini-flash-lite-latest": {
988
1164
  input: 0.1,
989
1165
  output: 0.4,
1166
+ cachedInput: 0.01,
990
1167
  maxInputTokens: 1048576
991
1168
  },
992
1169
  "gemini-gemma-2-27b-it": {
@@ -1062,39 +1239,47 @@ var prices_default = {
1062
1239
  "gpt-4o-mini-realtime-preview": {
1063
1240
  input: 0.6,
1064
1241
  output: 2.4,
1242
+ cachedInput: 0.3,
1065
1243
  maxInputTokens: 128e3
1066
1244
  },
1067
1245
  "gpt-4o-realtime-preview": {
1068
1246
  input: 5,
1069
1247
  output: 20,
1248
+ cachedInput: 2.5,
1070
1249
  maxInputTokens: 128e3
1071
1250
  },
1072
1251
  "gpt-4o-realtime-preview-2025-06-03": {
1073
1252
  input: 5,
1074
1253
  output: 20,
1254
+ cachedInput: 2.5,
1075
1255
  maxInputTokens: 128e3
1076
1256
  },
1077
1257
  "gpt-image-1.5": {
1078
1258
  input: 5,
1079
- output: 10
1259
+ output: 10,
1260
+ cachedInput: 1.25
1080
1261
  },
1081
1262
  "gpt-image-1.5-2025-12-16": {
1082
1263
  input: 5,
1083
- output: 10
1264
+ output: 10,
1265
+ cachedInput: 1.25
1084
1266
  },
1085
1267
  "gpt-5.1-chat-latest": {
1086
1268
  input: 1.25,
1087
1269
  output: 10,
1270
+ cachedInput: 0.125,
1088
1271
  maxInputTokens: 128e3
1089
1272
  },
1090
1273
  "gpt-5.2-chat-latest": {
1091
1274
  input: 1.75,
1092
1275
  output: 14,
1276
+ cachedInput: 0.175,
1093
1277
  maxInputTokens: 128e3
1094
1278
  },
1095
1279
  "gpt-5.3-chat-latest": {
1096
1280
  input: 1.75,
1097
1281
  output: 14,
1282
+ cachedInput: 0.175,
1098
1283
  maxInputTokens: 128e3
1099
1284
  },
1100
1285
  "gpt-5-pro-2025-10-06": {
@@ -1105,11 +1290,13 @@ var prices_default = {
1105
1290
  "gpt-realtime": {
1106
1291
  input: 4,
1107
1292
  output: 16,
1293
+ cachedInput: 0.4,
1108
1294
  maxInputTokens: 32e3
1109
1295
  },
1110
1296
  "gpt-realtime-1.5": {
1111
1297
  input: 4,
1112
1298
  output: 16,
1299
+ cachedInput: 0.4,
1113
1300
  maxInputTokens: 32e3
1114
1301
  },
1115
1302
  "gpt-realtime-mini": {
@@ -1156,6 +1343,7 @@ var prices_default = {
1156
1343
  o1: {
1157
1344
  input: 15,
1158
1345
  output: 60,
1346
+ cachedInput: 7.5,
1159
1347
  maxInputTokens: 2e5
1160
1348
  },
1161
1349
  "o1-pro": {
@@ -1171,6 +1359,7 @@ var prices_default = {
1171
1359
  o3: {
1172
1360
  input: 2,
1173
1361
  output: 8,
1362
+ cachedInput: 0.5,
1174
1363
  maxInputTokens: 2e5
1175
1364
  },
1176
1365
  "gpt-oss-20b": {
@@ -1195,6 +1384,8 @@ var prices_default = {
1195
1384
  "claude-haiku-4-5@20251001": {
1196
1385
  input: 1,
1197
1386
  output: 5,
1387
+ cachedInput: 0.1,
1388
+ cacheCreationInput: 1.25,
1198
1389
  maxInputTokens: 2e5
1199
1390
  },
1200
1391
  "claude-3-5-sonnet": {
@@ -1210,6 +1401,8 @@ var prices_default = {
1210
1401
  "claude-3-7-sonnet@20250219": {
1211
1402
  input: 3,
1212
1403
  output: 15,
1404
+ cachedInput: 0.3,
1405
+ cacheCreationInput: 3.75,
1213
1406
  maxInputTokens: 2e5
1214
1407
  },
1215
1408
  "claude-3-haiku": {
@@ -1245,46 +1438,64 @@ var prices_default = {
1245
1438
  "claude-opus-4": {
1246
1439
  input: 15,
1247
1440
  output: 75,
1441
+ cachedInput: 1.5,
1442
+ cacheCreationInput: 18.75,
1248
1443
  maxInputTokens: 2e5
1249
1444
  },
1250
1445
  "claude-opus-4-1@20250805": {
1251
1446
  input: 15,
1252
1447
  output: 75,
1448
+ cachedInput: 1.5,
1449
+ cacheCreationInput: 18.75,
1253
1450
  maxInputTokens: 2e5
1254
1451
  },
1255
1452
  "claude-opus-4-5@20251101": {
1256
1453
  input: 5,
1257
1454
  output: 25,
1455
+ cachedInput: 0.5,
1456
+ cacheCreationInput: 6.25,
1258
1457
  maxInputTokens: 2e5
1259
1458
  },
1260
1459
  "claude-opus-4-6@default": {
1261
1460
  input: 5,
1262
1461
  output: 25,
1462
+ cachedInput: 0.5,
1463
+ cacheCreationInput: 6.25,
1263
1464
  maxInputTokens: 1e6
1264
1465
  },
1265
1466
  "claude-opus-4-7@default": {
1266
1467
  input: 5,
1267
1468
  output: 25,
1469
+ cachedInput: 0.5,
1470
+ cacheCreationInput: 6.25,
1268
1471
  maxInputTokens: 1e6
1269
1472
  },
1270
1473
  "claude-sonnet-4-5@20250929": {
1271
1474
  input: 3,
1272
1475
  output: 15,
1476
+ cachedInput: 0.3,
1477
+ cacheCreationInput: 3.75,
1273
1478
  maxInputTokens: 2e5
1274
1479
  },
1275
1480
  "claude-opus-4@20250514": {
1276
1481
  input: 15,
1277
1482
  output: 75,
1483
+ cachedInput: 1.5,
1484
+ cacheCreationInput: 18.75,
1278
1485
  maxInputTokens: 2e5
1279
1486
  },
1280
1487
  "claude-sonnet-4": {
1281
1488
  input: 3,
1282
1489
  output: 15,
1490
+ cachedInput: 0.3,
1491
+ cacheCreationInput: 3.75,
1283
1492
  maxInputTokens: 1e6
1284
1493
  },
1285
1494
  "claude-sonnet-4@20250514": {
1286
1495
  input: 3,
1287
1496
  output: 15,
1497
+ cachedInput: 0.3,
1498
+ cacheCreationInput: 3.75,
1288
1499
  maxInputTokens: 1e6
1289
1500
  },
1290
1501
  "deepseek-ai/deepseek-v3.1-maas": {
@@ -1334,6 +1545,7 @@ var prices_default = {
1334
1545
  "gpt-realtime-mini-2025-12-15": {
1335
1546
  input: 0.6,
1336
1547
  output: 2.4,
1548
+ cachedInput: 0.06,
1337
1549
  maxInputTokens: 128e3
1338
1550
  },
1339
1551
  "gemini-2.5-flash-native-audio-latest": {
@@ -1359,16 +1571,20 @@ var prices_default = {
1359
1571
  "gemini-pro-latest": {
1360
1572
  input: 1.25,
1361
1573
  output: 10,
1574
+ cachedInput: 0.125,
1362
1575
  maxInputTokens: 1048576
1363
1576
  },
1364
1577
  "gemini-exp-1206": {
1365
1578
  input: 0.3,
1366
1579
  output: 2.5,
1580
+ cachedInput: 0.03,
1367
1581
  maxInputTokens: 1048576
1368
1582
  },
1369
1583
  "claude-sonnet-4-6@default": {
1370
1584
  input: 3,
1371
1585
  output: 15,
1586
+ cachedInput: 0.3,
1587
+ cacheCreationInput: 3.75,
1372
1588
  maxInputTokens: 1e6
1373
1589
  }
1374
1590
  }
@@ -1376,11 +1592,19 @@ var prices_default = {
1376
1592
 
1377
1593
  // src/core/tracker.ts
1378
1594
  var bundledPrices = prices_default.models;
1595
+ var bundledUpdatedAt = prices_default.updated_at ?? "";
1379
1596
  var ModelPriceSchema = import_zod.z.object({
1380
1597
  input: import_zod.z.number().nonnegative(),
1381
1598
  output: import_zod.z.number().nonnegative(),
1599
+ cachedInput: import_zod.z.number().nonnegative().optional(),
1600
+ cacheCreationInput: import_zod.z.number().nonnegative().optional(),
1382
1601
  maxInputTokens: import_zod.z.number().positive().optional()
1383
1602
  });
1603
+ var BudgetConfigSchema = import_zod.z.object({
1604
+ threshold: import_zod.z.number().positive(),
1605
+ webhookUrl: import_zod.z.string().url(),
1606
+ mode: import_zod.z.enum(["once", "always"]).optional().default("once")
1607
+ });
1384
1608
  var TrackerConfigSchema = import_zod.z.object({
1385
1609
  storage: import_zod.z.union([import_zod.z.enum(["memory", "sqlite"]), import_zod.z.custom((v) => {
1386
1610
  return v !== null && typeof v === "object" && typeof v.record === "function" && typeof v.getAll === "function" && typeof v.clearAll === "function" && typeof v.clearSession === "function";
@@ -1388,7 +1612,13 @@ var TrackerConfigSchema = import_zod.z.object({
1388
1612
  alertThreshold: import_zod.z.number().positive().optional(),
1389
1613
  webhookUrl: import_zod.z.string().url().optional(),
1390
1614
  syncPrices: import_zod.z.boolean().optional().default(true),
1391
- customPrices: import_zod.z.record(import_zod.z.string(), ModelPriceSchema).optional()
1615
+ customPrices: import_zod.z.record(import_zod.z.string(), ModelPriceSchema).optional(),
1616
+ warnIfStaleAfterHours: import_zod.z.number().nonnegative().optional().default(72),
1617
+ budgets: import_zod.z.object({
1618
+ perUser: BudgetConfigSchema.optional(),
1619
+ perSession: BudgetConfigSchema.optional()
1620
+ }).optional(),
1621
+ suggestions: import_zod.z.boolean().optional().default(false)
1392
1622
  });
1393
1623
  function createTracker(config = {}) {
1394
1624
  const parsed = TrackerConfigSchema.safeParse(config);
@@ -1402,19 +1632,45 @@ ${issues}`);
1402
1632
  alertThreshold,
1403
1633
  webhookUrl,
1404
1634
  syncPrices,
1405
- customPrices
1635
+ customPrices,
1636
+ warnIfStaleAfterHours,
1637
+ budgets,
1638
+ suggestions
1406
1639
  } = parsed.data;
1407
1640
  const storage = typeof storageOption === "object" ? storageOption : createStorage(storageOption);
1408
1641
  let remotePrices;
1642
+ let pricesUpdatedAt = bundledUpdatedAt;
1409
1643
  if (syncPrices) {
1410
1644
  getRemotePrices().then((result) => {
1411
- if (result) remotePrices = result;
1645
+ if (result) {
1646
+ remotePrices = result.models;
1647
+ pricesUpdatedAt = result.updated_at;
1648
+ }
1412
1649
  }).catch(() => {
1413
1650
  });
1414
1651
  }
1652
+ let stalenessChecked = false;
1653
/**
 * Warn on the console, at most once per tracker, when the active price data
 * is older than `warnIfStaleAfterHours`.
 *
 * - Does nothing when `warnIfStaleAfterHours` is 0 (the documented opt-out).
 * - Does nothing when no `pricesUpdatedAt` timestamp is known.
 * - Does nothing when `pricesUpdatedAt` cannot be parsed as a date.
 *
 * The `stalenessChecked` latch is set before any further work so the check
 * is never repeated, whatever its outcome.
 */
function maybeWarnStaleness() {
  if (stalenessChecked || !warnIfStaleAfterHours) return;
  stalenessChecked = true;
  if (!pricesUpdatedAt) return;
  const updatedMs = new Date(pricesUpdatedAt).getTime();
  // Date parsing signals failure with NaN, not an exception, so guard on it
  // explicitly instead of wrapping the block in a catch-all.
  if (Number.isNaN(updatedMs)) return;
  const ageHours = (Date.now() - updatedMs) / (1e3 * 60 * 60);
  if (ageHours > warnIfStaleAfterHours) {
    console.warn(
      `[tokenwatch] Price data is ${Math.round(ageHours)}h old (updated_at: ${pricesUpdatedAt}). Run "tokenwatch sync" to refresh, or set warnIfStaleAfterHours: 0 to suppress.`
    );
  }
}
1415
1668
  let alertFired = false;
1669
+ const firedUserAlerts = /* @__PURE__ */ new Set();
1670
+ const firedSessionAlerts = /* @__PURE__ */ new Set();
1416
1671
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
1417
1672
  function resolveModelPrice(model) {
1673
+ maybeWarnStaleness();
1418
1674
  return resolvePrice(model, {
1419
1675
  bundledPrices,
1420
1676
  ...customPrices !== void 0 && { customPrices },
@@ -1423,39 +1679,94 @@ ${issues}`);
1423
1679
  }
1424
1680
  function track(entry) {
1425
1681
  const price = resolveModelPrice(entry.model);
1426
- const costUSD = calculateCost(entry.inputTokens, entry.outputTokens, price);
1682
+ const costUSD = calculateCost(
1683
+ entry.inputTokens,
1684
+ entry.outputTokens,
1685
+ price,
1686
+ entry.cachedTokens,
1687
+ entry.cacheCreationTokens
1688
+ );
1427
1689
  const full = {
1428
1690
  ...entry,
1429
1691
  costUSD,
1430
1692
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
1431
1693
  };
1432
1694
  storage.record(full);
1433
- maybeFireAlert();
1695
+ maybeFireAlerts(full);
1696
+ if (suggestions) {
1697
+ maybeSuggestCheaperModel(entry.model, costUSD, entry.inputTokens, entry.outputTokens, {
1698
+ bundledPrices,
1699
+ ...customPrices !== void 0 && { customPrices },
1700
+ ...remotePrices !== void 0 && { remotePrices }
1701
+ });
1702
+ }
1434
1703
  }
1435
- function maybeFireAlert() {
1436
- if (!alertThreshold || !webhookUrl || alertFired) return;
1437
- alertFired = true;
1438
- Promise.resolve(storage.getAll()).then((entries) => {
1439
- const total = computeTotal(entries);
1440
- if (total < alertThreshold) {
1441
- alertFired = false;
1442
- return;
1443
- }
1444
- const payload = {
1445
- text: `[tokenwatch] Alert: total cost reached $${total.toFixed(4)} USD (threshold: $${alertThreshold})`
1446
- };
1447
- fetch(webhookUrl, {
1448
- method: "POST",
1449
- headers: { "Content-Type": "application/json" },
1450
- body: JSON.stringify(payload)
1704
/**
 * Evaluate the global threshold alert and the optional per-user / per-session
 * budget alerts after an entry has been tracked, firing webhooks as needed.
 *
 * @param entry The tracked entry; only `userId` and `sessionId` are read, to
 *   decide which scoped budgets to evaluate.
 *
 * All checks are asynchronous and best-effort: failures never propagate to the
 * caller. The stored entries are loaded at most once per call and shared by
 * every check that needs them.
 */
function maybeFireAlerts(entry) {
  let entriesPromise;
  // Lazily load the stored entries once; every check below reuses the same promise.
  const loadEntries = () => entriesPromise ??= Promise.resolve(storage.getAll());

  if (alertThreshold && webhookUrl && !alertFired) {
    // Latch first so overlapping track() calls do not fire twice; it is released
    // again when the total is still below the threshold or the check fails.
    alertFired = true;
    loadEntries().then((entries) => {
      const total = computeTotal(entries);
      if (total < alertThreshold) {
        alertFired = false;
        return;
      }
      fireWebhook(webhookUrl, {
        text: `[tokenwatch] Alert: total cost reached $${total.toFixed(4)} USD (threshold: $${alertThreshold})`
      });
    }).catch(() => {
      alertFired = false;
    });
  }

  // Shared logic for the per-user and per-session budgets.
  //   cfg      - budget rule ({ threshold, webhookUrl, mode })
  //   id       - the user or session identifier being checked
  //   firedIds - Set remembering ids already alerted in "once" mode
  //   label    - "user" or "session", used in the message text
  //   field    - entry property holding the identifier
  const checkScopedBudget = (cfg, id, firedIds, label, field) => {
    const fireOnce = cfg.mode !== "always";
    if (fireOnce && firedIds.has(id)) return;
    // Same latch-then-release pattern as the global alert, keyed by id.
    if (fireOnce) firedIds.add(id);
    loadEntries().then((entries) => {
      const scopedCost = entries.filter((e) => e[field] === id).reduce((s, e) => s + e.costUSD, 0);
      if (scopedCost >= cfg.threshold) {
        fireWebhook(cfg.webhookUrl, {
          text: `[tokenwatch] Budget alert: ${label} "${id}" reached $${scopedCost.toFixed(4)} USD (threshold: $${cfg.threshold})`
        });
      } else if (fireOnce) {
        firedIds.delete(id);
      }
    }).catch(() => {
      if (fireOnce) firedIds.delete(id);
    });
  };

  if (budgets?.perUser && entry.userId) {
    checkScopedBudget(budgets.perUser, entry.userId, firedUserAlerts, "user", "userId");
  }
  if (budgets?.perSession && entry.sessionId) {
    checkScopedBudget(budgets.perSession, entry.sessionId, firedSessionAlerts, "session", "sessionId");
  }
}
1759
/**
 * Best-effort webhook delivery: POST `payload` as JSON to `url`.
 *
 * Fire-and-forget: nothing is returned and a rejected request is ignored,
 * so a failing webhook never affects the caller.
 */
function fireWebhook(url, payload) {
  const request = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload)
  };
  const ignoreFailure = () => {
  };
  fetch(url, request).catch(ignoreFailure);
}
1457
- async function getReport() {
1458
- const entries = await Promise.resolve(storage.getAll());
1767
+ async function getReport(options) {
1768
+ const allEntries = await Promise.resolve(storage.getAll());
1769
+ const entries = filterEntries(allEntries, options);
1459
1770
  const byModel = {};
1460
1771
  const bySession = {};
1461
1772
  const byUser = {};
@@ -1463,18 +1774,24 @@ ${issues}`);
1463
1774
  let totalInput = 0;
1464
1775
  let totalOutput = 0;
1465
1776
  let totalCost = 0;
1466
- let lastTimestamp = startedAt;
1777
+ let periodFrom = options ? entries[0]?.timestamp ?? startedAt : startedAt;
1778
+ let lastTimestamp = periodFrom;
1467
1779
  for (const e of entries) {
1468
- totalInput += e.inputTokens;
1780
+ totalInput += e.inputTokens + (e.cachedTokens ?? 0) + (e.cacheCreationTokens ?? 0);
1469
1781
  totalOutput += e.outputTokens;
1470
1782
  totalCost += e.costUSD;
1471
1783
  if (e.timestamp > lastTimestamp) lastTimestamp = e.timestamp;
1472
- const m = byModel[e.model] ??= { costUSD: 0, calls: 0, tokens: { input: 0, output: 0, reasoning: 0 } };
1784
+ const m = byModel[e.model] ??= {
1785
+ costUSD: 0,
1786
+ calls: 0,
1787
+ tokens: { input: 0, output: 0, reasoning: 0, cached: 0 }
1788
+ };
1473
1789
  m.costUSD += e.costUSD;
1474
1790
  m.calls += 1;
1475
- m.tokens.input += e.inputTokens;
1791
+ m.tokens.input += e.inputTokens + (e.cachedTokens ?? 0) + (e.cacheCreationTokens ?? 0);
1476
1792
  m.tokens.output += e.outputTokens;
1477
1793
  m.tokens.reasoning += e.reasoningTokens ?? 0;
1794
+ m.tokens.cached += e.cachedTokens ?? 0;
1478
1795
  if (e.sessionId) {
1479
1796
  const s = bySession[e.sessionId] ??= { costUSD: 0, calls: 0 };
1480
1797
  s.costUSD += e.costUSD;
@@ -1491,6 +1808,9 @@ ${issues}`);
1491
1808
  f.calls += 1;
1492
1809
  }
1493
1810
  }
1811
+ if (options && entries.length > 0) {
1812
+ periodFrom = entries[0]?.timestamp ?? periodFrom;
1813
+ }
1494
1814
  return {
1495
1815
  totalCostUSD: totalCost,
1496
1816
  totalTokens: { input: totalInput, output: totalOutput },
@@ -1498,22 +1818,66 @@ ${issues}`);
1498
1818
  bySession,
1499
1819
  byUser,
1500
1820
  byFeature,
1501
- period: { from: startedAt, to: lastTimestamp }
1821
+ period: { from: periodFrom, to: lastTimestamp },
1822
+ ...pricesUpdatedAt ? { pricesUpdatedAt } : {}
1823
+ };
1824
+ }
1825
/**
 * Project future spend from the burn rate observed in a recent time window.
 *
 * @param options Optional settings.
 * @param options.windowHours Look-back window in hours (default 24).
 * @returns An object with `burnRatePerHour`, `projectedDailyCostUSD` (rate x 24),
 *   `projectedMonthlyCostUSD` (rate x 24 x 30), `basedOnHours` (observed span,
 *   rounded to 2 decimals) and `basedOnPeriod` (`{ from, to }` timestamps, or
 *   `null` when fewer than two entries fall in the window).
 *
 * All numeric fields are 0 when fewer than two entries are in the window or
 * when the observed span is shorter than 0.001 hours, since a rate cannot be
 * estimated from that.
 *
 * The observed span is taken from the earliest and latest timestamps in the
 * window, not from array position, so the result does not depend on the order
 * in which the storage adapter returns entries.
 */
async function getCostForecast(options = {}) {
  const windowHours = options.windowHours ?? 24;
  const allEntries = await Promise.resolve(storage.getAll());
  const windowStart = Date.now() - windowHours * 60 * 60 * 1e3;

  let entryCount = 0;
  let totalCost = 0;
  let firstMs = Infinity;
  let lastMs = -Infinity;
  let first = "";
  let last = "";
  for (const e of allEntries) {
    const ms = new Date(e.timestamp).getTime();
    // Written as a negated >= so that NaN (unparseable timestamp) is skipped too.
    if (!(ms >= windowStart)) continue;
    entryCount += 1;
    totalCost += e.costUSD;
    if (ms < firstMs) {
      firstMs = ms;
      first = e.timestamp;
    }
    if (ms >= lastMs) {
      lastMs = ms;
      last = e.timestamp;
    }
  }

  const zeroResult = (basedOnPeriod) => ({
    burnRatePerHour: 0,
    projectedDailyCostUSD: 0,
    projectedMonthlyCostUSD: 0,
    basedOnHours: 0,
    basedOnPeriod
  });
  if (entryCount < 2) return zeroResult(null);

  const actualHours = (lastMs - firstMs) / (1e3 * 60 * 60);
  // Guard against dividing by a near-zero span, which would explode the rate.
  if (actualHours < 1e-3) return zeroResult({ from: first, to: last });

  const burnRatePerHour = totalCost / actualHours;
  return {
    burnRatePerHour,
    projectedDailyCostUSD: burnRatePerHour * 24,
    projectedMonthlyCostUSD: burnRatePerHour * 24 * 30,
    basedOnHours: Math.round(actualHours * 100) / 100,
    basedOnPeriod: { from: first, to: last }
  };
}
1504
1865
  async function reset() {
1505
1866
  await Promise.resolve(storage.clearAll());
1506
1867
  alertFired = false;
1868
+ firedUserAlerts.clear();
1869
+ firedSessionAlerts.clear();
1507
1870
  }
1508
1871
  async function resetSession(sessionId) {
1509
1872
  await Promise.resolve(storage.clearSession(sessionId));
1873
+ firedSessionAlerts.delete(sessionId);
1510
1874
  }
1511
1875
  async function exportJSON() {
1512
1876
  return JSON.stringify(await getReport(), null, 2);
1513
1877
  }
1514
1878
  async function exportCSV() {
1515
1879
  const entries = await Promise.resolve(storage.getAll());
1516
- const header = "timestamp,model,inputTokens,outputTokens,reasoningTokens,costUSD,sessionId,userId,feature";
1880
+ const header = "timestamp,model,inputTokens,outputTokens,reasoningTokens,cachedTokens,cacheCreationTokens,costUSD,sessionId,userId,feature";
1517
1881
  const rows = entries.map(
1518
1882
  (e) => [
1519
1883
  csvEscape(e.timestamp),
@@ -1521,6 +1885,8 @@ ${issues}`);
1521
1885
  e.inputTokens,
1522
1886
  e.outputTokens,
1523
1887
  e.reasoningTokens ?? 0,
1888
+ e.cachedTokens ?? 0,
1889
+ e.cacheCreationTokens ?? 0,
1524
1890
  e.costUSD.toFixed(8),
1525
1891
  csvEscape(e.sessionId ?? ""),
1526
1892
  csvEscape(e.userId ?? ""),
@@ -1536,11 +1902,47 @@ ${issues}`);
1536
1902
  ...remotePrices !== void 0 && { remotePrices }
1537
1903
  }) ?? null;
1538
1904
  }
1539
- return { track, getReport, reset, resetSession, exportJSON, exportCSV, getModelInfo };
1905
+ return {
1906
+ track,
1907
+ getReport,
1908
+ getCostForecast,
1909
+ reset,
1910
+ resetSession,
1911
+ exportJSON,
1912
+ exportCSV,
1913
+ getModelInfo
1914
+ };
1540
1915
  }
1541
1916
  function computeTotal(entries) {
1542
1917
  return entries.reduce((sum, e) => sum + e.costUSD, 0);
1543
1918
  }
1919
/**
 * Convert a relative duration such as "24h" or "7d" into milliseconds.
 *
 * @param last A non-negative number (decimals allowed) followed by `h` (hours)
 *   or `d` (days); surrounding whitespace is ignored.
 * @returns The duration in milliseconds.
 * @throws {Error} When `last` does not match that format.
 */
function parseLastMs(last) {
  const parts = last.trim().match(/^(\d+(?:\.\d+)?)(h|d)$/);
  if (parts === null) {
    throw new Error(`[tokenwatch] Invalid "last" value: "${last}". Use e.g. "24h", "7d".`);
  }
  const [, amountText = "0", unit = "h"] = parts;
  const amount = parseFloat(amountText);
  if (unit === "h") {
    return amount * 60 * 60 * 1e3;
  }
  return amount * 24 * 60 * 60 * 1e3;
}
1926
/**
 * Restrict entries to a time range.
 *
 * @param entries Array of tracked entries (each with a `timestamp`).
 * @param options Optional range: `last` (relative duration, e.g. "24h"),
 *   `since` and/or `until` (values accepted by the `Date` constructor).
 *   `last` takes precedence over `since`. Both bounds are inclusive.
 * @returns The same array instance when no bound applies, otherwise a new
 *   filtered array.
 * @throws {Error} When `last`, `since` or `until` cannot be parsed. An
 *   unparseable bound would otherwise become NaN and silently disable the
 *   filter, returning data outside the requested range.
 */
function filterEntries(entries, options) {
  if (!options) return entries;

  // Parse an absolute bound, rejecting values that do not yield a valid date.
  const toMs = (value, label) => {
    const ms = new Date(value).getTime();
    if (Number.isNaN(ms)) {
      throw new Error(
        `[tokenwatch] Invalid "${label}" value: "${String(value)}". Use a value accepted by the Date constructor, e.g. an ISO 8601 string.`
      );
    }
    return ms;
  };

  let sinceMs;
  let untilMs;
  if (options.last) {
    sinceMs = Date.now() - parseLastMs(options.last);
  } else if (options.since) {
    sinceMs = toMs(options.since, "since");
  }
  if (options.until) {
    untilMs = toMs(options.until, "until");
  }
  if (sinceMs === void 0 && untilMs === void 0) return entries;

  return entries.filter((e) => {
    const ts = new Date(e.timestamp).getTime();
    if (sinceMs !== void 0 && ts < sinceMs) return false;
    if (untilMs !== void 0 && ts > untilMs) return false;
    return true;
  });
}
1544
1946
  function csvEscape(value) {
1545
1947
  if (value.includes(",") || value.includes('"') || value.includes("\n")) {
1546
1948
  return `"${value.replace(/"/g, '""')}"`;
@@ -1548,6 +1950,71 @@ function csvEscape(value) {
1548
1950
  return value;
1549
1951
  }
1550
1952
 
1953
+ // src/core/lazy-tracker.ts
1954
+ var CSV_HEADER = "timestamp,model,inputTokens,outputTokens,reasoningTokens,cachedTokens,cacheCreationTokens,costUSD,sessionId,userId,feature";
1955
/**
 * Build the report returned by a lazy tracker that has not been initialized:
 * zero cost, zero tokens, empty breakdowns, and a period that starts and ends
 * at the moment of the call.
 */
function emptyReport() {
  const stamp = (/* @__PURE__ */ new Date()).toISOString();
  const period = { from: stamp, to: stamp };
  const totalTokens = { input: 0, output: 0 };
  return {
    totalCostUSD: 0,
    totalTokens,
    byModel: {},
    bySession: {},
    byUser: {},
    byFeature: {},
    period
  };
}
1967
/**
 * Build the forecast returned by a lazy tracker that has not been initialized:
 * every numeric field is 0 and `basedOnPeriod` is null.
 */
function zeroForecast() {
  const numericFields = [
    "burnRatePerHour",
    "projectedDailyCostUSD",
    "projectedMonthlyCostUSD",
    "basedOnHours"
  ];
  const zeros = Object.fromEntries(numericFields.map((field) => [field, 0]));
  return { ...zeros, basedOnPeriod: null };
}
1976
/**
 * Create a tracker facade that can be handed out before its configuration is
 * known and initialized later with `init(config)`.
 *
 * Until `init()` succeeds every method is a safe no-op or returns a neutral
 * value:
 * - `track` discards the entry,
 * - `getReport` resolves to `emptyReport()`,
 * - `getCostForecast` resolves to `zeroForecast()`,
 * - `reset` / `resetSession` resolve without doing anything,
 * - `exportJSON` resolves to "{}", `exportCSV` to the CSV header line,
 * - `getModelInfo` returns null.
 *
 * After `init()` every call is forwarded to the tracker built by `createTracker`.
 */
function createLazyTracker() {
  let delegate = null;
  return {
    /**
     * Build the underlying tracker. May only succeed once.
     * @throws {Error} When already initialized. Errors thrown by `createTracker`
     *   propagate unchanged; `delegate` then stays null, so `init` can be retried.
     */
    init(config) {
      if (delegate !== null) {
        throw new Error(
          "[tokenwatch] LazyTracker already initialized. init() may only be called once."
        );
      }
      delegate = createTracker(config ?? {});
    },
    track(entry) {
      delegate?.track(entry);
    },
    async getReport(options) {
      return delegate?.getReport(options) ?? emptyReport();
    },
    async getCostForecast(options) {
      return delegate?.getCostForecast(options) ?? zeroForecast();
    },
    async reset() {
      await delegate?.reset();
    },
    async resetSession(sessionId) {
      await delegate?.resetSession(sessionId);
    },
    async exportJSON() {
      return delegate?.exportJSON() ?? "{}";
    },
    async exportCSV() {
      return delegate?.exportCSV() ?? CSV_HEADER;
    },
    getModelInfo(model) {
      return delegate?.getModelInfo(model) ?? null;
    }
  };
}
2017
+
1551
2018
  // src/providers/openai.ts
1552
2019
  function extractMeta(params) {
1553
2020
  const { __sessionId, __userId, __feature, ...cleaned } = params;
@@ -1559,19 +2026,24 @@ function extractMeta(params) {
1559
2026
  };
1560
2027
  }
1561
2028
  function extractUsage(usage) {
1562
- if (!usage) return { inputTokens: 0, outputTokens: 0, reasoningTokens: 0 };
2029
+ if (!usage) return { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cachedTokens: 0 };
2030
+ const totalInput = usage.prompt_tokens ?? usage.input_tokens ?? 0;
2031
+ const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? 0;
1563
2032
  return {
1564
- inputTokens: usage.prompt_tokens ?? usage.input_tokens ?? 0,
2033
+ // inputTokens = regular (non-cached) input; OpenAI prompt_tokens includes cached tokens
2034
+ inputTokens: totalInput - cachedTokens,
1565
2035
  outputTokens: usage.completion_tokens ?? usage.output_tokens ?? 0,
1566
- reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0
2036
+ reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0,
2037
+ cachedTokens
1567
2038
  };
1568
2039
  }
1569
- function trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature) {
2040
+ function trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens = 0) {
1570
2041
  tracker.track({
1571
2042
  model,
1572
2043
  inputTokens,
1573
2044
  outputTokens: outputTokens + reasoningTokens,
1574
2045
  ...reasoningTokens > 0 && { reasoningTokens },
2046
+ ...cachedTokens > 0 && { cachedTokens },
1575
2047
  ...sessionId !== void 0 && { sessionId },
1576
2048
  ...userId !== void 0 && { userId },
1577
2049
  ...feature !== void 0 && { feature }
@@ -1583,13 +2055,13 @@ async function* wrapStream(stream, model, sessionId, userId, feature, tracker) {
1583
2055
  lastChunk = chunk;
1584
2056
  yield chunk;
1585
2057
  }
1586
- const { inputTokens, outputTokens, reasoningTokens } = extractUsage(lastChunk?.usage);
2058
+ const { inputTokens, outputTokens, reasoningTokens, cachedTokens } = extractUsage(lastChunk?.usage);
1587
2059
  if (!lastChunk?.usage) {
1588
2060
  console.warn(
1589
2061
  `[tokenwatch] No usage data in stream for model "${model}". Cost recorded as $0. Pass stream_options: { include_usage: true } to get accurate costs.`
1590
2062
  );
1591
2063
  }
1592
- trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature);
2064
+ trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens);
1593
2065
  }
1594
2066
  function wrapOpenAI(client, tracker) {
1595
2067
  const proxiedCompletions = new Proxy(client.chat.completions, {
@@ -1611,7 +2083,7 @@ function wrapOpenAI(client, tracker) {
1611
2083
  );
1612
2084
  }
1613
2085
  const completion = result;
1614
- const { inputTokens, outputTokens, reasoningTokens } = extractUsage(completion.usage);
2086
+ const { inputTokens, outputTokens, reasoningTokens, cachedTokens } = extractUsage(completion.usage);
1615
2087
  trackWithMeta(
1616
2088
  tracker,
1617
2089
  completion.model ?? model,
@@ -1620,7 +2092,8 @@ function wrapOpenAI(client, tracker) {
1620
2092
  reasoningTokens,
1621
2093
  sessionId,
1622
2094
  userId,
1623
- feature
2095
+ feature,
2096
+ cachedTokens
1624
2097
  );
1625
2098
  return result;
1626
2099
  };
@@ -1667,10 +2140,12 @@ function extractMeta2(params) {
1667
2140
  };
1668
2141
  }
1669
2142
  function extractUsage2(usage) {
1670
- if (!usage) return { inputTokens: 0, outputTokens: 0 };
2143
+ if (!usage) return { inputTokens: 0, outputTokens: 0, cachedTokens: 0, cacheCreationTokens: 0 };
1671
2144
  return {
1672
2145
  inputTokens: usage.input_tokens ?? 0,
1673
- outputTokens: usage.output_tokens ?? 0
2146
+ outputTokens: usage.output_tokens ?? 0,
2147
+ cachedTokens: usage.cache_read_input_tokens ?? 0,
2148
+ cacheCreationTokens: usage.cache_creation_input_tokens ?? 0
1674
2149
  };
1675
2150
  }
1676
2151
  function extractThinkingTokenApprox(content) {
@@ -1678,12 +2153,14 @@ function extractThinkingTokenApprox(content) {
1678
2153
  const chars = content.filter((b) => b.type === "thinking").reduce((sum, b) => sum + (b.thinking?.length ?? 0), 0);
1679
2154
  return chars > 0 ? Math.round(chars / 4) : 0;
1680
2155
  }
1681
- function trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature) {
2156
+ function trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens = 0, cacheCreationTokens = 0) {
1682
2157
  tracker.track({
1683
2158
  model,
1684
2159
  inputTokens,
1685
2160
  outputTokens,
1686
2161
  ...reasoningTokens > 0 && { reasoningTokens },
2162
+ ...cachedTokens > 0 && { cachedTokens },
2163
+ ...cacheCreationTokens > 0 && { cacheCreationTokens },
1687
2164
  ...sessionId !== void 0 && { sessionId },
1688
2165
  ...userId !== void 0 && { userId },
1689
2166
  ...feature !== void 0 && { feature }
@@ -1692,12 +2169,16 @@ function trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningToke
1692
2169
  async function* wrapStream2(stream, model, sessionId, userId, feature, tracker) {
1693
2170
  let inputTokens = 0;
1694
2171
  let outputTokens = 0;
2172
+ let cachedTokens = 0;
2173
+ let cacheCreationTokens = 0;
1695
2174
  let currentBlockIsThinking = false;
1696
2175
  let thinkingCharCount = 0;
1697
2176
  for await (const event of stream) {
1698
2177
  yield event;
1699
2178
  if (event.type === "message_start" && event.message?.usage) {
1700
2179
  inputTokens = event.message.usage.input_tokens ?? 0;
2180
+ cachedTokens = event.message.usage.cache_read_input_tokens ?? 0;
2181
+ cacheCreationTokens = event.message.usage.cache_creation_input_tokens ?? 0;
1701
2182
  }
1702
2183
  if (event.type === "message_delta" && event.usage) {
1703
2184
  outputTokens = event.usage.output_tokens ?? 0;
@@ -1713,7 +2194,7 @@ async function* wrapStream2(stream, model, sessionId, userId, feature, tracker)
1713
2194
  }
1714
2195
  }
1715
2196
  const reasoningTokens = thinkingCharCount > 0 ? Math.round(thinkingCharCount / 4) : 0;
1716
- trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature);
2197
+ trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens, cacheCreationTokens);
1717
2198
  }
1718
2199
  function wrapAnthropic(client, tracker) {
1719
2200
  const proxiedMessages = new Proxy(client.messages, {
@@ -1735,7 +2216,7 @@ function wrapAnthropic(client, tracker) {
1735
2216
  );
1736
2217
  }
1737
2218
  const message = result;
1738
- const { inputTokens, outputTokens } = extractUsage2(message.usage);
2219
+ const { inputTokens, outputTokens, cachedTokens, cacheCreationTokens } = extractUsage2(message.usage);
1739
2220
  const reasoningTokens = extractThinkingTokenApprox(message.content);
1740
2221
  trackWithMeta2(
1741
2222
  tracker,
@@ -1745,7 +2226,9 @@ function wrapAnthropic(client, tracker) {
1745
2226
  reasoningTokens,
1746
2227
  sessionId,
1747
2228
  userId,
1748
- feature
2229
+ feature,
2230
+ cachedTokens,
2231
+ cacheCreationTokens
1749
2232
  );
1750
2233
  return result;
1751
2234
  };
@@ -1816,6 +2299,7 @@ function wrapGemini(client, tracker) {
1816
2299
  }
1817
2300
  // Annotate the CommonJS export names for ESM import in node:
1818
2301
  0 && (module.exports = {
2302
+ createLazyTracker,
1819
2303
  createTracker,
1820
2304
  wrapAnthropic,
1821
2305
  wrapDeepSeek,