@diogonzafe/tokenwatch 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -20,6 +20,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
20
20
  // src/index.ts
21
21
  var src_exports = {};
22
22
  __export(src_exports, {
23
+ createLazyTracker: () => createLazyTracker,
23
24
  createTracker: () => createTracker,
24
25
  wrapAnthropic: () => wrapAnthropic,
25
26
  wrapDeepSeek: () => wrapOpenAI,
@@ -55,8 +56,45 @@ function lookupInMap(model, map) {
55
56
  }
56
57
  return void 0;
57
58
  }
58
/**
 * Compute the USD cost of one request. All prices are USD per one million tokens.
 *
 * NOTE(review): the four token counts are billed additively, so this presumably
 * expects `inputTokens` to exclude cached / cache-creation tokens — confirm
 * against the callers.
 *
 * @param {number} inputTokens - Tokens billed at `price.input`.
 * @param {number} outputTokens - Tokens billed at `price.output`.
 * @param {{input: number, output: number, cachedInput?: number, cacheCreationInput?: number}} price
 *   Per-million-token rates for the model.
 * @param {number} [cachedTokens=0] - Cache-read tokens, billed at `price.cachedInput`
 *   or, when that rate is absent, at the regular `price.input` rate.
 * @param {number} [cacheCreationTokens=0] - Cache-write tokens, billed at
 *   `price.cacheCreationInput` or, when that rate is absent, at 1.25x `price.input`.
 * @returns {number} Total cost in USD.
 */
function calculateCost(inputTokens, outputTokens, price, cachedTokens = 0, cacheCreationTokens = 0) {
  const regularInputCost = inputTokens / 1e6 * price.input;
  const cachedReadCost = cachedTokens / 1e6 * (price.cachedInput ?? price.input);
  const cacheCreationCost = cacheCreationTokens / 1e6 * (price.cacheCreationInput ?? price.input * 1.25);
  const outputCost = outputTokens / 1e6 * price.output;
  return regularInputCost + cachedReadCost + cacheCreationCost + outputCost;
}

// src/core/suggestions.ts
// Model-name prefixes used to keep suggestions within the same model family.
var PROVIDER_PREFIXES = ["gpt-", "claude-", "gemini-", "deepseek-"];

/**
 * Return the known prefix that `model` starts with, or `undefined` when the
 * model name matches none of them (e.g. "o3").
 *
 * @param {string} model
 * @returns {string | undefined}
 */
function getProviderPrefix(model) {
  return PROVIDER_PREFIXES.find((p) => model.startsWith(p));
}

/**
 * Log a one-line suggestion when another model sharing the same name prefix
 * would have cost less than half of what this request cost.
 *
 * Candidates are priced with regular input/output rates only (no cache
 * tokens are available here). Price layers are merged so that custom prices
 * override remote prices, which override bundled prices.
 *
 * @param {string} model - Model that served the request.
 * @param {number} costUSD - Actual cost of the request in USD.
 * @param {number} inputTokens - Input tokens of the request.
 * @param {number} outputTokens - Output tokens of the request.
 * @param {{bundledPrices: object, remotePrices?: object, customPrices?: object}} layers
 * @returns {void}
 */
function maybeSuggestCheaperModel(model, costUSD, inputTokens, outputTokens, layers) {
  // `!(x > 0)` also rejects NaN, which `x <= 0` let through and which ended
  // up printed as "NaN% cheaper".
  if (!(costUSD > 0)) return;
  const prefix = getProviderPrefix(model);
  if (!prefix) return;
  const mergedMap = {
    ...layers.bundledPrices,
    ...layers.remotePrices ?? {},
    ...layers.customPrices ?? {}
  };
  let cheapestModel;
  let cheapestCost = Infinity;
  for (const [key, price] of Object.entries(mergedMap)) {
    if (key === model || !key.startsWith(prefix) || !price) continue;
    // The message claims the candidate "could handle this": do not propose a
    // model whose declared input limit is smaller than this request's input.
    if (price.maxInputTokens != null && inputTokens > price.maxInputTokens) continue;
    const candidateCost = calculateCost(inputTokens, outputTokens, price);
    if (candidateCost < cheapestCost) {
      cheapestCost = candidateCost;
      cheapestModel = key;
    }
  }
  // Only speak up for savings of more than 50%.
  if (cheapestModel === void 0 || cheapestCost >= costUSD * 0.5) return;
  const savingsPct = Math.round((1 - cheapestCost / costUSD) * 100);
  console.log(
    `[tokenwatch] Suggestion: ${cheapestModel} could handle this for ~$${cheapestCost.toFixed(4)} (${savingsPct}% cheaper than ${model})`
  );
}
61
99
 
62
100
  // src/core/storage.ts
@@ -105,29 +143,51 @@ var SqliteStorage = class {
105
143
  migrate() {
106
144
  this.db.exec(`
107
145
  CREATE TABLE IF NOT EXISTS usage (
108
- id INTEGER PRIMARY KEY AUTOINCREMENT,
109
- model TEXT NOT NULL,
110
- input_tokens INTEGER NOT NULL,
111
- output_tokens INTEGER NOT NULL,
112
- cost_usd REAL NOT NULL,
113
- session_id TEXT,
114
- user_id TEXT,
115
- timestamp TEXT NOT NULL
146
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
147
+ model TEXT NOT NULL,
148
+ input_tokens INTEGER NOT NULL,
149
+ output_tokens INTEGER NOT NULL,
150
+ reasoning_tokens INTEGER NOT NULL DEFAULT 0,
151
+ cached_tokens INTEGER NOT NULL DEFAULT 0,
152
+ cache_creation_tokens INTEGER NOT NULL DEFAULT 0,
153
+ cost_usd REAL NOT NULL,
154
+ session_id TEXT,
155
+ user_id TEXT,
156
+ feature TEXT,
157
+ timestamp TEXT NOT NULL
116
158
  )
117
159
  `);
160
+ const cols = this.db.prepare(`PRAGMA table_info(usage)`).all().map((c) => c.name);
161
+ if (!cols.includes("reasoning_tokens")) {
162
+ this.db.exec(`ALTER TABLE usage ADD COLUMN reasoning_tokens INTEGER NOT NULL DEFAULT 0`);
163
+ }
164
+ if (!cols.includes("feature")) {
165
+ this.db.exec(`ALTER TABLE usage ADD COLUMN feature TEXT`);
166
+ }
167
+ if (!cols.includes("cached_tokens")) {
168
+ this.db.exec(`ALTER TABLE usage ADD COLUMN cached_tokens INTEGER NOT NULL DEFAULT 0`);
169
+ }
170
+ if (!cols.includes("cache_creation_tokens")) {
171
+ this.db.exec(`ALTER TABLE usage ADD COLUMN cache_creation_tokens INTEGER NOT NULL DEFAULT 0`);
172
+ }
118
173
  }
119
174
  record(entry) {
120
175
  this.db.prepare(
121
176
  `INSERT INTO usage
122
- (model, input_tokens, output_tokens, cost_usd, session_id, user_id, timestamp)
123
- VALUES (?, ?, ?, ?, ?, ?, ?)`
177
+ (model, input_tokens, output_tokens, reasoning_tokens, cached_tokens, cache_creation_tokens,
178
+ cost_usd, session_id, user_id, feature, timestamp)
179
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
124
180
  ).run(
125
181
  entry.model,
126
182
  entry.inputTokens,
127
183
  entry.outputTokens,
184
+ entry.reasoningTokens ?? 0,
185
+ entry.cachedTokens ?? 0,
186
+ entry.cacheCreationTokens ?? 0,
128
187
  entry.costUSD,
129
188
  entry.sessionId ?? null,
130
189
  entry.userId ?? null,
190
+ entry.feature ?? null,
131
191
  entry.timestamp
132
192
  );
133
193
  }
@@ -137,9 +197,13 @@ var SqliteStorage = class {
137
197
  model: r.model,
138
198
  inputTokens: r.input_tokens,
139
199
  outputTokens: r.output_tokens,
200
+ ...r.reasoning_tokens > 0 && { reasoningTokens: r.reasoning_tokens },
201
+ ...r.cached_tokens > 0 && { cachedTokens: r.cached_tokens },
202
+ ...r.cache_creation_tokens > 0 && { cacheCreationTokens: r.cache_creation_tokens },
140
203
  costUSD: r.cost_usd,
141
204
  ...r.session_id != null && { sessionId: r.session_id },
142
205
  ...r.user_id != null && { userId: r.user_id },
206
+ ...r.feature != null && { feature: r.feature },
143
207
  timestamp: r.timestamp
144
208
  }));
145
209
  }
@@ -171,7 +235,7 @@ async function fetchRemotePrices(url = REMOTE_URL) {
171
235
  const data = await res.json();
172
236
  if (!data?.models) return null;
173
237
  await persistCache(data);
174
- return data.models;
238
+ return { models: data.models, updated_at: data.updated_at ?? "" };
175
239
  } catch {
176
240
  return null;
177
241
  }
@@ -183,7 +247,8 @@ async function loadCachedPrices() {
183
247
  const data = JSON.parse(raw);
184
248
  const age = Date.now() - (data._cachedAt ?? 0);
185
249
  if (age > CACHE_TTL_MS) return null;
186
- return data.models ?? null;
250
+ if (!data.models) return null;
251
+ return { models: data.models, updated_at: data.updated_at ?? "" };
187
252
  } catch {
188
253
  return null;
189
254
  }
@@ -204,87 +269,110 @@ async function getRemotePrices() {
204
269
 
205
270
  // prices.json
206
271
  var prices_default = {
207
- updated_at: "2026-04-21",
272
+ updated_at: "2026-04-22",
208
273
  source: "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json",
209
274
  models: {
210
275
  "gpt-4o": {
211
276
  input: 2.5,
212
277
  output: 10,
278
+ cachedInput: 1.25,
213
279
  maxInputTokens: 128e3
214
280
  },
215
281
  "gpt-4o-mini": {
216
282
  input: 0.15,
217
283
  output: 0.6,
284
+ cachedInput: 0.075,
218
285
  maxInputTokens: 128e3
219
286
  },
220
287
  "gpt-5": {
221
288
  input: 1.25,
222
289
  output: 10,
290
+ cachedInput: 0.125,
223
291
  maxInputTokens: 272e3
224
292
  },
225
293
  "gpt-5-mini": {
226
294
  input: 0.25,
227
295
  output: 2,
296
+ cachedInput: 0.025,
228
297
  maxInputTokens: 272e3
229
298
  },
230
299
  "gpt-5-nano": {
231
300
  input: 0.05,
232
301
  output: 0.4,
302
+ cachedInput: 5e-3,
233
303
  maxInputTokens: 272e3
234
304
  },
235
305
  "claude-opus-4-6": {
236
306
  input: 5,
237
307
  output: 25,
308
+ cachedInput: 0.5,
309
+ cacheCreationInput: 6.25,
238
310
  maxInputTokens: 1e6
239
311
  },
240
312
  "claude-sonnet-4-6": {
241
313
  input: 3,
242
314
  output: 15,
315
+ cachedInput: 0.3,
316
+ cacheCreationInput: 3.75,
243
317
  maxInputTokens: 1e6
244
318
  },
245
319
  "claude-haiku-4-5": {
246
320
  input: 1,
247
321
  output: 5,
322
+ cachedInput: 0.1,
323
+ cacheCreationInput: 1.25,
248
324
  maxInputTokens: 2e5
249
325
  },
250
326
  "gemini-2.5-pro": {
251
327
  input: 1.25,
252
328
  output: 10,
329
+ cachedInput: 0.125,
253
330
  maxInputTokens: 1048576
254
331
  },
255
332
  "gemini-2.5-flash": {
256
333
  input: 0.3,
257
334
  output: 2.5,
335
+ cachedInput: 0.03,
258
336
  maxInputTokens: 1048576
259
337
  },
260
338
  "deepseek-chat": {
261
339
  input: 0.28,
262
340
  output: 0.42,
341
+ cachedInput: 0.028,
263
342
  maxInputTokens: 131072
264
343
  },
265
344
  "deepseek-reasoner": {
266
345
  input: 0.28,
267
346
  output: 0.42,
347
+ cachedInput: 0.028,
268
348
  maxInputTokens: 131072
269
349
  },
270
350
  "claude-opus-4-5": {
271
351
  input: 5,
272
352
  output: 25,
353
+ cachedInput: 0.5,
354
+ cacheCreationInput: 6.25,
273
355
  maxInputTokens: 2e5
274
356
  },
275
357
  "claude-opus-4-7": {
276
358
  input: 5,
277
359
  output: 25,
360
+ cachedInput: 0.5,
361
+ cacheCreationInput: 6.25,
278
362
  maxInputTokens: 1e6
279
363
  },
280
364
  "claude-opus-4-1": {
281
365
  input: 15,
282
366
  output: 75,
367
+ cachedInput: 1.5,
368
+ cacheCreationInput: 18.75,
283
369
  maxInputTokens: 2e5
284
370
  },
285
371
  "claude-sonnet-4-5": {
286
372
  input: 3,
287
373
  output: 15,
374
+ cachedInput: 0.3,
375
+ cacheCreationInput: 3.75,
288
376
  maxInputTokens: 2e5
289
377
  },
290
378
  "gpt-oss-120b": {
@@ -375,36 +463,43 @@ var prices_default = {
375
463
  "gpt-4.1": {
376
464
  input: 2,
377
465
  output: 8,
466
+ cachedInput: 0.5,
378
467
  maxInputTokens: 1047576
379
468
  },
380
469
  "gpt-4.1-2025-04-14": {
381
470
  input: 2,
382
471
  output: 8,
472
+ cachedInput: 0.5,
383
473
  maxInputTokens: 1047576
384
474
  },
385
475
  "gpt-4.1-mini": {
386
476
  input: 0.4,
387
477
  output: 1.6,
478
+ cachedInput: 0.1,
388
479
  maxInputTokens: 1047576
389
480
  },
390
481
  "gpt-4.1-mini-2025-04-14": {
391
482
  input: 0.4,
392
483
  output: 1.6,
484
+ cachedInput: 0.1,
393
485
  maxInputTokens: 1047576
394
486
  },
395
487
  "gpt-4.1-nano": {
396
488
  input: 0.1,
397
489
  output: 0.4,
490
+ cachedInput: 0.025,
398
491
  maxInputTokens: 1047576
399
492
  },
400
493
  "gpt-4.1-nano-2025-04-14": {
401
494
  input: 0.1,
402
495
  output: 0.4,
496
+ cachedInput: 0.025,
403
497
  maxInputTokens: 1047576
404
498
  },
405
499
  "gpt-4.5-preview": {
406
500
  input: 75,
407
501
  output: 150,
502
+ cachedInput: 37.5,
408
503
  maxInputTokens: 128e3
409
504
  },
410
505
  "gpt-4o-2024-05-13": {
@@ -415,11 +510,13 @@ var prices_default = {
415
510
  "gpt-4o-2024-08-06": {
416
511
  input: 2.5,
417
512
  output: 10,
513
+ cachedInput: 1.25,
418
514
  maxInputTokens: 128e3
419
515
  },
420
516
  "gpt-4o-2024-11-20": {
421
517
  input: 2.5,
422
518
  output: 10,
519
+ cachedInput: 1.25,
423
520
  maxInputTokens: 128e3
424
521
  },
425
522
  "gpt-audio-2025-08-28": {
@@ -445,6 +542,7 @@ var prices_default = {
445
542
  "gpt-4o-mini-2024-07-18": {
446
543
  input: 0.15,
447
544
  output: 0.6,
545
+ cachedInput: 0.075,
448
546
  maxInputTokens: 128e3
449
547
  },
450
548
  "gpt-4o-mini-audio-preview-2024-12-17": {
@@ -455,21 +553,25 @@ var prices_default = {
455
553
  "gpt-4o-mini-realtime-preview-2024-12-17": {
456
554
  input: 0.6,
457
555
  output: 2.4,
556
+ cachedInput: 0.3,
458
557
  maxInputTokens: 128e3
459
558
  },
460
559
  "gpt-realtime-2025-08-28": {
461
560
  input: 4,
462
561
  output: 16,
562
+ cachedInput: 0.4,
463
563
  maxInputTokens: 32e3
464
564
  },
465
565
  "gpt-realtime-1.5-2026-02-23": {
466
566
  input: 4,
467
567
  output: 16,
568
+ cachedInput: 4,
468
569
  maxInputTokens: 32e3
469
570
  },
470
571
  "gpt-realtime-mini-2025-10-06": {
471
572
  input: 0.6,
472
573
  output: 2.4,
574
+ cachedInput: 0.06,
473
575
  maxInputTokens: 128e3
474
576
  },
475
577
  "gpt-4o-mini-transcribe": {
@@ -480,11 +582,13 @@ var prices_default = {
480
582
  "gpt-4o-realtime-preview-2024-10-01": {
481
583
  input: 5,
482
584
  output: 20,
585
+ cachedInput: 2.5,
483
586
  maxInputTokens: 128e3
484
587
  },
485
588
  "gpt-4o-realtime-preview-2024-12-17": {
486
589
  input: 5,
487
590
  output: 20,
591
+ cachedInput: 2.5,
488
592
  maxInputTokens: 128e3
489
593
  },
490
594
  "gpt-4o-transcribe": {
@@ -500,51 +604,61 @@ var prices_default = {
500
604
  "gpt-5.1-2025-11-13": {
501
605
  input: 1.25,
502
606
  output: 10,
607
+ cachedInput: 0.125,
503
608
  maxInputTokens: 272e3
504
609
  },
505
610
  "gpt-5.1-chat-2025-11-13": {
506
611
  input: 1.25,
507
612
  output: 10,
613
+ cachedInput: 0.125,
508
614
  maxInputTokens: 128e3
509
615
  },
510
616
  "gpt-5.1-codex-2025-11-13": {
511
617
  input: 1.25,
512
618
  output: 10,
619
+ cachedInput: 0.125,
513
620
  maxInputTokens: 272e3
514
621
  },
515
622
  "gpt-5.1-codex-mini-2025-11-13": {
516
623
  input: 0.25,
517
624
  output: 2,
625
+ cachedInput: 0.025,
518
626
  maxInputTokens: 272e3
519
627
  },
520
628
  "gpt-5-2025-08-07": {
521
629
  input: 1.25,
522
630
  output: 10,
631
+ cachedInput: 0.125,
523
632
  maxInputTokens: 272e3
524
633
  },
525
634
  "gpt-5-chat": {
526
635
  input: 1.25,
527
636
  output: 10,
637
+ cachedInput: 0.125,
528
638
  maxInputTokens: 128e3
529
639
  },
530
640
  "gpt-5-chat-latest": {
531
641
  input: 1.25,
532
642
  output: 10,
643
+ cachedInput: 0.125,
533
644
  maxInputTokens: 128e3
534
645
  },
535
646
  "gpt-5-codex": {
536
647
  input: 1.25,
537
648
  output: 10,
649
+ cachedInput: 0.125,
538
650
  maxInputTokens: 272e3
539
651
  },
540
652
  "gpt-5-mini-2025-08-07": {
541
653
  input: 0.25,
542
654
  output: 2,
655
+ cachedInput: 0.025,
543
656
  maxInputTokens: 272e3
544
657
  },
545
658
  "gpt-5-nano-2025-08-07": {
546
659
  input: 0.05,
547
660
  output: 0.4,
661
+ cachedInput: 5e-3,
548
662
  maxInputTokens: 272e3
549
663
  },
550
664
  "gpt-5-pro": {
@@ -555,61 +669,73 @@ var prices_default = {
555
669
  "gpt-5.1": {
556
670
  input: 1.25,
557
671
  output: 10,
672
+ cachedInput: 0.125,
558
673
  maxInputTokens: 272e3
559
674
  },
560
675
  "gpt-5.1-chat": {
561
676
  input: 1.25,
562
677
  output: 10,
678
+ cachedInput: 0.125,
563
679
  maxInputTokens: 128e3
564
680
  },
565
681
  "gpt-5.1-codex": {
566
682
  input: 1.25,
567
683
  output: 10,
684
+ cachedInput: 0.125,
568
685
  maxInputTokens: 272e3
569
686
  },
570
687
  "gpt-5.1-codex-max": {
571
688
  input: 1.25,
572
689
  output: 10,
690
+ cachedInput: 0.125,
573
691
  maxInputTokens: 272e3
574
692
  },
575
693
  "gpt-5.1-codex-mini": {
576
694
  input: 0.25,
577
695
  output: 2,
696
+ cachedInput: 0.025,
578
697
  maxInputTokens: 272e3
579
698
  },
580
699
  "gpt-5.2": {
581
700
  input: 1.75,
582
701
  output: 14,
702
+ cachedInput: 0.175,
583
703
  maxInputTokens: 272e3
584
704
  },
585
705
  "gpt-5.2-2025-12-11": {
586
706
  input: 1.75,
587
707
  output: 14,
708
+ cachedInput: 0.175,
588
709
  maxInputTokens: 272e3
589
710
  },
590
711
  "gpt-5.2-chat": {
591
712
  input: 1.75,
592
713
  output: 14,
714
+ cachedInput: 0.175,
593
715
  maxInputTokens: 128e3
594
716
  },
595
717
  "gpt-5.2-chat-2025-12-11": {
596
718
  input: 1.75,
597
719
  output: 14,
720
+ cachedInput: 0.175,
598
721
  maxInputTokens: 128e3
599
722
  },
600
723
  "gpt-5.2-codex": {
601
724
  input: 1.75,
602
725
  output: 14,
726
+ cachedInput: 0.175,
603
727
  maxInputTokens: 272e3
604
728
  },
605
729
  "gpt-5.3-chat": {
606
730
  input: 1.75,
607
731
  output: 14,
732
+ cachedInput: 0.175,
608
733
  maxInputTokens: 128e3
609
734
  },
610
735
  "gpt-5.3-codex": {
611
736
  input: 1.75,
612
737
  output: 14,
738
+ cachedInput: 0.175,
613
739
  maxInputTokens: 272e3
614
740
  },
615
741
  "gpt-5.2-pro": {
@@ -625,71 +751,85 @@ var prices_default = {
625
751
  "gpt-5.4": {
626
752
  input: 2.5,
627
753
  output: 15,
754
+ cachedInput: 0.25,
628
755
  maxInputTokens: 105e4
629
756
  },
630
757
  "gpt-5.4-2026-03-05": {
631
758
  input: 2.5,
632
759
  output: 15,
760
+ cachedInput: 0.25,
633
761
  maxInputTokens: 105e4
634
762
  },
635
763
  "gpt-5.4-pro": {
636
764
  input: 30,
637
765
  output: 180,
766
+ cachedInput: 3,
638
767
  maxInputTokens: 105e4
639
768
  },
640
769
  "gpt-5.4-pro-2026-03-05": {
641
770
  input: 30,
642
771
  output: 180,
772
+ cachedInput: 3,
643
773
  maxInputTokens: 105e4
644
774
  },
645
775
  "gpt-5.4-mini": {
646
776
  input: 0.75,
647
777
  output: 4.5,
778
+ cachedInput: 0.075,
648
779
  maxInputTokens: 272e3
649
780
  },
650
781
  "gpt-5.4-nano": {
651
782
  input: 0.2,
652
783
  output: 1.25,
784
+ cachedInput: 0.02,
653
785
  maxInputTokens: 272e3
654
786
  },
655
787
  "o1-2024-12-17": {
656
788
  input: 15,
657
789
  output: 60,
790
+ cachedInput: 7.5,
658
791
  maxInputTokens: 2e5
659
792
  },
660
793
  "o1-mini": {
661
794
  input: 1.21,
662
795
  output: 4.84,
796
+ cachedInput: 0.605,
663
797
  maxInputTokens: 128e3
664
798
  },
665
799
  "o1-mini-2024-09-12": {
666
800
  input: 1.1,
667
801
  output: 4.4,
802
+ cachedInput: 0.55,
668
803
  maxInputTokens: 128e3
669
804
  },
670
805
  "o1-preview": {
671
806
  input: 15,
672
807
  output: 60,
808
+ cachedInput: 7.5,
673
809
  maxInputTokens: 128e3
674
810
  },
675
811
  "o1-preview-2024-09-12": {
676
812
  input: 15,
677
813
  output: 60,
814
+ cachedInput: 7.5,
678
815
  maxInputTokens: 128e3
679
816
  },
680
817
  "o3-2025-04-16": {
681
818
  input: 2,
682
819
  output: 8,
820
+ cachedInput: 0.5,
683
821
  maxInputTokens: 2e5
684
822
  },
685
823
  "o3-mini": {
686
824
  input: 1.1,
687
825
  output: 4.4,
826
+ cachedInput: 0.55,
688
827
  maxInputTokens: 2e5
689
828
  },
690
829
  "o3-mini-2025-01-31": {
691
830
  input: 1.1,
692
831
  output: 4.4,
832
+ cachedInput: 0.55,
693
833
  maxInputTokens: 2e5
694
834
  },
695
835
  "o3-pro": {
@@ -705,11 +845,13 @@ var prices_default = {
705
845
  "o4-mini": {
706
846
  input: 1.1,
707
847
  output: 4.4,
848
+ cachedInput: 0.275,
708
849
  maxInputTokens: 2e5
709
850
  },
710
851
  "o4-mini-2025-04-16": {
711
852
  input: 1.1,
712
853
  output: 4.4,
854
+ cachedInput: 0.275,
713
855
  maxInputTokens: 2e5
714
856
  },
715
857
  "deepseek-v3.2": {
@@ -730,6 +872,7 @@ var prices_default = {
730
872
  "deepseek-v3": {
731
873
  input: 0.27,
732
874
  output: 1.1,
875
+ cachedInput: 0.07,
733
876
  maxInputTokens: 65536
734
877
  },
735
878
  "deepseek-v3-0324": {
@@ -745,76 +888,105 @@ var prices_default = {
745
888
  "claude-haiku-4-5-20251001": {
746
889
  input: 1,
747
890
  output: 5,
891
+ cachedInput: 0.1,
892
+ cacheCreationInput: 1.25,
748
893
  maxInputTokens: 2e5
749
894
  },
750
895
  "claude-3-7-sonnet-20250219": {
751
896
  input: 3,
752
897
  output: 15,
898
+ cachedInput: 0.3,
899
+ cacheCreationInput: 3.75,
753
900
  maxInputTokens: 2e5
754
901
  },
755
902
  "claude-3-haiku-20240307": {
756
903
  input: 0.25,
757
904
  output: 1.25,
905
+ cachedInput: 0.03,
906
+ cacheCreationInput: 0.3,
758
907
  maxInputTokens: 2e5
759
908
  },
760
909
  "claude-3-opus-20240229": {
761
910
  input: 15,
762
911
  output: 75,
912
+ cachedInput: 1.5,
913
+ cacheCreationInput: 18.75,
763
914
  maxInputTokens: 2e5
764
915
  },
765
916
  "claude-4-opus-20250514": {
766
917
  input: 15,
767
918
  output: 75,
919
+ cachedInput: 1.5,
920
+ cacheCreationInput: 18.75,
768
921
  maxInputTokens: 2e5
769
922
  },
770
923
  "claude-4-sonnet-20250514": {
771
924
  input: 3,
772
925
  output: 15,
926
+ cachedInput: 0.3,
927
+ cacheCreationInput: 3.75,
773
928
  maxInputTokens: 1e6
774
929
  },
775
930
  "claude-sonnet-4-5-20250929": {
776
931
  input: 3,
777
932
  output: 15,
933
+ cachedInput: 0.3,
934
+ cacheCreationInput: 3.75,
778
935
  maxInputTokens: 2e5
779
936
  },
780
937
  "claude-sonnet-4-5-20250929-v1:0": {
781
938
  input: 3,
782
939
  output: 15,
940
+ cachedInput: 0.3,
941
+ cacheCreationInput: 3.75,
783
942
  maxInputTokens: 2e5
784
943
  },
785
944
  "claude-opus-4-1-20250805": {
786
945
  input: 15,
787
946
  output: 75,
947
+ cachedInput: 1.5,
948
+ cacheCreationInput: 18.75,
788
949
  maxInputTokens: 2e5
789
950
  },
790
951
  "claude-opus-4-20250514": {
791
952
  input: 15,
792
953
  output: 75,
954
+ cachedInput: 1.5,
955
+ cacheCreationInput: 18.75,
793
956
  maxInputTokens: 2e5
794
957
  },
795
958
  "claude-opus-4-5-20251101": {
796
959
  input: 5,
797
960
  output: 25,
961
+ cachedInput: 0.5,
962
+ cacheCreationInput: 6.25,
798
963
  maxInputTokens: 2e5
799
964
  },
800
965
  "claude-opus-4-6-20260205": {
801
966
  input: 5,
802
967
  output: 25,
968
+ cachedInput: 0.5,
969
+ cacheCreationInput: 6.25,
803
970
  maxInputTokens: 1e6
804
971
  },
805
972
  "claude-opus-4-7-20260416": {
806
973
  input: 5,
807
974
  output: 25,
975
+ cachedInput: 0.5,
976
+ cacheCreationInput: 6.25,
808
977
  maxInputTokens: 1e6
809
978
  },
810
979
  "claude-sonnet-4-20250514": {
811
980
  input: 3,
812
981
  output: 15,
982
+ cachedInput: 0.3,
983
+ cacheCreationInput: 3.75,
813
984
  maxInputTokens: 1e6
814
985
  },
815
986
  "codex-mini-latest": {
816
987
  input: 1.5,
817
988
  output: 6,
989
+ cachedInput: 0.375,
818
990
  maxInputTokens: 2e5
819
991
  },
820
992
  "deepseek-ai/deepseek-r1": {
@@ -864,6 +1036,7 @@ var prices_default = {
864
1036
  "deepseek-ai/deepseek-v3.1-terminus": {
865
1037
  input: 0.27,
866
1038
  output: 1,
1039
+ cachedInput: 0.216,
867
1040
  maxInputTokens: 163840
868
1041
  },
869
1042
  "deepseek-coder": {
@@ -874,26 +1047,31 @@ var prices_default = {
874
1047
  "gemini-2.0-flash": {
875
1048
  input: 0.1,
876
1049
  output: 0.4,
1050
+ cachedInput: 0.025,
877
1051
  maxInputTokens: 1048576
878
1052
  },
879
1053
  "gemini-2.0-flash-001": {
880
1054
  input: 0.1,
881
1055
  output: 0.4,
1056
+ cachedInput: 0.025,
882
1057
  maxInputTokens: 1048576
883
1058
  },
884
1059
  "gemini-2.0-flash-lite": {
885
1060
  input: 0.075,
886
1061
  output: 0.3,
1062
+ cachedInput: 0.01875,
887
1063
  maxInputTokens: 1048576
888
1064
  },
889
1065
  "gemini-2.0-flash-lite-001": {
890
1066
  input: 0.075,
891
1067
  output: 0.3,
1068
+ cachedInput: 0.01875,
892
1069
  maxInputTokens: 1048576
893
1070
  },
894
1071
  "gemini-2.5-flash-image": {
895
1072
  input: 0.3,
896
1073
  output: 2.5,
1074
+ cachedInput: 0.03,
897
1075
  maxInputTokens: 32768
898
1076
  },
899
1077
  "gemini-3-pro-image-preview": {
@@ -909,51 +1087,61 @@ var prices_default = {
909
1087
  "gemini-3.1-flash-lite-preview": {
910
1088
  input: 0.25,
911
1089
  output: 1.5,
1090
+ cachedInput: 0.025,
912
1091
  maxInputTokens: 1048576
913
1092
  },
914
1093
  "gemini-2.5-flash-lite": {
915
1094
  input: 0.1,
916
1095
  output: 0.4,
1096
+ cachedInput: 0.01,
917
1097
  maxInputTokens: 1048576
918
1098
  },
919
1099
  "gemini-2.5-flash-lite-preview-09-2025": {
920
1100
  input: 0.1,
921
1101
  output: 0.4,
1102
+ cachedInput: 0.01,
922
1103
  maxInputTokens: 1048576
923
1104
  },
924
1105
  "gemini-2.5-flash-preview-09-2025": {
925
1106
  input: 0.3,
926
1107
  output: 2.5,
1108
+ cachedInput: 0.075,
927
1109
  maxInputTokens: 1048576
928
1110
  },
929
1111
  "gemini-live-2.5-flash-preview-native-audio-09-2025": {
930
1112
  input: 0.3,
931
1113
  output: 2,
1114
+ cachedInput: 0.075,
932
1115
  maxInputTokens: 1048576
933
1116
  },
934
1117
  "gemini-2.5-flash-lite-preview-06-17": {
935
1118
  input: 0.1,
936
1119
  output: 0.4,
1120
+ cachedInput: 0.025,
937
1121
  maxInputTokens: 1048576
938
1122
  },
939
1123
  "gemini-3-pro-preview": {
940
1124
  input: 2,
941
1125
  output: 12,
1126
+ cachedInput: 0.2,
942
1127
  maxInputTokens: 1048576
943
1128
  },
944
1129
  "gemini-3.1-pro-preview": {
945
1130
  input: 2,
946
1131
  output: 12,
1132
+ cachedInput: 0.2,
947
1133
  maxInputTokens: 1048576
948
1134
  },
949
1135
  "gemini-3.1-pro-preview-customtools": {
950
1136
  input: 2,
951
1137
  output: 12,
1138
+ cachedInput: 0.2,
952
1139
  maxInputTokens: 1048576
953
1140
  },
954
1141
  "gemini-3-flash-preview": {
955
1142
  input: 0.5,
956
1143
  output: 3,
1144
+ cachedInput: 0.05,
957
1145
  maxInputTokens: 1048576
958
1146
  },
959
1147
  "gemini-robotics-er-1.5-preview": {
@@ -969,11 +1157,13 @@ var prices_default = {
969
1157
  "gemini-flash-latest": {
970
1158
  input: 0.3,
971
1159
  output: 2.5,
1160
+ cachedInput: 0.03,
972
1161
  maxInputTokens: 1048576
973
1162
  },
974
1163
  "gemini-flash-lite-latest": {
975
1164
  input: 0.1,
976
1165
  output: 0.4,
1166
+ cachedInput: 0.01,
977
1167
  maxInputTokens: 1048576
978
1168
  },
979
1169
  "gemini-gemma-2-27b-it": {
@@ -1049,39 +1239,47 @@ var prices_default = {
1049
1239
  "gpt-4o-mini-realtime-preview": {
1050
1240
  input: 0.6,
1051
1241
  output: 2.4,
1242
+ cachedInput: 0.3,
1052
1243
  maxInputTokens: 128e3
1053
1244
  },
1054
1245
  "gpt-4o-realtime-preview": {
1055
1246
  input: 5,
1056
1247
  output: 20,
1248
+ cachedInput: 2.5,
1057
1249
  maxInputTokens: 128e3
1058
1250
  },
1059
1251
  "gpt-4o-realtime-preview-2025-06-03": {
1060
1252
  input: 5,
1061
1253
  output: 20,
1254
+ cachedInput: 2.5,
1062
1255
  maxInputTokens: 128e3
1063
1256
  },
1064
1257
  "gpt-image-1.5": {
1065
1258
  input: 5,
1066
- output: 10
1259
+ output: 10,
1260
+ cachedInput: 1.25
1067
1261
  },
1068
1262
  "gpt-image-1.5-2025-12-16": {
1069
1263
  input: 5,
1070
- output: 10
1264
+ output: 10,
1265
+ cachedInput: 1.25
1071
1266
  },
1072
1267
  "gpt-5.1-chat-latest": {
1073
1268
  input: 1.25,
1074
1269
  output: 10,
1270
+ cachedInput: 0.125,
1075
1271
  maxInputTokens: 128e3
1076
1272
  },
1077
1273
  "gpt-5.2-chat-latest": {
1078
1274
  input: 1.75,
1079
1275
  output: 14,
1276
+ cachedInput: 0.175,
1080
1277
  maxInputTokens: 128e3
1081
1278
  },
1082
1279
  "gpt-5.3-chat-latest": {
1083
1280
  input: 1.75,
1084
1281
  output: 14,
1282
+ cachedInput: 0.175,
1085
1283
  maxInputTokens: 128e3
1086
1284
  },
1087
1285
  "gpt-5-pro-2025-10-06": {
@@ -1092,11 +1290,13 @@ var prices_default = {
1092
1290
  "gpt-realtime": {
1093
1291
  input: 4,
1094
1292
  output: 16,
1293
+ cachedInput: 0.4,
1095
1294
  maxInputTokens: 32e3
1096
1295
  },
1097
1296
  "gpt-realtime-1.5": {
1098
1297
  input: 4,
1099
1298
  output: 16,
1299
+ cachedInput: 0.4,
1100
1300
  maxInputTokens: 32e3
1101
1301
  },
1102
1302
  "gpt-realtime-mini": {
@@ -1143,6 +1343,7 @@ var prices_default = {
1143
1343
  o1: {
1144
1344
  input: 15,
1145
1345
  output: 60,
1346
+ cachedInput: 7.5,
1146
1347
  maxInputTokens: 2e5
1147
1348
  },
1148
1349
  "o1-pro": {
@@ -1158,6 +1359,7 @@ var prices_default = {
1158
1359
  o3: {
1159
1360
  input: 2,
1160
1361
  output: 8,
1362
+ cachedInput: 0.5,
1161
1363
  maxInputTokens: 2e5
1162
1364
  },
1163
1365
  "gpt-oss-20b": {
@@ -1182,6 +1384,8 @@ var prices_default = {
1182
1384
  "claude-haiku-4-5@20251001": {
1183
1385
  input: 1,
1184
1386
  output: 5,
1387
+ cachedInput: 0.1,
1388
+ cacheCreationInput: 1.25,
1185
1389
  maxInputTokens: 2e5
1186
1390
  },
1187
1391
  "claude-3-5-sonnet": {
@@ -1197,6 +1401,8 @@ var prices_default = {
1197
1401
  "claude-3-7-sonnet@20250219": {
1198
1402
  input: 3,
1199
1403
  output: 15,
1404
+ cachedInput: 0.3,
1405
+ cacheCreationInput: 3.75,
1200
1406
  maxInputTokens: 2e5
1201
1407
  },
1202
1408
  "claude-3-haiku": {
@@ -1232,46 +1438,64 @@ var prices_default = {
1232
1438
  "claude-opus-4": {
1233
1439
  input: 15,
1234
1440
  output: 75,
1441
+ cachedInput: 1.5,
1442
+ cacheCreationInput: 18.75,
1235
1443
  maxInputTokens: 2e5
1236
1444
  },
1237
1445
  "claude-opus-4-1@20250805": {
1238
1446
  input: 15,
1239
1447
  output: 75,
1448
+ cachedInput: 1.5,
1449
+ cacheCreationInput: 18.75,
1240
1450
  maxInputTokens: 2e5
1241
1451
  },
1242
1452
  "claude-opus-4-5@20251101": {
1243
1453
  input: 5,
1244
1454
  output: 25,
1455
+ cachedInput: 0.5,
1456
+ cacheCreationInput: 6.25,
1245
1457
  maxInputTokens: 2e5
1246
1458
  },
1247
1459
  "claude-opus-4-6@default": {
1248
1460
  input: 5,
1249
1461
  output: 25,
1462
+ cachedInput: 0.5,
1463
+ cacheCreationInput: 6.25,
1250
1464
  maxInputTokens: 1e6
1251
1465
  },
1252
1466
  "claude-opus-4-7@default": {
1253
1467
  input: 5,
1254
1468
  output: 25,
1469
+ cachedInput: 0.5,
1470
+ cacheCreationInput: 6.25,
1255
1471
  maxInputTokens: 1e6
1256
1472
  },
1257
1473
  "claude-sonnet-4-5@20250929": {
1258
1474
  input: 3,
1259
1475
  output: 15,
1476
+ cachedInput: 0.3,
1477
+ cacheCreationInput: 3.75,
1260
1478
  maxInputTokens: 2e5
1261
1479
  },
1262
1480
  "claude-opus-4@20250514": {
1263
1481
  input: 15,
1264
1482
  output: 75,
1483
+ cachedInput: 1.5,
1484
+ cacheCreationInput: 18.75,
1265
1485
  maxInputTokens: 2e5
1266
1486
  },
1267
1487
  "claude-sonnet-4": {
1268
1488
  input: 3,
1269
1489
  output: 15,
1490
+ cachedInput: 0.3,
1491
+ cacheCreationInput: 3.75,
1270
1492
  maxInputTokens: 1e6
1271
1493
  },
1272
1494
  "claude-sonnet-4@20250514": {
1273
1495
  input: 3,
1274
1496
  output: 15,
1497
+ cachedInput: 0.3,
1498
+ cacheCreationInput: 3.75,
1275
1499
  maxInputTokens: 1e6
1276
1500
  },
1277
1501
  "deepseek-ai/deepseek-v3.1-maas": {
@@ -1321,6 +1545,7 @@ var prices_default = {
1321
1545
  "gpt-realtime-mini-2025-12-15": {
1322
1546
  input: 0.6,
1323
1547
  output: 2.4,
1548
+ cachedInput: 0.06,
1324
1549
  maxInputTokens: 128e3
1325
1550
  },
1326
1551
  "gemini-2.5-flash-native-audio-latest": {
@@ -1346,16 +1571,20 @@ var prices_default = {
1346
1571
  "gemini-pro-latest": {
1347
1572
  input: 1.25,
1348
1573
  output: 10,
1574
+ cachedInput: 0.125,
1349
1575
  maxInputTokens: 1048576
1350
1576
  },
1351
1577
  "gemini-exp-1206": {
1352
1578
  input: 0.3,
1353
1579
  output: 2.5,
1580
+ cachedInput: 0.03,
1354
1581
  maxInputTokens: 1048576
1355
1582
  },
1356
1583
  "claude-sonnet-4-6@default": {
1357
1584
  input: 3,
1358
1585
  output: 15,
1586
+ cachedInput: 0.3,
1587
+ cacheCreationInput: 3.75,
1359
1588
  maxInputTokens: 1e6
1360
1589
  }
1361
1590
  }
@@ -1363,11 +1592,19 @@ var prices_default = {
1363
1592
 
1364
1593
  // src/core/tracker.ts
1365
1594
  var bundledPrices = prices_default.models;
1595
+ var bundledUpdatedAt = prices_default.updated_at ?? "";
1366
1596
  var ModelPriceSchema = import_zod.z.object({
1367
1597
  input: import_zod.z.number().nonnegative(),
1368
1598
  output: import_zod.z.number().nonnegative(),
1599
+ cachedInput: import_zod.z.number().nonnegative().optional(),
1600
+ cacheCreationInput: import_zod.z.number().nonnegative().optional(),
1369
1601
  maxInputTokens: import_zod.z.number().positive().optional()
1370
1602
  });
1603
+ var BudgetConfigSchema = import_zod.z.object({
1604
+ threshold: import_zod.z.number().positive(),
1605
+ webhookUrl: import_zod.z.string().url(),
1606
+ mode: import_zod.z.enum(["once", "always"]).optional().default("once")
1607
+ });
1371
1608
  var TrackerConfigSchema = import_zod.z.object({
1372
1609
  storage: import_zod.z.union([import_zod.z.enum(["memory", "sqlite"]), import_zod.z.custom((v) => {
1373
1610
  return v !== null && typeof v === "object" && typeof v.record === "function" && typeof v.getAll === "function" && typeof v.clearAll === "function" && typeof v.clearSession === "function";
@@ -1375,7 +1612,13 @@ var TrackerConfigSchema = import_zod.z.object({
1375
1612
  alertThreshold: import_zod.z.number().positive().optional(),
1376
1613
  webhookUrl: import_zod.z.string().url().optional(),
1377
1614
  syncPrices: import_zod.z.boolean().optional().default(true),
1378
- customPrices: import_zod.z.record(import_zod.z.string(), ModelPriceSchema).optional()
1615
+ customPrices: import_zod.z.record(import_zod.z.string(), ModelPriceSchema).optional(),
1616
+ warnIfStaleAfterHours: import_zod.z.number().nonnegative().optional().default(72),
1617
+ budgets: import_zod.z.object({
1618
+ perUser: BudgetConfigSchema.optional(),
1619
+ perSession: BudgetConfigSchema.optional()
1620
+ }).optional(),
1621
+ suggestions: import_zod.z.boolean().optional().default(false)
1379
1622
  });
1380
1623
  function createTracker(config = {}) {
1381
1624
  const parsed = TrackerConfigSchema.safeParse(config);
@@ -1389,19 +1632,45 @@ ${issues}`);
1389
1632
  alertThreshold,
1390
1633
  webhookUrl,
1391
1634
  syncPrices,
1392
- customPrices
1635
+ customPrices,
1636
+ warnIfStaleAfterHours,
1637
+ budgets,
1638
+ suggestions
1393
1639
  } = parsed.data;
1394
1640
  const storage = typeof storageOption === "object" ? storageOption : createStorage(storageOption);
1395
1641
  let remotePrices;
1642
+ let pricesUpdatedAt = bundledUpdatedAt;
1396
1643
  if (syncPrices) {
1397
1644
  getRemotePrices().then((result) => {
1398
- if (result) remotePrices = result;
1645
+ if (result) {
1646
+ remotePrices = result.models;
1647
+ pricesUpdatedAt = result.updated_at;
1648
+ }
1399
1649
  }).catch(() => {
1400
1650
  });
1401
1651
  }
1652
+ let stalenessChecked = false;
1653
/**
 * Emit a one-time console warning when the price data in use is older than
 * `warnIfStaleAfterHours`.
 *
 * The check runs at most once per tracker: `stalenessChecked` is latched on
 * the first call that gets past the "disabled" guard. A falsy
 * `warnIfStaleAfterHours` (0 or undefined) disables the check without
 * latching. A missing or unparseable `pricesUpdatedAt` is silently ignored.
 */
function maybeWarnStaleness() {
  if (stalenessChecked || !warnIfStaleAfterHours) return;
  stalenessChecked = true;
  if (!pricesUpdatedAt) return;
  // Date parsing never throws: an unparseable string yields NaN, so guard
  // against NaN explicitly instead of wrapping the block in try/catch.
  const updatedMs = new Date(pricesUpdatedAt).getTime();
  if (Number.isNaN(updatedMs)) return;
  const ageHours = (Date.now() - updatedMs) / (1e3 * 60 * 60);
  if (ageHours > warnIfStaleAfterHours) {
    console.warn(
      `[tokenwatch] Price data is ${Math.round(ageHours)}h old (updated_at: ${pricesUpdatedAt}). Run "tokenwatch sync" to refresh, or set warnIfStaleAfterHours: 0 to suppress.`
    );
  }
}
1402
1668
  let alertFired = false;
1669
+ const firedUserAlerts = /* @__PURE__ */ new Set();
1670
+ const firedSessionAlerts = /* @__PURE__ */ new Set();
1403
1671
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
1404
1672
  function resolveModelPrice(model) {
1673
+ maybeWarnStaleness();
1405
1674
  return resolvePrice(model, {
1406
1675
  bundledPrices,
1407
1676
  ...customPrices !== void 0 && { customPrices },
@@ -1412,8 +1681,10 @@ ${issues}`);
1412
1681
  const price = resolveModelPrice(entry.model);
1413
1682
  const costUSD = calculateCost(
1414
1683
  entry.inputTokens,
1415
- entry.outputTokens + (entry.reasoningTokens ?? 0),
1416
- price
1684
+ entry.outputTokens,
1685
+ price,
1686
+ entry.cachedTokens,
1687
+ entry.cacheCreationTokens
1417
1688
  );
1418
1689
  const full = {
1419
1690
  ...entry,
@@ -1421,32 +1692,81 @@ ${issues}`);
1421
1692
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
1422
1693
  };
1423
1694
  storage.record(full);
1424
- maybeFireAlert();
1695
+ maybeFireAlerts(full);
1696
+ if (suggestions) {
1697
+ maybeSuggestCheaperModel(entry.model, costUSD, entry.inputTokens, entry.outputTokens, {
1698
+ bundledPrices,
1699
+ ...customPrices !== void 0 && { customPrices },
1700
+ ...remotePrices !== void 0 && { remotePrices }
1701
+ });
1702
+ }
1425
1703
  }
1426
- function maybeFireAlert() {
1427
- if (!alertThreshold || !webhookUrl || alertFired) return;
1428
- alertFired = true;
1429
- Promise.resolve(storage.getAll()).then((entries) => {
1430
- const total = computeTotal(entries);
1431
- if (total < alertThreshold) {
1432
- alertFired = false;
1433
- return;
1434
- }
1435
- const payload = {
1436
- text: `[tokenwatch] Alert: total cost reached $${total.toFixed(4)} USD (threshold: $${alertThreshold})`
1437
- };
1438
- fetch(webhookUrl, {
1439
- method: "POST",
1440
- headers: { "Content-Type": "application/json" },
1441
- body: JSON.stringify(payload)
1704
/**
 * Evaluate every configured alert after an entry has been recorded.
 *
 * Three independent checks may apply:
 *  - the global alert (`alertThreshold` + `webhookUrl`), which fires once;
 *  - the per-user budget (`budgets.perUser`), keyed by `entry.userId`;
 *  - the per-session budget (`budgets.perSession`), keyed by `entry.sessionId`.
 *
 * For "once" checks the latch is set synchronously, before storage is read,
 * so overlapping calls cannot both fire; the latch is released again if the
 * threshold turns out not to be reached or if the check fails.
 *
 * Storage is read a single time for all pending checks. A storage failure
 * (rejection or synchronous throw) is swallowed and re-arms every pending
 * check, so alerting stays best-effort and never breaks the caller.
 *
 * @param entry The entry that was just recorded; only `userId` and
 *   `sessionId` are read from it.
 */
function maybeFireAlerts(entry) {
  // Each pending check exposes evaluate(entries) -> fired? and rearm().
  const pending = [];

  if (alertThreshold && webhookUrl && !alertFired) {
    alertFired = true;
    pending.push({
      evaluate(entries) {
        const total = computeTotal(entries);
        if (total < alertThreshold) return false;
        fireWebhook(webhookUrl, {
          text: `[tokenwatch] Alert: total cost reached $${total.toFixed(4)} USD (threshold: $${alertThreshold})`
        });
        return true;
      },
      rearm() {
        alertFired = false;
      }
    });
  }

  const scopedBudgets = [
    { cfg: budgets?.perUser, key: entry.userId, field: "userId", label: "user", fired: firedUserAlerts },
    { cfg: budgets?.perSession, key: entry.sessionId, field: "sessionId", label: "session", fired: firedSessionAlerts }
  ];
  for (const { cfg, key, field, label, fired } of scopedBudgets) {
    if (!cfg || !key) continue;
    const fireOnce = cfg.mode !== "always";
    if (fireOnce && fired.has(key)) continue;
    if (fireOnce) fired.add(key);
    pending.push({
      evaluate(entries) {
        const spent = entries.filter((e) => e[field] === key).reduce((sum, e) => sum + e.costUSD, 0);
        if (!(spent >= cfg.threshold)) return false;
        fireWebhook(cfg.webhookUrl, {
          text: `[tokenwatch] Budget alert: ${label} "${key}" reached $${spent.toFixed(4)} USD (threshold: $${cfg.threshold})`
        });
        return true;
      },
      rearm() {
        if (fireOnce) fired.delete(key);
      }
    });
  }

  if (pending.length === 0) return;

  // Calling getAll() inside the executor turns a synchronous throw into a
  // rejection, so it is handled by the same catch as an async failure.
  new Promise((resolve) => resolve(storage.getAll())).then((entries) => {
    for (const check of pending) {
      // Isolate checks from each other: one failing must not re-arm a
      // sibling that has already fired.
      try {
        if (!check.evaluate(entries)) check.rearm();
      } catch {
        check.rearm();
      }
    }
  }).catch(() => {
    for (const check of pending) check.rearm();
  });
}
1759
/**
 * POST `payload` as a JSON body to `url` without waiting for the response.
 * A rejected request is deliberately ignored; nothing is returned.
 */
function fireWebhook(url, payload) {
  const request = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload)
  };
  const ignoreFailure = () => {
  };
  fetch(url, request).catch(ignoreFailure);
}
1448
- async function getReport() {
1449
- const entries = await Promise.resolve(storage.getAll());
1767
+ async function getReport(options) {
1768
+ const allEntries = await Promise.resolve(storage.getAll());
1769
+ const entries = filterEntries(allEntries, options);
1450
1770
  const byModel = {};
1451
1771
  const bySession = {};
1452
1772
  const byUser = {};
@@ -1454,18 +1774,24 @@ ${issues}`);
1454
1774
  let totalInput = 0;
1455
1775
  let totalOutput = 0;
1456
1776
  let totalCost = 0;
1457
- let lastTimestamp = startedAt;
1777
+ let periodFrom = options ? entries[0]?.timestamp ?? startedAt : startedAt;
1778
+ let lastTimestamp = periodFrom;
1458
1779
  for (const e of entries) {
1459
- totalInput += e.inputTokens;
1780
+ totalInput += e.inputTokens + (e.cachedTokens ?? 0) + (e.cacheCreationTokens ?? 0);
1460
1781
  totalOutput += e.outputTokens;
1461
1782
  totalCost += e.costUSD;
1462
1783
  if (e.timestamp > lastTimestamp) lastTimestamp = e.timestamp;
1463
- const m = byModel[e.model] ??= { costUSD: 0, calls: 0, tokens: { input: 0, output: 0, reasoning: 0 } };
1784
+ const m = byModel[e.model] ??= {
1785
+ costUSD: 0,
1786
+ calls: 0,
1787
+ tokens: { input: 0, output: 0, reasoning: 0, cached: 0 }
1788
+ };
1464
1789
  m.costUSD += e.costUSD;
1465
1790
  m.calls += 1;
1466
- m.tokens.input += e.inputTokens;
1791
+ m.tokens.input += e.inputTokens + (e.cachedTokens ?? 0) + (e.cacheCreationTokens ?? 0);
1467
1792
  m.tokens.output += e.outputTokens;
1468
1793
  m.tokens.reasoning += e.reasoningTokens ?? 0;
1794
+ m.tokens.cached += e.cachedTokens ?? 0;
1469
1795
  if (e.sessionId) {
1470
1796
  const s = bySession[e.sessionId] ??= { costUSD: 0, calls: 0 };
1471
1797
  s.costUSD += e.costUSD;
@@ -1482,6 +1808,9 @@ ${issues}`);
1482
1808
  f.calls += 1;
1483
1809
  }
1484
1810
  }
1811
+ if (options && entries.length > 0) {
1812
+ periodFrom = entries[0]?.timestamp ?? periodFrom;
1813
+ }
1485
1814
  return {
1486
1815
  totalCostUSD: totalCost,
1487
1816
  totalTokens: { input: totalInput, output: totalOutput },
@@ -1489,22 +1818,66 @@ ${issues}`);
1489
1818
  bySession,
1490
1819
  byUser,
1491
1820
  byFeature,
1492
- period: { from: startedAt, to: lastTimestamp }
1821
+ period: { from: periodFrom, to: lastTimestamp },
1822
+ ...pricesUpdatedAt ? { pricesUpdatedAt } : {}
1823
+ };
1824
+ }
1825
/**
 * Project future spend from the entries recorded in a trailing time window.
 *
 * The burn rate is the summed cost of the in-window entries divided by the
 * time between the earliest and the latest of them. The earliest/latest
 * entries are found by comparing timestamps, so the result does not depend
 * on the order in which the storage backend returns entries.
 *
 * @param options Optional settings; `options.windowHours` is the size of the
 *   trailing window in hours (defaults to 24).
 * @returns An object with `burnRatePerHour`, `projectedDailyCostUSD`
 *   (rate x 24), `projectedMonthlyCostUSD` (rate x 24 x 30), `basedOnHours`
 *   (observed span, rounded to 2 decimals) and `basedOnPeriod`
 *   (`{ from, to }`). All numbers are 0 when fewer than two entries fall in
 *   the window (`basedOnPeriod` is then null) or when the observed span is
 *   shorter than 0.001 hours.
 */
async function getCostForecast(options = {}) {
  const windowHours = options.windowHours ?? 24;
  const allEntries = await Promise.resolve(storage.getAll());
  const windowStart = Date.now() - windowHours * 60 * 60 * 1e3;

  let count = 0;
  let totalCost = 0;
  let firstMs = Infinity;
  let lastMs = -Infinity;
  let first = "";
  let last = "";
  for (const e of allEntries) {
    const ts = new Date(e.timestamp).getTime();
    // Negated form also rejects NaN (unparseable timestamps).
    if (!(ts >= windowStart)) continue;
    count += 1;
    totalCost += e.costUSD;
    if (ts < firstMs) {
      firstMs = ts;
      first = e.timestamp;
    }
    if (ts >= lastMs) {
      lastMs = ts;
      last = e.timestamp;
    }
  }

  const noForecast = (basedOnPeriod) => ({
    burnRatePerHour: 0,
    projectedDailyCostUSD: 0,
    projectedMonthlyCostUSD: 0,
    basedOnHours: 0,
    basedOnPeriod
  });

  if (count < 2) return noForecast(null);

  const actualHours = (lastMs - firstMs) / (1e3 * 60 * 60);
  // A near-zero span would blow the rate up towards infinity.
  if (actualHours < 1e-3) return noForecast({ from: first, to: last });

  const burnRatePerHour = totalCost / actualHours;
  return {
    burnRatePerHour,
    projectedDailyCostUSD: burnRatePerHour * 24,
    projectedMonthlyCostUSD: burnRatePerHour * 24 * 30,
    basedOnHours: Math.round(actualHours * 100) / 100,
    basedOnPeriod: { from: first, to: last }
  };
}
1495
1865
/**
 * Clear all stored entries, then re-arm every alert latch (the global alert
 * and all per-user / per-session latches). The latches are only touched once
 * the storage clear has completed successfully.
 */
async function reset() {
  const cleared = Promise.resolve(storage.clearAll());
  await cleared;
  alertFired = false;
  for (const latches of [firedUserAlerts, firedSessionAlerts]) {
    latches.clear();
  }
}
1499
1871
/**
 * Clear the stored entries of one session and, once that has completed,
 * release that session's budget-alert latch.
 *
 * @param sessionId Identifier of the session to clear.
 */
async function resetSession(sessionId) {
  const cleared = Promise.resolve(storage.clearSession(sessionId));
  await cleared;
  firedSessionAlerts.delete(sessionId);
}
1502
1875
/**
 * Serialize the full (unfiltered) report as pretty-printed JSON using a
 * two-space indent.
 */
async function exportJSON() {
  const report = await getReport();
  const indent = 2;
  return JSON.stringify(report, null, indent);
}
1505
1878
  async function exportCSV() {
1506
1879
  const entries = await Promise.resolve(storage.getAll());
1507
- const header = "timestamp,model,inputTokens,outputTokens,reasoningTokens,costUSD,sessionId,userId,feature";
1880
+ const header = "timestamp,model,inputTokens,outputTokens,reasoningTokens,cachedTokens,cacheCreationTokens,costUSD,sessionId,userId,feature";
1508
1881
  const rows = entries.map(
1509
1882
  (e) => [
1510
1883
  csvEscape(e.timestamp),
@@ -1512,6 +1885,8 @@ ${issues}`);
1512
1885
  e.inputTokens,
1513
1886
  e.outputTokens,
1514
1887
  e.reasoningTokens ?? 0,
1888
+ e.cachedTokens ?? 0,
1889
+ e.cacheCreationTokens ?? 0,
1515
1890
  e.costUSD.toFixed(8),
1516
1891
  csvEscape(e.sessionId ?? ""),
1517
1892
  csvEscape(e.userId ?? ""),
@@ -1527,11 +1902,47 @@ ${issues}`);
1527
1902
  ...remotePrices !== void 0 && { remotePrices }
1528
1903
  }) ?? null;
1529
1904
  }
1530
- return { track, getReport, reset, resetSession, exportJSON, exportCSV, getModelInfo };
1905
+ return {
1906
+ track,
1907
+ getReport,
1908
+ getCostForecast,
1909
+ reset,
1910
+ resetSession,
1911
+ exportJSON,
1912
+ exportCSV,
1913
+ getModelInfo
1914
+ };
1531
1915
  }
1532
1916
/**
 * Sum the `costUSD` field over a list of entries; an empty list yields 0.
 */
function computeTotal(entries) {
  let runningTotal = 0;
  entries.forEach((entry) => {
    runningTotal += entry.costUSD;
  });
  return runningTotal;
}
1919
/**
 * Convert a relative-duration string into milliseconds.
 *
 * Accepted format: a non-negative decimal number immediately followed by
 * `h` (hours) or `d` (days), e.g. "24h", "7d", "1.5h". Surrounding
 * whitespace is ignored.
 *
 * @param last The duration string.
 * @returns The duration in milliseconds.
 * @throws {Error} If `last` is not a string or does not match the format.
 */
function parseLastMs(last) {
  const invalid = () => new Error(`[tokenwatch] Invalid "last" value: "${String(last)}". Use e.g. "24h", "7d".`);
  // Non-strings would otherwise crash on .trim() with an unrelated TypeError.
  if (typeof last !== "string") throw invalid();
  const match = /^(\d+(?:\.\d+)?)(h|d)$/.exec(last.trim());
  if (!match) throw invalid();
  const [, amount, unit] = match;
  const value = Number.parseFloat(amount);
  return unit === "h" ? value * 60 * 60 * 1e3 : value * 24 * 60 * 60 * 1e3;
}
1926
/**
 * Restrict a list of entries to a time range.
 *
 * @param entries Entries carrying a `timestamp` field.
 * @param options Optional range selection:
 *   - `last`: relative duration understood by parseLastMs (e.g. "24h");
 *     takes precedence over `since`;
 *   - `since`: lower bound (inclusive), anything `new Date()` can parse;
 *   - `until`: upper bound (inclusive), anything `new Date()` can parse.
 * @returns `entries` itself when no bound is given, otherwise a new filtered
 *   array. Entries whose own timestamp cannot be parsed are kept.
 * @throws {Error} If `last` is malformed, or if `since` / `until` cannot be
 *   parsed as a date. An unparseable bound would otherwise turn into NaN and
 *   silently disable the filter.
 */
function filterEntries(entries, options) {
  if (!options) return entries;

  const toMs = (value, name) => {
    const ms = new Date(value).getTime();
    if (Number.isNaN(ms)) {
      throw new Error(`[tokenwatch] Invalid "${name}" value: "${String(value)}". Expected a valid date (e.g. "2024-01-31T00:00:00Z").`);
    }
    return ms;
  };

  let sinceMs;
  let untilMs;
  if (options.last) {
    sinceMs = Date.now() - parseLastMs(options.last);
  } else if (options.since) {
    sinceMs = toMs(options.since, "since");
  }
  if (options.until) {
    untilMs = toMs(options.until, "until");
  }
  if (sinceMs === void 0 && untilMs === void 0) return entries;

  return entries.filter((e) => {
    const ts = new Date(e.timestamp).getTime();
    if (sinceMs !== void 0 && ts < sinceMs) return false;
    if (untilMs !== void 0 && ts > untilMs) return false;
    return true;
  });
}
1535
1946
  function csvEscape(value) {
1536
1947
  if (value.includes(",") || value.includes('"') || value.includes("\n")) {
1537
1948
  return `"${value.replace(/"/g, '""')}"`;
@@ -1539,6 +1950,71 @@ function csvEscape(value) {
1539
1950
  return value;
1540
1951
  }
1541
1952
 
1953
+ // src/core/lazy-tracker.ts
1954
+ var CSV_HEADER = "timestamp,model,inputTokens,outputTokens,reasoningTokens,cachedTokens,cacheCreationTokens,costUSD,sessionId,userId,feature";
1955
/**
 * Build the report returned before a tracker exists: zero cost, zero
 * tokens, empty breakdowns, and a period that starts and ends at the moment
 * of the call.
 */
function emptyReport() {
  const stamp = (/* @__PURE__ */ new Date()).toISOString();
  const report = {
    totalCostUSD: 0,
    totalTokens: { input: 0, output: 0 }
  };
  for (const breakdown of ["byModel", "bySession", "byUser", "byFeature"]) {
    report[breakdown] = {};
  }
  report.period = { from: stamp, to: stamp };
  return report;
}
1967
/**
 * Build the forecast returned before a tracker exists: every numeric field
 * is 0 and `basedOnPeriod` is null.
 */
function zeroForecast() {
  const forecast = {};
  const numericFields = [
    "burnRatePerHour",
    "projectedDailyCostUSD",
    "projectedMonthlyCostUSD",
    "basedOnHours"
  ];
  for (const field of numericFields) {
    forecast[field] = 0;
  }
  forecast.basedOnPeriod = null;
  return forecast;
}
1976
/**
 * Create a tracker facade that can be handed out before its configuration
 * is known.
 *
 * Until `init()` has been called, every method is a harmless no-op:
 * `track` drops the entry, `getReport` resolves to an empty report,
 * `getCostForecast` to an all-zero forecast, `exportJSON` to "{}",
 * `exportCSV` to the header row only, and `getModelInfo` returns null.
 * After `init()`, every call is forwarded to the real tracker.
 *
 * @returns An object with `init` plus the same methods as a tracker made by
 *   createTracker.
 */
function createLazyTracker() {
  let delegate = null;
  return {
    /**
     * Create the underlying tracker. May only succeed once; if
     * createTracker throws, the error propagates unchanged and the lazy
     * tracker stays uninitialized, so init() can be retried.
     */
    init(config) {
      if (delegate !== null) {
        throw new Error(
          "[tokenwatch] LazyTracker already initialized. init() may only be called once."
        );
      }
      delegate = createTracker(config ?? {});
    },
    track(entry) {
      delegate?.track(entry);
    },
    async getReport(options) {
      return delegate?.getReport(options) ?? emptyReport();
    },
    async getCostForecast(options) {
      return delegate?.getCostForecast(options) ?? zeroForecast();
    },
    async reset() {
      await delegate?.reset();
    },
    async resetSession(sessionId) {
      await delegate?.resetSession(sessionId);
    },
    async exportJSON() {
      return delegate?.exportJSON() ?? "{}";
    },
    async exportCSV() {
      return delegate?.exportCSV() ?? CSV_HEADER;
    },
    getModelInfo(model) {
      return delegate?.getModelInfo(model) ?? null;
    }
  };
}
2017
+
1542
2018
  // src/providers/openai.ts
1543
2019
  function extractMeta(params) {
1544
2020
  const { __sessionId, __userId, __feature, ...cleaned } = params;
@@ -1550,19 +2026,24 @@ function extractMeta(params) {
1550
2026
  };
1551
2027
  }
1552
2028
/**
 * Normalize an OpenAI-style `usage` object into token counts.
 *
 * Two field spellings are accepted for the totals: `prompt_tokens` /
 * `completion_tokens`, and `input_tokens` / `output_tokens`. The cached
 * count is read from the details object that matches either spelling
 * (`prompt_tokens_details.cached_tokens` or
 * `input_tokens_details.cached_tokens`), falling back to
 * `prompt_cache_hit_tokens`.
 *
 * @param usage The provider's usage object, or a falsy value when absent.
 * @returns `{ inputTokens, outputTokens, reasoningTokens, cachedTokens }`.
 *   `inputTokens` is the non-cached part of the input (total minus cached,
 *   never below 0); all fields are 0 when `usage` is absent.
 */
function extractUsage(usage) {
  if (!usage) return { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, cachedTokens: 0 };
  const totalInput = usage.prompt_tokens ?? usage.input_tokens ?? 0;
  const cachedTokens = usage.prompt_tokens_details?.cached_tokens ?? usage.input_tokens_details?.cached_tokens ?? usage.prompt_cache_hit_tokens ?? 0;
  return {
    // The reported input total includes cached tokens; subtract them so they
    // are not counted twice. Clamp in case a provider reports inconsistent
    // numbers, since a negative count would produce a negative cost.
    inputTokens: Math.max(0, totalInput - cachedTokens),
    outputTokens: usage.completion_tokens ?? usage.output_tokens ?? 0,
    reasoningTokens: usage.completion_tokens_details?.reasoning_tokens ?? 0,
    cachedTokens
  };
}
1560
- function trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature) {
2040
+ function trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens = 0) {
1561
2041
  tracker.track({
1562
2042
  model,
1563
2043
  inputTokens,
1564
- outputTokens,
2044
+ outputTokens: outputTokens + reasoningTokens,
1565
2045
  ...reasoningTokens > 0 && { reasoningTokens },
2046
+ ...cachedTokens > 0 && { cachedTokens },
1566
2047
  ...sessionId !== void 0 && { sessionId },
1567
2048
  ...userId !== void 0 && { userId },
1568
2049
  ...feature !== void 0 && { feature }
@@ -1574,13 +2055,13 @@ async function* wrapStream(stream, model, sessionId, userId, feature, tracker) {
1574
2055
  lastChunk = chunk;
1575
2056
  yield chunk;
1576
2057
  }
1577
- const { inputTokens, outputTokens, reasoningTokens } = extractUsage(lastChunk?.usage);
2058
+ const { inputTokens, outputTokens, reasoningTokens, cachedTokens } = extractUsage(lastChunk?.usage);
1578
2059
  if (!lastChunk?.usage) {
1579
2060
  console.warn(
1580
2061
  `[tokenwatch] No usage data in stream for model "${model}". Cost recorded as $0. Pass stream_options: { include_usage: true } to get accurate costs.`
1581
2062
  );
1582
2063
  }
1583
- trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature);
2064
+ trackWithMeta(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens);
1584
2065
  }
1585
2066
  function wrapOpenAI(client, tracker) {
1586
2067
  const proxiedCompletions = new Proxy(client.chat.completions, {
@@ -1602,7 +2083,7 @@ function wrapOpenAI(client, tracker) {
1602
2083
  );
1603
2084
  }
1604
2085
  const completion = result;
1605
- const { inputTokens, outputTokens, reasoningTokens } = extractUsage(completion.usage);
2086
+ const { inputTokens, outputTokens, reasoningTokens, cachedTokens } = extractUsage(completion.usage);
1606
2087
  trackWithMeta(
1607
2088
  tracker,
1608
2089
  completion.model ?? model,
@@ -1611,7 +2092,8 @@ function wrapOpenAI(client, tracker) {
1611
2092
  reasoningTokens,
1612
2093
  sessionId,
1613
2094
  userId,
1614
- feature
2095
+ feature,
2096
+ cachedTokens
1615
2097
  );
1616
2098
  return result;
1617
2099
  };
@@ -1658,10 +2140,12 @@ function extractMeta2(params) {
1658
2140
  };
1659
2141
  }
1660
2142
/**
 * Normalize an Anthropic-style `usage` object into token counts.
 *
 * @param usage Usage object with `input_tokens`, `output_tokens`,
 *   `cache_read_input_tokens` and `cache_creation_input_tokens`; may be
 *   absent.
 * @returns `{ inputTokens, outputTokens, cachedTokens, cacheCreationTokens }`
 *   where any missing field (or a missing `usage`) counts as 0.
 */
function extractUsage2(usage) {
  const count = (field) => {
    if (!usage) return 0;
    return usage[field] ?? 0;
  };
  const inputTokens = count("input_tokens");
  const outputTokens = count("output_tokens");
  const cachedTokens = count("cache_read_input_tokens");
  const cacheCreationTokens = count("cache_creation_input_tokens");
  return { inputTokens, outputTokens, cachedTokens, cacheCreationTokens };
}
1667
2151
  function extractThinkingTokenApprox(content) {
@@ -1669,30 +2153,32 @@ function extractThinkingTokenApprox(content) {
1669
2153
  const chars = content.filter((b) => b.type === "thinking").reduce((sum, b) => sum + (b.thinking?.length ?? 0), 0);
1670
2154
  return chars > 0 ? Math.round(chars / 4) : 0;
1671
2155
  }
1672
/**
 * Record one call on `tracker`, attaching only the optional fields that
 * carry information.
 *
 * `model`, `inputTokens` and `outputTokens` are always passed through.
 * `reasoningTokens`, `cachedTokens` and `cacheCreationTokens` are included
 * only when greater than 0; `sessionId`, `userId` and `feature` only when
 * not undefined.
 */
function trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens = 0, cacheCreationTokens = 0) {
  const entry = { model, inputTokens, outputTokens };

  const optionalCounts = { reasoningTokens, cachedTokens, cacheCreationTokens };
  for (const [field, amount] of Object.entries(optionalCounts)) {
    if (amount > 0) entry[field] = amount;
  }

  const optionalTags = { sessionId, userId, feature };
  for (const [field, tag] of Object.entries(optionalTags)) {
    if (tag !== void 0) entry[field] = tag;
  }

  tracker.track(entry);
}
1687
2169
  async function* wrapStream2(stream, model, sessionId, userId, feature, tracker) {
1688
2170
  let inputTokens = 0;
1689
2171
  let outputTokens = 0;
2172
+ let cachedTokens = 0;
2173
+ let cacheCreationTokens = 0;
1690
2174
  let currentBlockIsThinking = false;
1691
2175
  let thinkingCharCount = 0;
1692
2176
  for await (const event of stream) {
1693
2177
  yield event;
1694
2178
  if (event.type === "message_start" && event.message?.usage) {
1695
2179
  inputTokens = event.message.usage.input_tokens ?? 0;
2180
+ cachedTokens = event.message.usage.cache_read_input_tokens ?? 0;
2181
+ cacheCreationTokens = event.message.usage.cache_creation_input_tokens ?? 0;
1696
2182
  }
1697
2183
  if (event.type === "message_delta" && event.usage) {
1698
2184
  outputTokens = event.usage.output_tokens ?? 0;
@@ -1708,7 +2194,7 @@ async function* wrapStream2(stream, model, sessionId, userId, feature, tracker)
1708
2194
  }
1709
2195
  }
1710
2196
  const reasoningTokens = thinkingCharCount > 0 ? Math.round(thinkingCharCount / 4) : 0;
1711
- trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature);
2197
+ trackWithMeta2(tracker, model, inputTokens, outputTokens, reasoningTokens, sessionId, userId, feature, cachedTokens, cacheCreationTokens);
1712
2198
  }
1713
2199
  function wrapAnthropic(client, tracker) {
1714
2200
  const proxiedMessages = new Proxy(client.messages, {
@@ -1730,7 +2216,7 @@ function wrapAnthropic(client, tracker) {
1730
2216
  );
1731
2217
  }
1732
2218
  const message = result;
1733
- const { inputTokens, outputTokens } = extractUsage2(message.usage);
2219
+ const { inputTokens, outputTokens, cachedTokens, cacheCreationTokens } = extractUsage2(message.usage);
1734
2220
  const reasoningTokens = extractThinkingTokenApprox(message.content);
1735
2221
  trackWithMeta2(
1736
2222
  tracker,
@@ -1740,7 +2226,9 @@ function wrapAnthropic(client, tracker) {
1740
2226
  reasoningTokens,
1741
2227
  sessionId,
1742
2228
  userId,
1743
- feature
2229
+ feature,
2230
+ cachedTokens,
2231
+ cacheCreationTokens
1744
2232
  );
1745
2233
  return result;
1746
2234
  };
@@ -1811,6 +2299,7 @@ function wrapGemini(client, tracker) {
1811
2299
  }
1812
2300
  // Annotate the CommonJS export names for ESM import in node:
1813
2301
  0 && (module.exports = {
2302
+ createLazyTracker,
1814
2303
  createTracker,
1815
2304
  wrapAnthropic,
1816
2305
  wrapDeepSeek,