ai-token-estimator 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -410,11 +410,17 @@ interface EstimateInput {
  model: string; // Model ID (e.g., 'gpt-4o', 'claude-opus-4.5')
  rounding?: 'ceil' | 'round' | 'floor'; // Rounding strategy (default: 'ceil')
  tokenizer?: 'heuristic' | 'openai_exact' | 'auto'; // Token counting strategy (default: 'heuristic')
+
+ // Extended cost estimation (optional)
+ outputTokens?: number; // Output tokens for cost calculation
+ cachedInputTokens?: number; // Cached input tokens (OpenAI only, must be <= estimatedTokens)
+ mode?: 'standard' | 'batch'; // Pricing mode (default: 'standard')
  }
  ```

  Note:
  - Provider-backed modes (`anthropic_count_tokens`, `gemini_count_tokens`, `gemma_sentencepiece`) are only supported in `estimateAsync()`.
+ - When `outputTokens`, `cachedInputTokens`, or `mode` is provided, the model must have the corresponding pricing available or an error is thrown.

  **Returns:**

@@ -423,10 +429,16 @@ interface EstimateOutput {
  model: string; // The model used
  characterCount: number; // Number of Unicode code points
  estimatedTokens: number; // Estimated token count (integer)
- estimatedInputCost: number; // Estimated cost in USD
+ estimatedInputCost: number; // Estimated input cost in USD
  charsPerToken: number; // The ratio used for this model
  tokenizerMode?: 'heuristic' | 'openai_exact' | 'auto'; // Which strategy was used
  encodingUsed?: string; // OpenAI encoding when using exact tokenization
+
+ // Extended cost fields (when cost inputs are provided)
+ outputTokens?: number; // Echoed from input
+ estimatedOutputCost?: number; // Output token cost in USD
+ estimatedCachedInputCost?: number; // Cached input cost in USD
+ estimatedTotalCost: number; // Total cost (input + output + cached)
  }
  ```

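Taken together, the two interface changes above let a single `estimate()` call return a cost breakdown rather than only an input estimate. The sketch below is not part of the package README; it assumes the gpt-4o pricing added later in this diff ($2.50 per 1M input tokens, $10.00 per 1M output tokens), and the exact token count depends on the tokenizer selected.

```typescript
import { estimate } from 'ai-token-estimator';

// Sketch: providing any of the new cost inputs populates the extended
// cost fields on the result.
const result = estimate({
  text: 'Summarize the attached report in three bullet points.',
  model: 'gpt-4o',
  tokenizer: 'openai_exact',
  outputTokens: 200, // expected completion length (assumed for illustration)
  mode: 'standard',
});

console.log(result.estimatedTokens);     // exact input token count
console.log(result.estimatedInputCost);  // estimatedTokens * 2.5 / 1e6
console.log(result.estimatedOutputCost); // 200 * 10 / 1e6 = 0.002 (USD)
console.log(result.estimatedTotalCost);  // input + output cost
```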
@@ -555,8 +567,12 @@ Returns the configuration for a specific model. Throws if the model is not found

  ```typescript
  interface ModelConfig {
- charsPerToken: number; // Characters per token ratio
- inputCostPerMillion: number; // USD per 1M input tokens
+ charsPerToken: number; // Characters per token ratio
+ inputCostPerMillion: number; // USD per 1M input tokens
+ outputCostPerMillion?: number; // USD per 1M output tokens (when available)
+ cachedInputCostPerMillion?: number; // USD per 1M cached input tokens (OpenAI)
+ batchInputCostPerMillion?: number; // USD per 1M batch input tokens (OpenAI)
+ batchOutputCostPerMillion?: number; // USD per 1M batch output tokens (OpenAI)
  }
  ```

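Since `getModelConfig()` now surfaces the extended pricing, the quickest way to see which rates a model carries is to read the new optional fields directly. The values in this sketch mirror the gpt-4o entry added in the dist/index.cjs hunks below; models without extended pricing simply leave these fields undefined.

```typescript
import { getModelConfig } from 'ai-token-estimator';

const config = getModelConfig('gpt-4o');
console.log(config.inputCostPerMillion);        // 2.5
console.log(config.outputCostPerMillion);       // 10
console.log(config.cachedInputCostPerMillion);  // 1.25
console.log(config.batchInputCostPerMillion);   // 1.25
console.log(config.batchOutputCostPerMillion);  // 5
```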
@@ -564,6 +580,84 @@ interface ModelConfig {

  Read-only object containing all model configurations. Frozen to prevent runtime mutation.

+ ### Cost Estimation API
+
+ #### `estimateCost(options): CostEstimate`
+
+ Calculate cost from explicit token counts. Provides detailed cost breakdown for input, output, cached, and batch pricing.
+
+ ```typescript
+ import { estimateCost } from 'ai-token-estimator';
+
+ const result = estimateCost({
+ model: 'gpt-4o',
+ inputTokens: 1_000_000,
+ outputTokens: 500_000,
+ cachedInputTokens: 200_000, // optional
+ mode: 'standard', // or 'batch'
+ });
+
+ console.log(result);
+ // {
+ // model: 'gpt-4o',
+ // mode: 'standard',
+ // tokens: { input: 1000000, cachedInput: 200000, nonCachedInput: 800000, output: 500000 },
+ // costs: { input: 2.0, cachedInput: 0.25, output: 5.0, total: 7.25 },
+ // rates: { inputPerMillion: 2.5, outputPerMillion: 10.0, cachedInputPerMillion: 1.25, ... }
+ // }
+ ```
+
+ Throws if:
+ - Model is unknown
+ - Token counts are negative or non-integer
+ - `cachedInputTokens > inputTokens`
+ - Required pricing is missing (output/cached/batch)
+ - `mode: 'batch'` with `cachedInputTokens > 0`
+
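The README example above only shows standard-mode pricing. As a hedged illustration of the batch path described in the throws list (batch rates must exist and cached tokens are rejected), here is a sketch using the gpt-4o batch rates added in dist/index.cjs ($1.25 per 1M input, $5.00 per 1M output):

```typescript
import { estimateCost } from 'ai-token-estimator';

// Batch mode: cachedInputTokens must be omitted or 0, otherwise estimateCost throws.
const batch = estimateCost({
  model: 'gpt-4o',
  inputTokens: 1_000_000,
  outputTokens: 500_000,
  mode: 'batch',
});

console.log(batch.costs.input);  // 1_000_000 * 1.25 / 1e6 = 1.25
console.log(batch.costs.output); // 500_000 * 5 / 1e6 = 2.5
console.log(batch.costs.total);  // 3.75 (USD)
```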
+ #### `estimateCostFromText(options): CostEstimate`
+
+ Sync version that counts input tokens from text. Uses heuristic/exact tokenization based on model.
+
+ ```typescript
+ import { estimateCostFromText } from 'ai-token-estimator';
+
+ const result = estimateCostFromText({
+ model: 'gpt-4o',
+ inputText: 'Hello, world!',
+ outputText: 'Hi there!', // optional: auto-count output tokens
+ outputTokens: 100, // or: explicit output count (takes precedence)
+ cachedInputTokens: 0,
+ mode: 'standard',
+ });
+ ```
+
+ #### `estimateCostFromTextAsync(options): Promise<CostEstimate>`
+
+ Async version that supports provider-backed tokenizers for accurate counts.
+
+ ```typescript
+ import { estimateCostFromTextAsync } from 'ai-token-estimator';
+
+ const result = await estimateCostFromTextAsync({
+ model: 'claude-sonnet-4',
+ inputText: 'Hello, world!',
+ outputText: 'Hi there!',
+ tokenizer: 'anthropic_count_tokens',
+ anthropic: { apiKey: process.env.ANTHROPIC_API_KEY },
+ });
+ ```
+
+ #### `getTotalCost(model, inputTokens, outputTokens?): number`
+
+ Quick helper to get total cost for a model.
+
+ ```typescript
+ import { getTotalCost } from 'ai-token-estimator';
+
+ const cost = getTotalCost('gpt-4o', 1_000_000, 500_000);
+ // 7.5 (USD)
+ ```
+
  ### SentencePiece API

  #### `loadSentencePieceTokenizer(options: FileOptions): Promise<SentencePieceTokenizer>`
package/dist/index.cjs CHANGED
@@ -49,10 +49,14 @@ __export(index_exports, {
  ensureSentencePieceModel: () => ensureSentencePieceModel,
  estimate: () => estimate,
  estimateAsync: () => estimateAsync,
+ estimateCost: () => estimateCost,
+ estimateCostFromText: () => estimateCostFromText,
+ estimateCostFromTextAsync: () => estimateCostFromTextAsync,
  getAvailableModels: () => getAvailableModels,
  getModelConfig: () => getModelConfig,
  getOpenAIEncoding: () => getOpenAIEncoding,
  getSentencePieceTokenizer: () => getSentencePieceTokenizer,
+ getTotalCost: () => getTotalCost,
  loadSentencePieceTokenizer: () => loadSentencePieceTokenizer,
  parseModelProto: () => parseModelProto
  });
@@ -159,7 +163,11 @@ var models = {
  },
  "gpt-4o": {
  charsPerToken: 4,
- inputCostPerMillion: 2.5
+ inputCostPerMillion: 2.5,
+ outputCostPerMillion: 10,
+ cachedInputCostPerMillion: 1.25,
+ batchInputCostPerMillion: 1.25,
+ batchOutputCostPerMillion: 5
  },
  "gpt-4o-2024-05-13": {
  charsPerToken: 4,
@@ -171,7 +179,11 @@ var models = {
  },
  "gpt-4o-mini": {
  charsPerToken: 4,
- inputCostPerMillion: 0.15
+ inputCostPerMillion: 0.15,
+ outputCostPerMillion: 0.6,
+ cachedInputCostPerMillion: 0.075,
+ batchInputCostPerMillion: 0.075,
+ batchOutputCostPerMillion: 0.3
  },
  "gpt-4o-mini-audio-preview": {
  charsPerToken: 4,
@@ -401921,67 +401933,30 @@ function decode(tokens, options) {
  return api.decode(tokens);
  }

- // src/estimator.ts
- function countCodePoints(text) {
- let count = 0;
- for (const _char of text) {
- count++;
- }
- return count;
+ // src/token-counter.ts
+ function isNonOpenAIModel(model) {
+ return model.startsWith("claude-") || model.startsWith("gemini-");
  }
- function estimate(input) {
- const { text, model, rounding = "ceil", tokenizer = "heuristic" } = input;
- const config = getModelConfig(model);
- const tokenizerStr = tokenizer;
- if (tokenizerStr === "anthropic_count_tokens" || tokenizerStr === "gemini_count_tokens" || tokenizerStr === "gemma_sentencepiece") {
- throw new Error(`Tokenizer mode "${tokenizerStr}" requires async execution. Use estimateAsync(...) instead.`);
- }
- const characterCount = countCodePoints(text);
- const isNonOpenAIModel3 = model.startsWith("claude-") || model.startsWith("gemini-");
- let estimatedTokens;
- let tokenizerModeUsed = "heuristic";
- let encodingUsed;
- const shouldTryExact = tokenizer === "openai_exact" || tokenizer === "auto";
- if (shouldTryExact && !isNonOpenAIModel3) {
- try {
- estimatedTokens = encode(text, { model, allowSpecial: "none" }).length;
- tokenizerModeUsed = "openai_exact";
- encodingUsed = getOpenAIEncoding({ model });
- } catch (error) {
- if (tokenizer === "openai_exact") {
- throw error;
- }
- }
- } else if (tokenizer === "openai_exact" && isNonOpenAIModel3) {
- throw new Error(
- `Tokenizer mode "openai_exact" requested for non-OpenAI model: "${model}"`
- );
+ function countTokens(input) {
+ const { text, model } = input;
+ if (isNonOpenAIModel(model)) {
+ return {
+ tokens: estimate({ text, model }).estimatedTokens,
+ exact: false
+ };
  }
- if (estimatedTokens === void 0) {
- const rawTokens = characterCount / config.charsPerToken;
- switch (rounding) {
- case "floor":
- estimatedTokens = Math.floor(rawTokens);
- break;
- case "round":
- estimatedTokens = Math.round(rawTokens);
- break;
- case "ceil":
- default:
- estimatedTokens = Math.ceil(rawTokens);
- }
- tokenizerModeUsed = "heuristic";
+ try {
+ return {
+ tokens: encode(text, { model, allowSpecial: "none" }).length,
+ exact: true,
+ encoding: getOpenAIEncoding({ model })
+ };
+ } catch {
+ return {
+ tokens: estimate({ text, model }).estimatedTokens,
+ exact: false
+ };
  }
- const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
- return {
- model,
- characterCount,
- estimatedTokens,
- estimatedInputCost,
- charsPerToken: config.charsPerToken,
- tokenizerMode: tokenizerModeUsed,
- encodingUsed
- };
  }

  // src/providers/anthropic.ts
@@ -404598,12 +404573,12 @@ async function countGemmaSentencePieceTokens(params) {
  }

  // src/estimator-async.ts
- function countCodePoints2(text) {
+ function countCodePoints(text) {
  let count = 0;
  for (const _char of text) count++;
  return count;
  }
- function isNonOpenAIModel(model) {
+ function isNonOpenAIModel2(model) {
  return model.startsWith("claude-") || model.startsWith("gemini-");
  }
  function shouldFallbackToHeuristic(err) {
@@ -404617,9 +404592,17 @@ function shouldFallbackToHeuristic(err) {
  return false;
  }
  async function estimateAsync(input) {
- const { text, model, rounding = "ceil", tokenizer = "heuristic" } = input;
+ const {
+ text,
+ model,
+ rounding = "ceil",
+ tokenizer = "heuristic",
+ outputTokens,
+ cachedInputTokens,
+ mode
+ } = input;
  const config = getModelConfig(model);
- const characterCount = countCodePoints2(text);
+ const characterCount = countCodePoints(text);
  let estimatedTokens;
  let tokenizerModeUsed = "heuristic";
  let encodingUsed;
@@ -404670,7 +404653,7 @@ async function estimateAsync(input) {
  tokenizerModeUsed = "gemma_sentencepiece";
  } else {
  const shouldTryExact = tokenizer === "openai_exact" || tokenizer === "auto";
- if (shouldTryExact && !isNonOpenAIModel(model)) {
+ if (shouldTryExact && !isNonOpenAIModel2(model)) {
  try {
  estimatedTokens = encode(text, { model, allowSpecial: "none" }).length;
  tokenizerModeUsed = "openai_exact";
@@ -404678,7 +404661,7 @@ async function estimateAsync(input) {
  } catch (error) {
  if (tokenizer === "openai_exact") throw error;
  }
- } else if (tokenizer === "openai_exact" && isNonOpenAIModel(model)) {
+ } else if (tokenizer === "openai_exact" && isNonOpenAIModel2(model)) {
  throw new Error(`Tokenizer mode "openai_exact" requested for non-OpenAI model: "${model}"`);
  }
  }
@@ -404698,6 +404681,26 @@ async function estimateAsync(input) {
  tokenizerModeUsed = "heuristic";
  }
  const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
+ let estimatedOutputCost;
+ let estimatedCachedInputCost;
+ let estimatedTotalCost = estimatedInputCost;
+ const hasCostInputs = outputTokens !== void 0 || cachedInputTokens !== void 0 || mode !== void 0;
+ if (hasCostInputs) {
+ try {
+ const costResult = estimateCost({
+ model,
+ inputTokens: estimatedTokens,
+ outputTokens,
+ cachedInputTokens,
+ mode
+ });
+ estimatedOutputCost = costResult.costs.output > 0 ? costResult.costs.output : void 0;
+ estimatedCachedInputCost = costResult.costs.cachedInput > 0 ? costResult.costs.cachedInput : void 0;
+ estimatedTotalCost = costResult.costs.total;
+ } catch (error) {
+ throw error;
+ }
+ }
  return {
  model,
  characterCount,
@@ -404705,34 +404708,241 @@ async function estimateAsync(input) {
  estimatedInputCost,
  charsPerToken: config.charsPerToken,
  tokenizerMode: tokenizerModeUsed,
- encodingUsed
+ encodingUsed,
+ outputTokens,
+ estimatedOutputCost,
+ estimatedCachedInputCost,
+ estimatedTotalCost
  };
  }

- // src/token-counter.ts
- function isNonOpenAIModel2(model) {
- return model.startsWith("claude-") || model.startsWith("gemini-");
+ // src/cost.ts
+ function validateTokenCount(value, name) {
+ const n = value ?? 0;
+ if (!Number.isFinite(n) || n < 0 || !Number.isInteger(n)) {
+ throw new Error(`${name} must be a non-negative integer, got: ${n}`);
+ }
+ return n;
  }
- function countTokens(input) {
- const { text, model } = input;
- if (isNonOpenAIModel2(model)) {
- return {
- tokens: estimate({ text, model }).estimatedTokens,
- exact: false
- };
+ function estimateCost(options) {
+ const { model, mode = "standard" } = options;
+ const inputTokens = validateTokenCount(options.inputTokens, "inputTokens");
+ const outputTokens = validateTokenCount(options.outputTokens, "outputTokens");
+ const cachedInputTokens = validateTokenCount(options.cachedInputTokens, "cachedInputTokens");
+ if (cachedInputTokens > inputTokens) {
+ throw new Error(
+ `cachedInputTokens (${cachedInputTokens}) cannot exceed inputTokens (${inputTokens})`
+ );
  }
- try {
- return {
- tokens: encode(text, { model, allowSpecial: "none" }).length,
- exact: true,
- encoding: getOpenAIEncoding({ model })
- };
- } catch {
+ const config = getModelConfig(model);
+ if (outputTokens > 0 && config.outputCostPerMillion === void 0) {
+ throw new Error(
+ `Output pricing not available for model "${model}". Cannot estimate cost for ${outputTokens} output tokens.`
+ );
+ }
+ if (mode === "batch") {
+ if (cachedInputTokens > 0) {
+ throw new Error(
+ `Batch mode does not support cached tokens. Got cachedInputTokens: ${cachedInputTokens}. Use mode: 'standard' for cached pricing.`
+ );
+ }
+ if (config.batchInputCostPerMillion === void 0) {
+ throw new Error(
+ `Batch input pricing not available for model "${model}". Use mode: 'standard' or choose a model with batch pricing.`
+ );
+ }
+ if (outputTokens > 0 && config.batchOutputCostPerMillion === void 0) {
+ throw new Error(
+ `Batch output pricing not available for model "${model}". Cannot estimate batch cost for ${outputTokens} output tokens.`
+ );
+ }
+ }
+ const nonCachedInputTokens = inputTokens - cachedInputTokens;
+ if (mode === "batch") {
+ const inputCost2 = inputTokens * config.batchInputCostPerMillion / 1e6;
+ const outputCost2 = outputTokens > 0 ? outputTokens * config.batchOutputCostPerMillion / 1e6 : 0;
  return {
- tokens: estimate({ text, model }).estimatedTokens,
- exact: false
+ model,
+ mode: "batch",
+ tokens: {
+ input: inputTokens,
+ cachedInput: 0,
+ // Batch mode doesn't use cached pricing
+ nonCachedInput: inputTokens,
+ output: outputTokens
+ },
+ costs: {
+ input: inputCost2,
+ cachedInput: 0,
+ output: outputCost2,
+ total: inputCost2 + outputCost2
+ },
+ rates: {
+ // In batch mode, inputPerMillion/outputPerMillion reflect the batch rates used
+ inputPerMillion: config.batchInputCostPerMillion,
+ outputPerMillion: config.batchOutputCostPerMillion,
+ batchInputPerMillion: config.batchInputCostPerMillion,
+ batchOutputPerMillion: config.batchOutputCostPerMillion
+ }
  };
  }
+ if (cachedInputTokens > 0 && config.cachedInputCostPerMillion === void 0) {
+ throw new Error(
+ `Cached input pricing not available for model "${model}". Cannot estimate cost for ${cachedInputTokens} cached input tokens.`
+ );
+ }
+ const inputCost = nonCachedInputTokens * config.inputCostPerMillion / 1e6;
+ const cachedInputCost = cachedInputTokens > 0 ? cachedInputTokens * config.cachedInputCostPerMillion / 1e6 : 0;
+ const outputCost = outputTokens > 0 ? outputTokens * config.outputCostPerMillion / 1e6 : 0;
+ return {
+ model,
+ mode: "standard",
+ tokens: {
+ input: inputTokens,
+ cachedInput: cachedInputTokens,
+ nonCachedInput: nonCachedInputTokens,
+ output: outputTokens
+ },
+ costs: {
+ input: inputCost,
+ cachedInput: cachedInputCost,
+ output: outputCost,
+ total: inputCost + cachedInputCost + outputCost
+ },
+ rates: {
+ inputPerMillion: config.inputCostPerMillion,
+ outputPerMillion: config.outputCostPerMillion,
+ cachedInputPerMillion: config.cachedInputCostPerMillion
+ }
+ };
+ }
+ function estimateCostFromText(options) {
+ const { model, inputText, outputText, outputTokens: manualOutputTokens, ...rest } = options;
+ const inputTokens = countTokens({ text: inputText, model }).tokens;
+ let outputTokens = manualOutputTokens;
+ if (manualOutputTokens === void 0 && outputText !== void 0) {
+ outputTokens = countTokens({ text: outputText, model }).tokens;
+ }
+ return estimateCost({ model, inputTokens, outputTokens, ...rest });
+ }
+ async function estimateCostFromTextAsync(options) {
+ const {
+ inputText,
+ outputText,
+ outputTokens: manualOutputTokens,
+ cachedInputTokens,
+ mode,
+ ...providerOptions
+ // Includes model + all EstimateAsyncInput options
+ } = options;
+ const { model } = providerOptions;
+ const inputResult = await estimateAsync({ text: inputText, ...providerOptions });
+ const inputTokens = inputResult.estimatedTokens;
+ let outputTokens = manualOutputTokens;
+ if (manualOutputTokens === void 0 && outputText !== void 0) {
+ const outputResult = await estimateAsync({ text: outputText, ...providerOptions });
+ outputTokens = outputResult.estimatedTokens;
+ }
+ return estimateCost({ model, inputTokens, outputTokens, cachedInputTokens, mode });
+ }
+ function getTotalCost(model, inputTokens, outputTokens = 0) {
+ const estimate2 = estimateCost({ model, inputTokens, outputTokens });
+ return estimate2.costs.total;
+ }
+
+ // src/estimator.ts
+ function countCodePoints2(text) {
+ let count = 0;
+ for (const _char of text) {
+ count++;
+ }
+ return count;
+ }
+ function estimate(input) {
+ const {
+ text,
+ model,
+ rounding = "ceil",
+ tokenizer = "heuristic",
+ outputTokens,
+ cachedInputTokens,
+ mode
+ } = input;
+ const config = getModelConfig(model);
+ const tokenizerStr = tokenizer;
+ if (tokenizerStr === "anthropic_count_tokens" || tokenizerStr === "gemini_count_tokens" || tokenizerStr === "gemma_sentencepiece") {
+ throw new Error(`Tokenizer mode "${tokenizerStr}" requires async execution. Use estimateAsync(...) instead.`);
+ }
+ const characterCount = countCodePoints2(text);
+ const isNonOpenAIModel3 = model.startsWith("claude-") || model.startsWith("gemini-");
+ let estimatedTokens;
+ let tokenizerModeUsed = "heuristic";
+ let encodingUsed;
+ const shouldTryExact = tokenizer === "openai_exact" || tokenizer === "auto";
+ if (shouldTryExact && !isNonOpenAIModel3) {
+ try {
+ estimatedTokens = encode(text, { model, allowSpecial: "none" }).length;
+ tokenizerModeUsed = "openai_exact";
+ encodingUsed = getOpenAIEncoding({ model });
+ } catch (error) {
+ if (tokenizer === "openai_exact") {
+ throw error;
+ }
+ }
+ } else if (tokenizer === "openai_exact" && isNonOpenAIModel3) {
+ throw new Error(
+ `Tokenizer mode "openai_exact" requested for non-OpenAI model: "${model}"`
+ );
+ }
+ if (estimatedTokens === void 0) {
+ const rawTokens = characterCount / config.charsPerToken;
+ switch (rounding) {
+ case "floor":
+ estimatedTokens = Math.floor(rawTokens);
+ break;
+ case "round":
+ estimatedTokens = Math.round(rawTokens);
+ break;
+ case "ceil":
+ default:
+ estimatedTokens = Math.ceil(rawTokens);
+ }
+ tokenizerModeUsed = "heuristic";
+ }
+ const estimatedInputCost = estimatedTokens * config.inputCostPerMillion / 1e6;
+ let estimatedOutputCost;
+ let estimatedCachedInputCost;
+ let estimatedTotalCost = estimatedInputCost;
+ const hasCostInputs = outputTokens !== void 0 || cachedInputTokens !== void 0 || mode !== void 0;
+ if (hasCostInputs) {
+ try {
+ const costResult = estimateCost({
+ model,
+ inputTokens: estimatedTokens,
+ outputTokens,
+ cachedInputTokens,
+ mode
+ });
+ estimatedOutputCost = costResult.costs.output > 0 ? costResult.costs.output : void 0;
+ estimatedCachedInputCost = costResult.costs.cachedInput > 0 ? costResult.costs.cachedInput : void 0;
+ estimatedTotalCost = costResult.costs.total;
+ } catch (error) {
+ throw error;
+ }
+ }
+ return {
+ model,
+ characterCount,
+ estimatedTokens,
+ estimatedInputCost,
+ charsPerToken: config.charsPerToken,
+ tokenizerMode: tokenizerModeUsed,
+ encodingUsed,
+ outputTokens,
+ estimatedOutputCost,
+ estimatedCachedInputCost,
+ estimatedTotalCost
+ };
  }

  // src/chat-token-constants.ts
@@ -405019,10 +405229,14 @@ function countChatCompletionTokens(input) {
  ensureSentencePieceModel,
  estimate,
  estimateAsync,
+ estimateCost,
+ estimateCostFromText,
+ estimateCostFromTextAsync,
  getAvailableModels,
  getModelConfig,
  getOpenAIEncoding,
  getSentencePieceTokenizer,
+ getTotalCost,
  loadSentencePieceTokenizer,
  parseModelProto
  });