ai-token-estimator 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -410,11 +410,17 @@ interface EstimateInput {
    model: string; // Model ID (e.g., 'gpt-4o', 'claude-opus-4.5')
    rounding?: 'ceil' | 'round' | 'floor'; // Rounding strategy (default: 'ceil')
    tokenizer?: 'heuristic' | 'openai_exact' | 'auto'; // Token counting strategy (default: 'heuristic')
+
+   // Extended cost estimation (optional)
+   outputTokens?: number; // Output tokens for cost calculation
+   cachedInputTokens?: number; // Cached input tokens (OpenAI only; must be <= estimatedTokens)
+   mode?: 'standard' | 'batch'; // Pricing mode (default: 'standard')
  }
  ```

  Note:
  - Provider-backed modes (`anthropic_count_tokens`, `gemini_count_tokens`, `gemma_sentencepiece`) are only supported in `estimateAsync()`.
+ - When `outputTokens`, `cachedInputTokens`, or `mode` is provided, the model must have the corresponding pricing available; otherwise an error is thrown.

  **Returns:**

@@ -423,10 +429,16 @@ interface EstimateOutput {
    model: string; // The model used
    characterCount: number; // Number of Unicode code points
    estimatedTokens: number; // Estimated token count (integer)
-   estimatedInputCost: number; // Estimated cost in USD
+   estimatedInputCost: number; // Estimated input cost in USD
    charsPerToken: number; // The ratio used for this model
    tokenizerMode?: 'heuristic' | 'openai_exact' | 'auto'; // Which strategy was used
    encodingUsed?: string; // OpenAI encoding when using exact tokenization
+
+   // Extended cost fields (when cost inputs are provided)
+   outputTokens?: number; // Echoed from input
+   estimatedOutputCost?: number; // Output token cost in USD
+   estimatedCachedInputCost?: number; // Cached input cost in USD
+   estimatedTotalCost: number; // Total cost (input + output + cached)
  }
  ```
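
  For example, a minimal sketch of the extended cost flow. It assumes the synchronous `estimate()` entry point this section documents and a `text` input field; both sit outside this excerpt:

  ```typescript
  import { estimate } from 'ai-token-estimator';

  const result = estimate({
    text: 'Hello, world!',  // assumed field; the input excerpt above starts below it
    model: 'gpt-4o',
    outputTokens: 200,      // enables output cost estimation
    cachedInputTokens: 0,   // OpenAI only; must be <= estimatedTokens
    mode: 'standard',       // or 'batch'
  });

  console.log(result.estimatedInputCost);  // input cost in USD
  console.log(result.estimatedOutputCost); // output cost in USD
  console.log(result.estimatedTotalCost);  // total: input + output + cached
  ```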
@@ -549,14 +561,99 @@ Encodes text into **OpenAI token IDs** using tiktoken-compatible BPE tokenizatio

  Decodes OpenAI token IDs back into text using the selected encoding/model.

+ ### `isWithinTokenLimit(text, tokenLimit, options?): false | number`
+
+ Checks whether text is within a token limit, with **early-exit optimization**. Returns `false` if the limit is exceeded, or the actual token count if within the limit.
+
+ This is significantly faster than full tokenization when the limit is exceeded early in the text (up to 1000x+ faster for large texts with small limits).
+
+ ```typescript
+ import { isWithinTokenLimit } from 'ai-token-estimator';
+
+ // Returns the token count if within the limit
+ const count = isWithinTokenLimit('Hello, world!', 100, { model: 'gpt-4o' });
+ if (count !== false) {
+   console.log(`Text has ${count} tokens`);
+ }
+
+ // Returns false if the limit is exceeded (with early exit)
+ const longText = 'lorem ipsum '.repeat(10_000);
+ const result = isWithinTokenLimit(longText, 10, { model: 'gpt-4o' });
+ if (result === false) {
+   console.log('Text exceeds 10 tokens');
+ }
+ ```
+
+ **Parameters:**
+
+ ```typescript
+ interface IsWithinTokenLimitOptions {
+   model?: string; // OpenAI model (e.g., 'gpt-4o')
+   encoding?: OpenAIEncoding; // Explicit encoding override
+   allowSpecial?: SpecialTokenHandling; // How to handle special tokens
+ }
+ ```
+
+ **Throws:**
+ - `Error` if `tokenLimit` is invalid (NaN, Infinity, negative, non-integer)
+ - `Error` if `model` is a known non-OpenAI model (claude-*, gemini-*)
+
+ ### `isChatWithinTokenLimit(input): false | number`
+
+ Checks whether chat messages are within a token limit, with **early-exit optimization**. Returns `false` if the limit is exceeded, or the actual token count if within the limit.
+
+ Uses the same token-counting logic as `countChatCompletionTokens()` but exits early when the limit is exceeded.
+
+ ```typescript
+ import { isChatWithinTokenLimit } from 'ai-token-estimator';
+
+ const result = isChatWithinTokenLimit({
+   messages: [
+     { role: 'system', content: 'You are a helpful assistant.' },
+     { role: 'user', content: 'Hello!' }
+   ],
+   model: 'gpt-4o',
+   tokenLimit: 100,
+   functions: [{ name: 'get_weather', parameters: { type: 'object' } }],
+ });
+
+ if (result === false) {
+   console.log('Messages exceed token limit');
+ } else {
+   console.log(`Messages use ${result} tokens`);
+ }
+ ```
+
+ **Parameters:**
+
+ ```typescript
+ interface IsChatWithinTokenLimitInput {
+   messages: ChatMessage[];
+   model: string;
+   tokenLimit: number;
+   encoding?: OpenAIEncoding;
+   functions?: FunctionDefinition[];
+   function_call?: FunctionCallOption;
+ }
+ ```
+
+ **Throws:**
+ - `Error` if `tokenLimit` is invalid (NaN, Infinity, negative, non-integer)
+ - `Error` if model is not an OpenAI model (unless an encoding override is provided)
+ - `Error` if `tools`, `tool_choice`, `tool_calls`, or `tool_call_id` are present
+ - `Error` if any message has non-string content
+
  ### `getModelConfig(model: string): ModelConfig`

  Returns the configuration for a specific model. Throws if the model is not found.

  ```typescript
  interface ModelConfig {
-   charsPerToken: number;               // Characters per token ratio
-   inputCostPerMillion: number;         // USD per 1M input tokens
+   charsPerToken: number;               // Characters per token ratio
+   inputCostPerMillion: number;         // USD per 1M input tokens
+   outputCostPerMillion?: number;       // USD per 1M output tokens (when available)
+   cachedInputCostPerMillion?: number;  // USD per 1M cached input tokens (OpenAI)
+   batchInputCostPerMillion?: number;   // USD per 1M batch input tokens (OpenAI)
+   batchOutputCostPerMillion?: number;  // USD per 1M batch output tokens (OpenAI)
  }
  ```
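
  For example (a short sketch; the printed rates assume the gpt-4o pricing echoed by the `estimateCost` example below):

  ```typescript
  import { getModelConfig } from 'ai-token-estimator';

  const config = getModelConfig('gpt-4o');
  console.log(config.inputCostPerMillion);  // e.g. 2.5 (USD per 1M input tokens)
  console.log(config.outputCostPerMillion); // e.g. 10 (undefined when unavailable)

  getModelConfig('unknown-model'); // throws: model not found
  ```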
@@ -564,6 +661,84 @@ interface ModelConfig {

  Read-only object containing all model configurations. Frozen to prevent runtime mutation.

+ ### Cost Estimation API
+
+ #### `estimateCost(options): CostEstimate`
+
+ Calculates cost from explicit token counts and provides a detailed breakdown across input, output, cached, and batch pricing.
+
+ ```typescript
+ import { estimateCost } from 'ai-token-estimator';
+
+ const result = estimateCost({
+   model: 'gpt-4o',
+   inputTokens: 1_000_000,
+   outputTokens: 500_000,
+   cachedInputTokens: 200_000, // optional
+   mode: 'standard',           // or 'batch'
+ });
+
+ console.log(result);
+ // {
+ //   model: 'gpt-4o',
+ //   mode: 'standard',
+ //   tokens: { input: 1000000, cachedInput: 200000, nonCachedInput: 800000, output: 500000 },
+ //   costs: { input: 2.0, cachedInput: 0.25, output: 5.0, total: 7.25 },
+ //   rates: { inputPerMillion: 2.5, outputPerMillion: 10.0, cachedInputPerMillion: 1.25, ... }
+ // }
+ ```
+
+ Throws if:
+ - Model is unknown
+ - Token counts are negative or non-integer
+ - `cachedInputTokens > inputTokens` (see the sketch below)
+ - Required pricing is missing (output/cached/batch)
+ - `mode: 'batch'` is combined with `cachedInputTokens > 0`
+
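+ For example, a minimal sketch of a call that trips the cached-input rule above:
+
+ ```typescript
+ import { estimateCost } from 'ai-token-estimator';
+
+ try {
+   estimateCost({
+     model: 'gpt-4o',
+     inputTokens: 1_000,
+     outputTokens: 500,
+     cachedInputTokens: 2_000, // > inputTokens, so this throws
+   });
+ } catch (err) {
+   console.error((err as Error).message);
+ }
+ ```
+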
+ #### `estimateCostFromText(options): CostEstimate`
+
+ Synchronous version that counts input tokens from text, using heuristic or exact tokenization depending on the model.
+
+ ```typescript
+ import { estimateCostFromText } from 'ai-token-estimator';
+
+ const result = estimateCostFromText({
+   model: 'gpt-4o',
+   inputText: 'Hello, world!',
+   outputText: 'Hi there!', // optional: auto-count output tokens
+   outputTokens: 100,       // or: explicit output count (takes precedence)
+   cachedInputTokens: 0,
+   mode: 'standard',
+ });
+ ```
+
+ #### `estimateCostFromTextAsync(options): Promise<CostEstimate>`
+
+ Asynchronous version that additionally supports provider-backed tokenizers for accurate counts.
+
+ ```typescript
+ import { estimateCostFromTextAsync } from 'ai-token-estimator';
+
+ const result = await estimateCostFromTextAsync({
+   model: 'claude-sonnet-4',
+   inputText: 'Hello, world!',
+   outputText: 'Hi there!',
+   tokenizer: 'anthropic_count_tokens',
+   anthropic: { apiKey: process.env.ANTHROPIC_API_KEY },
+ });
+ ```
+
+ #### `getTotalCost(model, inputTokens, outputTokens?): number`
+
+ Quick helper that returns the total cost for a model.
+
+ ```typescript
+ import { getTotalCost } from 'ai-token-estimator';
+
+ const cost = getTotalCost('gpt-4o', 1_000_000, 500_000);
+ // 7.5 (USD)
+ ```
+
  ### SentencePiece API

  #### `loadSentencePieceTokenizer(options: FileOptions): Promise<SentencePieceTokenizer>`