ai-token-estimator 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -561,6 +561,87 @@ Encodes text into **OpenAI token IDs** using tiktoken-compatible BPE tokenizatio
561
561
 
562
562
  Decodes OpenAI token IDs back into text using the selected encoding/model.
563
563
 
564
+ ### `isWithinTokenLimit(text, tokenLimit, options?): false | number`
565
+
566
+ Checks if text is within a token limit with **early exit optimization**. Returns `false` if the limit is exceeded, or the actual token count if within limit.
567
+
568
+ This is significantly faster than full tokenization when the limit is exceeded early in the text (up to 1000x+ faster for large texts with small limits).
569
+
570
+ ```typescript
571
+ import { isWithinTokenLimit } from 'ai-token-estimator';
572
+
573
+ // Returns token count if within limit
574
+ const count = isWithinTokenLimit('Hello, world!', 100, { model: 'gpt-4o' });
575
+ if (count !== false) {
576
+ console.log(`Text has ${count} tokens`);
577
+ }
578
+
579
+ // Returns false if exceeds limit (with early exit)
580
+ const result = isWithinTokenLimit(longText, 10, { model: 'gpt-4o' });
581
+ if (result === false) {
582
+ console.log('Text exceeds 10 tokens');
583
+ }
584
+ ```
585
+
586
+ **Parameters:**
587
+
588
+ ```typescript
589
+ interface IsWithinTokenLimitOptions {
590
+ model?: string; // OpenAI model (e.g., 'gpt-4o')
591
+ encoding?: OpenAIEncoding; // Explicit encoding override
592
+ allowSpecial?: SpecialTokenHandling; // How to handle special tokens
593
+ }
594
+ ```
595
+
596
+ **Throws:**
597
+ - `Error` if `tokenLimit` is invalid (NaN, Infinity, negative, non-integer)
598
+ - `Error` if `model` is a known non-OpenAI model (claude-*, gemini-*)
599
+
600
+ ### `isChatWithinTokenLimit(input): false | number`
601
+
602
+ Checks if chat messages are within a token limit with **early exit optimization**. Returns `false` if exceeded, or the actual token count if within limit.
603
+
604
+ Uses the same token counting logic as `countChatCompletionTokens()` but exits early when the limit is exceeded.
605
+
606
+ ```typescript
607
+ import { isChatWithinTokenLimit } from 'ai-token-estimator';
608
+
609
+ const result = isChatWithinTokenLimit({
610
+ messages: [
611
+ { role: 'system', content: 'You are a helpful assistant.' },
612
+ { role: 'user', content: 'Hello!' }
613
+ ],
614
+ model: 'gpt-4o',
615
+ tokenLimit: 100,
616
+ functions: [{ name: 'get_weather', parameters: { type: 'object' } }],
617
+ });
618
+
619
+ if (result === false) {
620
+ console.log('Messages exceed token limit');
621
+ } else {
622
+ console.log(`Messages use ${result} tokens`);
623
+ }
624
+ ```
625
+
626
+ **Parameters:**
627
+
628
+ ```typescript
629
+ interface IsChatWithinTokenLimitInput {
630
+ messages: ChatMessage[];
631
+ model: string;
632
+ tokenLimit: number;
633
+ encoding?: OpenAIEncoding;
634
+ functions?: FunctionDefinition[];
635
+ function_call?: FunctionCallOption;
636
+ }
637
+ ```
638
+
639
+ **Throws:**
640
+ - `Error` if `tokenLimit` is invalid (NaN, Infinity, negative, non-integer)
641
+ - `Error` if model is not an OpenAI model (unless encoding override provided)
642
+ - `Error` if tools, tool_choice, tool_calls, or tool_call_id are present
643
+ - `Error` if any message has non-string content
644
+
564
645
  ### `getModelConfig(model: string): ModelConfig`
565
646
 
566
647
  Returns the configuration for a specific model. Throws if the model is not found.
package/dist/index.cjs CHANGED
@@ -57,6 +57,8 @@ __export(index_exports, {
57
57
  getOpenAIEncoding: () => getOpenAIEncoding,
58
58
  getSentencePieceTokenizer: () => getSentencePieceTokenizer,
59
59
  getTotalCost: () => getTotalCost,
60
+ isChatWithinTokenLimit: () => isChatWithinTokenLimit,
61
+ isWithinTokenLimit: () => isWithinTokenLimit,
60
62
  loadSentencePieceTokenizer: () => loadSentencePieceTokenizer,
61
63
  parseModelProto: () => parseModelProto
62
64
  });
@@ -616,6 +618,83 @@ var BPETokenizer = class {
616
618
  }
617
619
  return tokens;
618
620
  }
621
+ /**
622
+ * Encode text with a token limit, returning early if the limit is exceeded.
623
+ * This is optimized for fast token-limit validation without full tokenization.
624
+ *
625
+ * @param text - The text to encode
626
+ * @param limit - Maximum number of tokens allowed
627
+ * @param allowedSpecial - Controls special token handling (same as encodeText)
628
+ * @returns Object with count and exceeded flag
629
+ */
630
+ encodeTextWithLimit(text, limit, allowedSpecial) {
631
+ if (!text) return { count: 0, exceeded: false };
632
+ if (limit < 0) return { count: 0, exceeded: true };
633
+ if (allowedSpecial === "skip") {
634
+ return this.encodeOrdinaryWithLimit(text, limit);
635
+ }
636
+ let count = 0;
637
+ if (this.specialTokenMap.size > 0) {
638
+ const parts = this.splitOnSpecialTokens(text, allowedSpecial);
639
+ for (const part of parts) {
640
+ if (part.isSpecial) {
641
+ count += 1;
642
+ if (count > limit) return { count, exceeded: true };
643
+ } else {
644
+ const result = this.encodeOrdinaryWithLimit(part.text, limit - count);
645
+ count += result.count;
646
+ if (result.exceeded) {
647
+ return { count, exceeded: true };
648
+ }
649
+ }
650
+ }
651
+ } else {
652
+ return this.encodeOrdinaryWithLimit(text, limit);
653
+ }
654
+ return { count, exceeded: false };
655
+ }
656
+ /**
657
+ * Incremental encoding with early exit.
658
+ * CRITICAL: Uses RegExp.exec() loop instead of text.match() to avoid
659
+ * allocating all pieces upfront. This enables true early exit.
660
+ */
661
+ encodeOrdinaryWithLimit(text, limit) {
662
+ if (!text) return { count: 0, exceeded: false };
663
+ if (limit < 0) return { count: 0, exceeded: true };
664
+ let count = 0;
665
+ const regex = new RegExp(
666
+ this.tokenSplitRegex.source,
667
+ this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
668
+ );
669
+ let match;
670
+ while ((match = regex.exec(text)) !== null) {
671
+ const piece = match[0];
672
+ if (piece.length === 0) {
673
+ regex.lastIndex++;
674
+ continue;
675
+ }
676
+ const cached = this.getFromCache(piece);
677
+ if (cached) {
678
+ count += cached.length;
679
+ if (count > limit) return { count, exceeded: true };
680
+ continue;
681
+ }
682
+ const pieceBytes = this.textEncoder.encode(piece);
683
+ const key = bytesToLatin1(pieceBytes);
684
+ const directRank = this.encoder.get(key);
685
+ if (directRank !== void 0) {
686
+ count += 1;
687
+ this.addToCache(piece, [directRank]);
688
+ if (count > limit) return { count, exceeded: true };
689
+ continue;
690
+ }
691
+ const pieceTokens = this.mergeBytePairs(pieceBytes);
692
+ count += pieceTokens.length;
693
+ this.addToCache(piece, pieceTokens);
694
+ if (count > limit) return { count, exceeded: true };
695
+ }
696
+ return { count, exceeded: false };
697
+ }
619
698
  /**
620
699
  * Core BPE merge algorithm.
621
700
  */
@@ -401892,7 +401971,8 @@ function getTokenizer(encoding) {
401892
401971
  }
401893
401972
  return {
401894
401973
  encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
401895
- decode: (tokens) => tokenizer.decodeTokens(tokens)
401974
+ decode: (tokens) => tokenizer.decodeTokens(tokens),
401975
+ encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
401896
401976
  };
401897
401977
  }
401898
401978
  function resolveEncoding(options) {
@@ -401932,6 +402012,39 @@ function decode(tokens, options) {
401932
402012
  const api = getTokenizer(encoding);
401933
402013
  return api.decode(tokens);
401934
402014
  }
402015
+ function validateTokenLimit(tokenLimit) {
402016
+ if (!Number.isFinite(tokenLimit)) {
402017
+ throw new Error("tokenLimit must be a finite number");
402018
+ }
402019
+ if (!Number.isInteger(tokenLimit)) {
402020
+ throw new Error("tokenLimit must be an integer");
402021
+ }
402022
+ if (tokenLimit < 0) {
402023
+ throw new Error("tokenLimit must be non-negative");
402024
+ }
402025
+ }
402026
+ function rejectNonOpenAIModel(model) {
402027
+ if (!model) return;
402028
+ if (model.startsWith("claude-")) {
402029
+ throw new Error(
402030
+ `Model "${model}" is an Anthropic model. isWithinTokenLimit only supports OpenAI models. Use the Anthropic API's count_tokens endpoint via estimateAsync() instead.`
402031
+ );
402032
+ }
402033
+ if (model.startsWith("gemini-")) {
402034
+ throw new Error(
402035
+ `Model "${model}" is a Google model. isWithinTokenLimit only supports OpenAI models. Use the Gemini API's countTokens endpoint via estimateAsync() instead.`
402036
+ );
402037
+ }
402038
+ }
402039
+ function isWithinTokenLimit(text, tokenLimit, options) {
402040
+ validateTokenLimit(tokenLimit);
402041
+ rejectNonOpenAIModel(options?.model);
402042
+ const encoding = resolveEncoding(options);
402043
+ const api = getTokenizer(encoding);
402044
+ const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
402045
+ const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
402046
+ return result.exceeded ? false : result.count;
402047
+ }
401935
402048
 
401936
402049
  // src/token-counter.ts
401937
402050
  function isNonOpenAIModel(model) {
@@ -405208,6 +405321,125 @@ function countChatCompletionTokens(input) {
405208
405321
  }
405209
405322
  return result;
405210
405323
  }
405324
+ function validateTokenLimit2(tokenLimit) {
405325
+ if (!Number.isFinite(tokenLimit)) {
405326
+ throw new Error("tokenLimit must be a finite number");
405327
+ }
405328
+ if (!Number.isInteger(tokenLimit)) {
405329
+ throw new Error("tokenLimit must be an integer");
405330
+ }
405331
+ if (tokenLimit < 0) {
405332
+ throw new Error("tokenLimit must be non-negative");
405333
+ }
405334
+ }
405335
+ function isChatWithinTokenLimit(input) {
405336
+ const { messages, model, tokenLimit, encoding, functions, function_call } = input;
405337
+ validateTokenLimit2(tokenLimit);
405338
+ validateNoToolsApi(input);
405339
+ validateMessages(messages);
405340
+ validateOpenAIModel(model, encoding);
405341
+ const resolvedEncoding = encoding ?? getOpenAIEncoding({ model });
405342
+ const api = getTokenizer(resolvedEncoding);
405343
+ let count = COMPLETION_REQUEST_TOKEN_OVERHEAD;
405344
+ if (count > tokenLimit) return false;
405345
+ const hasFunctions = Boolean(functions?.length);
405346
+ const hasSystemMessage = messages.some((m) => m.role === "system");
405347
+ if (hasFunctions && functions) {
405348
+ const formatted = formatFunctionDefinitions(functions);
405349
+ const funcResult = api.encodeTextWithLimit(
405350
+ formatted,
405351
+ tokenLimit - count,
405352
+ "skip"
405353
+ );
405354
+ if (funcResult.exceeded) return false;
405355
+ let funcOverhead = funcResult.count + FUNCTION_DEFINITION_TOKEN_OVERHEAD;
405356
+ if (hasSystemMessage) {
405357
+ funcOverhead -= SYSTEM_FUNCTION_TOKEN_DEDUCTION;
405358
+ }
405359
+ count += funcOverhead;
405360
+ if (count > tokenLimit) return false;
405361
+ }
405362
+ if (function_call && function_call !== "auto") {
405363
+ if (function_call === "none") {
405364
+ count += FUNCTION_CALL_NONE_TOKEN_OVERHEAD;
405365
+ } else if (typeof function_call === "object" && function_call.name) {
405366
+ const fcNameResult = api.encodeTextWithLimit(
405367
+ function_call.name,
405368
+ tokenLimit - count,
405369
+ "skip"
405370
+ );
405371
+ if (fcNameResult.exceeded) return false;
405372
+ count += fcNameResult.count + FUNCTION_CALL_NAME_TOKEN_OVERHEAD;
405373
+ }
405374
+ if (count > tokenLimit) return false;
405375
+ }
405376
+ let systemPadded = false;
405377
+ for (const message of messages) {
405378
+ let overhead = MESSAGE_TOKEN_OVERHEAD;
405379
+ if (message.role) {
405380
+ const roleResult = api.encodeTextWithLimit(
405381
+ message.role,
405382
+ tokenLimit - count,
405383
+ "skip"
405384
+ );
405385
+ if (roleResult.exceeded) return false;
405386
+ count += roleResult.count;
405387
+ }
405388
+ let content = message.content ?? "";
405389
+ if (hasFunctions && message.role === "system" && !systemPadded) {
405390
+ if (content && !content.endsWith("\n")) {
405391
+ content = content + "\n";
405392
+ }
405393
+ systemPadded = true;
405394
+ }
405395
+ if (content) {
405396
+ const contentResult = api.encodeTextWithLimit(
405397
+ content,
405398
+ tokenLimit - count,
405399
+ "skip"
405400
+ );
405401
+ if (contentResult.exceeded) return false;
405402
+ count += contentResult.count;
405403
+ }
405404
+ if (message.name) {
405405
+ const nameResult = api.encodeTextWithLimit(
405406
+ message.name,
405407
+ tokenLimit - count,
405408
+ "skip"
405409
+ );
405410
+ if (nameResult.exceeded) return false;
405411
+ count += nameResult.count;
405412
+ overhead += MESSAGE_NAME_TOKEN_OVERHEAD;
405413
+ }
405414
+ if (message.function_call) {
405415
+ if (message.function_call.name) {
405416
+ const fcNameResult = api.encodeTextWithLimit(
405417
+ message.function_call.name,
405418
+ tokenLimit - count,
405419
+ "skip"
405420
+ );
405421
+ if (fcNameResult.exceeded) return false;
405422
+ count += fcNameResult.count;
405423
+ }
405424
+ if (message.function_call.arguments) {
405425
+ const fcArgsResult = api.encodeTextWithLimit(
405426
+ message.function_call.arguments,
405427
+ tokenLimit - count,
405428
+ "skip"
405429
+ );
405430
+ if (fcArgsResult.exceeded) return false;
405431
+ count += fcArgsResult.count;
405432
+ }
405433
+ overhead += FUNCTION_CALL_METADATA_TOKEN_OVERHEAD;
405434
+ }
405435
+ if (message.role === "function") {
405436
+ overhead -= FUNCTION_ROLE_TOKEN_DISCOUNT;
405437
+ }
405438
+ count += overhead;
405439
+ if (count > tokenLimit) return false;
405440
+ }
405441
+ return count;
405442
+ }
405211
405443
  // Annotate the CommonJS export names for ESM import in node:
405212
405444
  0 && (module.exports = {
405213
405445
  DEFAULT_MODELS,
@@ -405237,6 +405469,8 @@ function countChatCompletionTokens(input) {
405237
405469
  getOpenAIEncoding,
405238
405470
  getSentencePieceTokenizer,
405239
405471
  getTotalCost,
405472
+ isChatWithinTokenLimit,
405473
+ isWithinTokenLimit,
405240
405474
  loadSentencePieceTokenizer,
405241
405475
  parseModelProto
405242
405476
  });
package/dist/index.d.cts CHANGED
@@ -55,6 +55,58 @@ declare function encode(text: string, options?: EncodeOptions): number[];
55
55
  * Decode OpenAI token IDs into text using tiktoken-compatible BPE encoding.
56
56
  */
57
57
  declare function decode(tokens: Iterable<number>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): string;
58
+ /**
59
+ * Options for isWithinTokenLimit.
60
+ */
61
+ interface IsWithinTokenLimitOptions {
62
+ /**
63
+ * Explicit OpenAI encoding override.
64
+ * When provided, this takes precedence over `model`.
65
+ */
66
+ encoding?: OpenAIEncoding;
67
+ /**
68
+ * OpenAI model ID used to select the appropriate encoding.
69
+ * Note: Non-OpenAI models (claude-*, gemini-*) are rejected.
70
+ */
71
+ model?: string;
72
+ /**
73
+ * How special tokens are handled.
74
+ * - `none_raise` (default): throw if special tokens appear
75
+ * - `none`: treat special tokens as regular text
76
+ * - `all`: allow special tokens and encode them as special token IDs
77
+ */
78
+ allowSpecial?: SpecialTokenHandling;
79
+ }
80
+ /**
81
+ * Check if text is within a token limit, with early exit optimization.
82
+ *
83
+ * Returns `false` if the token count exceeds the limit, otherwise returns the
84
+ * actual token count. This is significantly faster than full tokenization when
85
+ * the limit is exceeded early in the text.
86
+ *
87
+ * @param text - The text to check
88
+ * @param tokenLimit - Maximum allowed tokens (must be non-negative finite integer)
89
+ * @param options - Encoding options
90
+ * @returns `false` if exceeded, or the actual token count if within limit
91
+ * @throws Error if tokenLimit is invalid (NaN, Infinity, negative, non-integer)
92
+ * @throws Error if model is a known non-OpenAI model (claude-*, gemini-*)
93
+ *
94
+ * @example
95
+ * ```typescript
96
+ * // Returns token count if within limit
97
+ * const count = isWithinTokenLimit('Hello, world!', 100, { model: 'gpt-4o' });
98
+ * if (count !== false) {
99
+ * console.log(`Text has ${count} tokens`);
100
+ * }
101
+ *
102
+ * // Returns false if exceeds limit
103
+ * const result = isWithinTokenLimit(longText, 10, { model: 'gpt-4o' });
104
+ * if (result === false) {
105
+ * console.log('Text exceeds 10 tokens');
106
+ * }
107
+ * ```
108
+ */
109
+ declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: IsWithinTokenLimitOptions): false | number;
58
110
 
59
111
  /**
60
112
  * Configuration for a specific LLM model.
@@ -542,6 +594,52 @@ declare function countTokens(input: TokenCountInput): TokenCountOutput;
542
594
  * ```
543
595
  */
544
596
  declare function countChatCompletionTokens(input: ChatCompletionTokenCountInput): ChatCompletionTokenCountOutput;
597
+ /**
598
+ * Input for isChatWithinTokenLimit.
599
+ * Object-style input to match countChatCompletionTokens API.
600
+ */
601
+ interface IsChatWithinTokenLimitInput {
602
+ messages: ChatMessage[];
603
+ model: string;
604
+ tokenLimit: number;
605
+ encoding?: OpenAIEncoding;
606
+ functions?: FunctionDefinition[];
607
+ function_call?: FunctionCallOption;
608
+ }
609
+ /**
610
+ * Check if chat messages are within a token limit, with early exit optimization.
611
+ *
612
+ * Uses object-style input to match countChatCompletionTokens API.
613
+ * Returns `false` if the token count exceeds the limit, otherwise returns
614
+ * the actual token count.
615
+ *
616
+ * This is significantly faster than full tokenization when the limit is
617
+ * exceeded early in the input.
618
+ *
619
+ * @throws {Error} If tokenLimit is invalid (NaN, Infinity, negative, non-integer)
620
+ * @throws {Error} If model is not an OpenAI model (unless encoding override provided)
621
+ * @throws {Error} If tools, tool_choice, tool_calls, or tool_call_id are present
622
+ * @throws {Error} If any message has non-string content (arrays, numbers, objects)
623
+ *
624
+ * @example
625
+ * ```typescript
626
+ * const result = isChatWithinTokenLimit({
627
+ * messages: [
628
+ * { role: 'system', content: 'You are a helpful assistant.' },
629
+ * { role: 'user', content: 'Hello!' }
630
+ * ],
631
+ * model: 'gpt-4o',
632
+ * tokenLimit: 100,
633
+ * });
634
+ *
635
+ * if (result === false) {
636
+ * console.log('Messages exceed token limit');
637
+ * } else {
638
+ * console.log(`Messages use ${result} tokens`);
639
+ * }
640
+ * ```
641
+ */
642
+ declare function isChatWithinTokenLimit(input: IsChatWithinTokenLimitInput): false | number;
545
643
 
546
644
  interface AnthropicCountTokensParams {
547
645
  /** Claude model id, e.g. `claude-sonnet-4-5` */
@@ -818,4 +916,4 @@ declare function clearModelCache(): void;
818
916
  */
819
917
  declare function parseModelProto(buffer: Uint8Array): ModelProto;
820
918
 
821
- export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, loadSentencePieceTokenizer, parseModelProto };
919
+ export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
package/dist/index.d.ts CHANGED
@@ -55,6 +55,58 @@ declare function encode(text: string, options?: EncodeOptions): number[];
55
55
  * Decode OpenAI token IDs into text using tiktoken-compatible BPE encoding.
56
56
  */
57
57
  declare function decode(tokens: Iterable<number>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): string;
58
+ /**
59
+ * Options for isWithinTokenLimit.
60
+ */
61
+ interface IsWithinTokenLimitOptions {
62
+ /**
63
+ * Explicit OpenAI encoding override.
64
+ * When provided, this takes precedence over `model`.
65
+ */
66
+ encoding?: OpenAIEncoding;
67
+ /**
68
+ * OpenAI model ID used to select the appropriate encoding.
69
+ * Note: Non-OpenAI models (claude-*, gemini-*) are rejected.
70
+ */
71
+ model?: string;
72
+ /**
73
+ * How special tokens are handled.
74
+ * - `none_raise` (default): throw if special tokens appear
75
+ * - `none`: treat special tokens as regular text
76
+ * - `all`: allow special tokens and encode them as special token IDs
77
+ */
78
+ allowSpecial?: SpecialTokenHandling;
79
+ }
80
+ /**
81
+ * Check if text is within a token limit, with early exit optimization.
82
+ *
83
+ * Returns `false` if the token count exceeds the limit, otherwise returns the
84
+ * actual token count. This is significantly faster than full tokenization when
85
+ * the limit is exceeded early in the text.
86
+ *
87
+ * @param text - The text to check
88
+ * @param tokenLimit - Maximum allowed tokens (must be non-negative finite integer)
89
+ * @param options - Encoding options
90
+ * @returns `false` if exceeded, or the actual token count if within limit
91
+ * @throws Error if tokenLimit is invalid (NaN, Infinity, negative, non-integer)
92
+ * @throws Error if model is a known non-OpenAI model (claude-*, gemini-*)
93
+ *
94
+ * @example
95
+ * ```typescript
96
+ * // Returns token count if within limit
97
+ * const count = isWithinTokenLimit('Hello, world!', 100, { model: 'gpt-4o' });
98
+ * if (count !== false) {
99
+ * console.log(`Text has ${count} tokens`);
100
+ * }
101
+ *
102
+ * // Returns false if exceeds limit
103
+ * const result = isWithinTokenLimit(longText, 10, { model: 'gpt-4o' });
104
+ * if (result === false) {
105
+ * console.log('Text exceeds 10 tokens');
106
+ * }
107
+ * ```
108
+ */
109
+ declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: IsWithinTokenLimitOptions): false | number;
58
110
 
59
111
  /**
60
112
  * Configuration for a specific LLM model.
@@ -542,6 +594,52 @@ declare function countTokens(input: TokenCountInput): TokenCountOutput;
542
594
  * ```
543
595
  */
544
596
  declare function countChatCompletionTokens(input: ChatCompletionTokenCountInput): ChatCompletionTokenCountOutput;
597
+ /**
598
+ * Input for isChatWithinTokenLimit.
599
+ * Object-style input to match countChatCompletionTokens API.
600
+ */
601
+ interface IsChatWithinTokenLimitInput {
602
+ messages: ChatMessage[];
603
+ model: string;
604
+ tokenLimit: number;
605
+ encoding?: OpenAIEncoding;
606
+ functions?: FunctionDefinition[];
607
+ function_call?: FunctionCallOption;
608
+ }
609
+ /**
610
+ * Check if chat messages are within a token limit, with early exit optimization.
611
+ *
612
+ * Uses object-style input to match countChatCompletionTokens API.
613
+ * Returns `false` if the token count exceeds the limit, otherwise returns
614
+ * the actual token count.
615
+ *
616
+ * This is significantly faster than full tokenization when the limit is
617
+ * exceeded early in the input.
618
+ *
619
+ * @throws {Error} If tokenLimit is invalid (NaN, Infinity, negative, non-integer)
620
+ * @throws {Error} If model is not an OpenAI model (unless encoding override provided)
621
+ * @throws {Error} If tools, tool_choice, tool_calls, or tool_call_id are present
622
+ * @throws {Error} If any message has non-string content (arrays, numbers, objects)
623
+ *
624
+ * @example
625
+ * ```typescript
626
+ * const result = isChatWithinTokenLimit({
627
+ * messages: [
628
+ * { role: 'system', content: 'You are a helpful assistant.' },
629
+ * { role: 'user', content: 'Hello!' }
630
+ * ],
631
+ * model: 'gpt-4o',
632
+ * tokenLimit: 100,
633
+ * });
634
+ *
635
+ * if (result === false) {
636
+ * console.log('Messages exceed token limit');
637
+ * } else {
638
+ * console.log(`Messages use ${result} tokens`);
639
+ * }
640
+ * ```
641
+ */
642
+ declare function isChatWithinTokenLimit(input: IsChatWithinTokenLimitInput): false | number;
545
643
 
546
644
  interface AnthropicCountTokensParams {
547
645
  /** Claude model id, e.g. `claude-sonnet-4-5` */
@@ -818,4 +916,4 @@ declare function clearModelCache(): void;
818
916
  */
819
917
  declare function parseModelProto(buffer: Uint8Array): ModelProto;
820
918
 
821
- export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, loadSentencePieceTokenizer, parseModelProto };
919
+ export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
package/dist/index.js CHANGED
@@ -552,6 +552,83 @@ var BPETokenizer = class {
552
552
  }
553
553
  return tokens;
554
554
  }
555
+ /**
556
+ * Encode text with a token limit, returning early if the limit is exceeded.
557
+ * This is optimized for fast token-limit validation without full tokenization.
558
+ *
559
+ * @param text - The text to encode
560
+ * @param limit - Maximum number of tokens allowed
561
+ * @param allowedSpecial - Controls special token handling (same as encodeText)
562
+ * @returns Object with count and exceeded flag
563
+ */
564
+ encodeTextWithLimit(text, limit, allowedSpecial) {
565
+ if (!text) return { count: 0, exceeded: false };
566
+ if (limit < 0) return { count: 0, exceeded: true };
567
+ if (allowedSpecial === "skip") {
568
+ return this.encodeOrdinaryWithLimit(text, limit);
569
+ }
570
+ let count = 0;
571
+ if (this.specialTokenMap.size > 0) {
572
+ const parts = this.splitOnSpecialTokens(text, allowedSpecial);
573
+ for (const part of parts) {
574
+ if (part.isSpecial) {
575
+ count += 1;
576
+ if (count > limit) return { count, exceeded: true };
577
+ } else {
578
+ const result = this.encodeOrdinaryWithLimit(part.text, limit - count);
579
+ count += result.count;
580
+ if (result.exceeded) {
581
+ return { count, exceeded: true };
582
+ }
583
+ }
584
+ }
585
+ } else {
586
+ return this.encodeOrdinaryWithLimit(text, limit);
587
+ }
588
+ return { count, exceeded: false };
589
+ }
590
+ /**
591
+ * Incremental encoding with early exit.
592
+ * CRITICAL: Uses RegExp.exec() loop instead of text.match() to avoid
593
+ * allocating all pieces upfront. This enables true early exit.
594
+ */
595
+ encodeOrdinaryWithLimit(text, limit) {
596
+ if (!text) return { count: 0, exceeded: false };
597
+ if (limit < 0) return { count: 0, exceeded: true };
598
+ let count = 0;
599
+ const regex = new RegExp(
600
+ this.tokenSplitRegex.source,
601
+ this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
602
+ );
603
+ let match;
604
+ while ((match = regex.exec(text)) !== null) {
605
+ const piece = match[0];
606
+ if (piece.length === 0) {
607
+ regex.lastIndex++;
608
+ continue;
609
+ }
610
+ const cached = this.getFromCache(piece);
611
+ if (cached) {
612
+ count += cached.length;
613
+ if (count > limit) return { count, exceeded: true };
614
+ continue;
615
+ }
616
+ const pieceBytes = this.textEncoder.encode(piece);
617
+ const key = bytesToLatin1(pieceBytes);
618
+ const directRank = this.encoder.get(key);
619
+ if (directRank !== void 0) {
620
+ count += 1;
621
+ this.addToCache(piece, [directRank]);
622
+ if (count > limit) return { count, exceeded: true };
623
+ continue;
624
+ }
625
+ const pieceTokens = this.mergeBytePairs(pieceBytes);
626
+ count += pieceTokens.length;
627
+ this.addToCache(piece, pieceTokens);
628
+ if (count > limit) return { count, exceeded: true };
629
+ }
630
+ return { count, exceeded: false };
631
+ }
555
632
  /**
556
633
  * Core BPE merge algorithm.
557
634
  */
@@ -401828,7 +401905,8 @@ function getTokenizer(encoding) {
401828
401905
  }
401829
401906
  return {
401830
401907
  encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
401831
- decode: (tokens) => tokenizer.decodeTokens(tokens)
401908
+ decode: (tokens) => tokenizer.decodeTokens(tokens),
401909
+ encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
401832
401910
  };
401833
401911
  }
401834
401912
  function resolveEncoding(options) {
@@ -401868,6 +401946,39 @@ function decode(tokens, options) {
401868
401946
  const api = getTokenizer(encoding);
401869
401947
  return api.decode(tokens);
401870
401948
  }
401949
/**
 * Validates that `tokenLimit` is a usable token budget: a finite,
 * non-negative integer.
 *
 * @throws {Error} when the limit is NaN/Infinity, fractional, or negative.
 */
function validateTokenLimit(tokenLimit) {
  // Ordered checks: a NaN limit reports "finite" before "integer".
  const checks = [
    [(n) => Number.isFinite(n), "tokenLimit must be a finite number"],
    [(n) => Number.isInteger(n), "tokenLimit must be an integer"],
    [(n) => n >= 0, "tokenLimit must be non-negative"],
  ];
  for (const [ok, message] of checks) {
    if (!ok(tokenLimit)) {
      throw new Error(message);
    }
  }
}
401960
/**
 * Throws when `model` is a known non-OpenAI model name (claude-* or
 * gemini-*), pointing the caller at the vendor's own counting endpoint.
 * Falsy/absent model names pass through unchanged.
 *
 * @throws {Error} for Anthropic (claude-*) and Google (gemini-*) models.
 */
function rejectNonOpenAIModel(model) {
  if (!model) return;
  const vendorPrefixes = [
    [
      "claude-",
      `Model "${model}" is an Anthropic model. isWithinTokenLimit only supports OpenAI models. Use the Anthropic API's count_tokens endpoint via estimateAsync() instead.`,
    ],
    [
      "gemini-",
      `Model "${model}" is a Google model. isWithinTokenLimit only supports OpenAI models. Use the Gemini API's countTokens endpoint via estimateAsync() instead.`,
    ],
  ];
  for (const [prefix, message] of vendorPrefixes) {
    if (model.startsWith(prefix)) {
      throw new Error(message);
    }
  }
}
401973
/**
 * Checks text against a token budget with early exit.
 *
 * Returns the exact token count when the text fits within `tokenLimit`,
 * or `false` as soon as the count passes the limit — avoiding full
 * tokenization of large texts.
 *
 * @throws {Error} for an invalid `tokenLimit` or a known non-OpenAI model.
 */
function isWithinTokenLimit(text, tokenLimit, options) {
  validateTokenLimit(tokenLimit);
  rejectNonOpenAIModel(options?.model);
  const api = getTokenizer(resolveEncoding(options));
  const specialHandling = resolveAllowedSpecial(options?.allowSpecial);
  const { count, exceeded } = api.encodeTextWithLimit(text, tokenLimit, specialHandling);
  if (exceeded) return false;
  return count;
}
401871
401982
 
401872
401983
  // src/token-counter.ts
401873
401984
  function isNonOpenAIModel(model) {
@@ -405144,6 +405255,125 @@ function countChatCompletionTokens(input) {
405144
405255
  }
405145
405256
  return result;
405146
405257
  }
405258
/**
 * Validates the `tokenLimit` budget for chat-level limit checks: it must
 * be a finite, non-negative integer.
 *
 * NOTE(review): duplicated from the tokenizer module by the bundler —
 * keep the messages in sync with the other copy.
 *
 * @throws {Error} when the limit is NaN/Infinity, fractional, or negative.
 */
function validateTokenLimit2(tokenLimit) {
  if (!Number.isFinite(tokenLimit)) throw new Error("tokenLimit must be a finite number");
  if (!Number.isInteger(tokenLimit)) throw new Error("tokenLimit must be an integer");
  if (tokenLimit < 0) throw new Error("tokenLimit must be non-negative");
}
+ function isChatWithinTokenLimit(input) {
405270
+ const { messages, model, tokenLimit, encoding, functions, function_call } = input;
405271
+ validateTokenLimit2(tokenLimit);
405272
+ validateNoToolsApi(input);
405273
+ validateMessages(messages);
405274
+ validateOpenAIModel(model, encoding);
405275
+ const resolvedEncoding = encoding ?? getOpenAIEncoding({ model });
405276
+ const api = getTokenizer(resolvedEncoding);
405277
+ let count = COMPLETION_REQUEST_TOKEN_OVERHEAD;
405278
+ if (count > tokenLimit) return false;
405279
+ const hasFunctions = Boolean(functions?.length);
405280
+ const hasSystemMessage = messages.some((m) => m.role === "system");
405281
+ if (hasFunctions && functions) {
405282
+ const formatted = formatFunctionDefinitions(functions);
405283
+ const funcResult = api.encodeTextWithLimit(
405284
+ formatted,
405285
+ tokenLimit - count,
405286
+ "skip"
405287
+ );
405288
+ if (funcResult.exceeded) return false;
405289
+ let funcOverhead = funcResult.count + FUNCTION_DEFINITION_TOKEN_OVERHEAD;
405290
+ if (hasSystemMessage) {
405291
+ funcOverhead -= SYSTEM_FUNCTION_TOKEN_DEDUCTION;
405292
+ }
405293
+ count += funcOverhead;
405294
+ if (count > tokenLimit) return false;
405295
+ }
405296
+ if (function_call && function_call !== "auto") {
405297
+ if (function_call === "none") {
405298
+ count += FUNCTION_CALL_NONE_TOKEN_OVERHEAD;
405299
+ } else if (typeof function_call === "object" && function_call.name) {
405300
+ const fcNameResult = api.encodeTextWithLimit(
405301
+ function_call.name,
405302
+ tokenLimit - count,
405303
+ "skip"
405304
+ );
405305
+ if (fcNameResult.exceeded) return false;
405306
+ count += fcNameResult.count + FUNCTION_CALL_NAME_TOKEN_OVERHEAD;
405307
+ }
405308
+ if (count > tokenLimit) return false;
405309
+ }
405310
+ let systemPadded = false;
405311
+ for (const message of messages) {
405312
+ let overhead = MESSAGE_TOKEN_OVERHEAD;
405313
+ if (message.role) {
405314
+ const roleResult = api.encodeTextWithLimit(
405315
+ message.role,
405316
+ tokenLimit - count,
405317
+ "skip"
405318
+ );
405319
+ if (roleResult.exceeded) return false;
405320
+ count += roleResult.count;
405321
+ }
405322
+ let content = message.content ?? "";
405323
+ if (hasFunctions && message.role === "system" && !systemPadded) {
405324
+ if (content && !content.endsWith("\n")) {
405325
+ content = content + "\n";
405326
+ }
405327
+ systemPadded = true;
405328
+ }
405329
+ if (content) {
405330
+ const contentResult = api.encodeTextWithLimit(
405331
+ content,
405332
+ tokenLimit - count,
405333
+ "skip"
405334
+ );
405335
+ if (contentResult.exceeded) return false;
405336
+ count += contentResult.count;
405337
+ }
405338
+ if (message.name) {
405339
+ const nameResult = api.encodeTextWithLimit(
405340
+ message.name,
405341
+ tokenLimit - count,
405342
+ "skip"
405343
+ );
405344
+ if (nameResult.exceeded) return false;
405345
+ count += nameResult.count;
405346
+ overhead += MESSAGE_NAME_TOKEN_OVERHEAD;
405347
+ }
405348
+ if (message.function_call) {
405349
+ if (message.function_call.name) {
405350
+ const fcNameResult = api.encodeTextWithLimit(
405351
+ message.function_call.name,
405352
+ tokenLimit - count,
405353
+ "skip"
405354
+ );
405355
+ if (fcNameResult.exceeded) return false;
405356
+ count += fcNameResult.count;
405357
+ }
405358
+ if (message.function_call.arguments) {
405359
+ const fcArgsResult = api.encodeTextWithLimit(
405360
+ message.function_call.arguments,
405361
+ tokenLimit - count,
405362
+ "skip"
405363
+ );
405364
+ if (fcArgsResult.exceeded) return false;
405365
+ count += fcArgsResult.count;
405366
+ }
405367
+ overhead += FUNCTION_CALL_METADATA_TOKEN_OVERHEAD;
405368
+ }
405369
+ if (message.role === "function") {
405370
+ overhead -= FUNCTION_ROLE_TOKEN_DISCOUNT;
405371
+ }
405372
+ count += overhead;
405373
+ if (count > tokenLimit) return false;
405374
+ }
405375
+ return count;
405376
+ }
405147
405377
  export {
405148
405378
  DEFAULT_MODELS,
405149
405379
  LAST_UPDATED,
@@ -405172,6 +405402,8 @@ export {
405172
405402
  getOpenAIEncoding,
405173
405403
  getSentencePieceTokenizer,
405174
405404
  getTotalCost,
405405
+ isChatWithinTokenLimit,
405406
+ isWithinTokenLimit,
405175
405407
  loadSentencePieceTokenizer,
405176
405408
  parseModelProto
405177
405409
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-token-estimator",
3
- "version": "1.4.0",
3
+ "version": "1.5.0",
4
4
  "description": "Estimate and count tokens (incl. exact OpenAI BPE) and input costs for LLM API calls",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",