ai-token-estimator 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -561,6 +561,87 @@ Encodes text into **OpenAI token IDs** using tiktoken-compatible BPE tokenizatio
561
561
 
562
562
  Decodes OpenAI token IDs back into text using the selected encoding/model.
563
563
 
564
+ ### `isWithinTokenLimit(text, tokenLimit, options?): false | number`
565
+
566
+ Checks if text is within a token limit with **early exit optimization**. Returns `false` if the limit is exceeded, or the actual token count if within limit.
567
+
568
+ This is significantly faster than full tokenization when the limit is exceeded early in the text (up to 1000x+ faster for large texts with small limits).
569
+
570
+ ```typescript
571
+ import { isWithinTokenLimit } from 'ai-token-estimator';
572
+
573
+ // Returns token count if within limit
574
+ const count = isWithinTokenLimit('Hello, world!', 100, { model: 'gpt-4o' });
575
+ if (count !== false) {
576
+ console.log(`Text has ${count} tokens`);
577
+ }
578
+
579
+ // Returns false if exceeds limit (with early exit)
580
+ const result = isWithinTokenLimit(longText, 10, { model: 'gpt-4o' });
581
+ if (result === false) {
582
+ console.log('Text exceeds 10 tokens');
583
+ }
584
+ ```
585
+
586
+ **Parameters:**
587
+
588
+ ```typescript
589
+ interface IsWithinTokenLimitOptions {
590
+ model?: string; // OpenAI model (e.g., 'gpt-4o')
591
+ encoding?: OpenAIEncoding; // Explicit encoding override
592
+ allowSpecial?: SpecialTokenHandling; // How to handle special tokens
593
+ }
594
+ ```
595
+
596
+ **Throws:**
597
+ - `Error` if `tokenLimit` is invalid (NaN, Infinity, negative, non-integer)
598
+ - `Error` if `model` is a known non-OpenAI model (claude-*, gemini-*)
599
+
600
+ ### `isChatWithinTokenLimit(input): false | number`
601
+
602
+ Checks if chat messages are within a token limit with **early exit optimization**. Returns `false` if exceeded, or the actual token count if within limit.
603
+
604
+ Uses the same token counting logic as `countChatCompletionTokens()` but exits early when the limit is exceeded.
605
+
606
+ ```typescript
607
+ import { isChatWithinTokenLimit } from 'ai-token-estimator';
608
+
609
+ const result = isChatWithinTokenLimit({
610
+ messages: [
611
+ { role: 'system', content: 'You are a helpful assistant.' },
612
+ { role: 'user', content: 'Hello!' }
613
+ ],
614
+ model: 'gpt-4o',
615
+ tokenLimit: 100,
616
+ functions: [{ name: 'get_weather', parameters: { type: 'object' } }],
617
+ });
618
+
619
+ if (result === false) {
620
+ console.log('Messages exceed token limit');
621
+ } else {
622
+ console.log(`Messages use ${result} tokens`);
623
+ }
624
+ ```
625
+
626
+ **Parameters:**
627
+
628
+ ```typescript
629
+ interface IsChatWithinTokenLimitInput {
630
+ messages: ChatMessage[];
631
+ model: string;
632
+ tokenLimit: number;
633
+ encoding?: OpenAIEncoding;
634
+ functions?: FunctionDefinition[];
635
+ function_call?: FunctionCallOption;
636
+ }
637
+ ```
638
+
639
+ **Throws:**
640
+ - `Error` if `tokenLimit` is invalid (NaN, Infinity, negative, non-integer)
641
+ - `Error` if model is not an OpenAI model (unless encoding override provided)
642
+ - `Error` if tools, tool_choice, tool_calls, or tool_call_id are present
643
+ - `Error` if any message has non-string content
644
+
564
645
  ### `getModelConfig(model: string): ModelConfig`
565
646
 
566
647
  Returns the configuration for a specific model. Throws if the model is not found.
package/dist/index.cjs CHANGED
@@ -57,6 +57,8 @@ __export(index_exports, {
57
57
  getOpenAIEncoding: () => getOpenAIEncoding,
58
58
  getSentencePieceTokenizer: () => getSentencePieceTokenizer,
59
59
  getTotalCost: () => getTotalCost,
60
+ isChatWithinTokenLimit: () => isChatWithinTokenLimit,
61
+ isWithinTokenLimit: () => isWithinTokenLimit,
60
62
  loadSentencePieceTokenizer: () => loadSentencePieceTokenizer,
61
63
  parseModelProto: () => parseModelProto
62
64
  });
@@ -616,6 +618,83 @@ var BPETokenizer = class {
616
618
  }
617
619
  return tokens;
618
620
  }
621
+ /**
622
+ * Encode text with a token limit, returning early if the limit is exceeded.
623
+ * This is optimized for fast token-limit validation without full tokenization.
624
+ *
625
+ * @param text - The text to encode
626
+ * @param limit - Maximum number of tokens allowed
627
+ * @param allowedSpecial - Controls special token handling (same as encodeText)
628
+ * @returns Object with count and exceeded flag
629
+ */
630
+ encodeTextWithLimit(text, limit, allowedSpecial) {
631
+ if (!text) return { count: 0, exceeded: false };
632
+ if (limit < 0) return { count: 0, exceeded: true };
633
+ if (allowedSpecial === "skip") {
634
+ return this.encodeOrdinaryWithLimit(text, limit);
635
+ }
636
+ let count = 0;
637
+ if (this.specialTokenMap.size > 0) {
638
+ const parts = this.splitOnSpecialTokens(text, allowedSpecial);
639
+ for (const part of parts) {
640
+ if (part.isSpecial) {
641
+ count += 1;
642
+ if (count > limit) return { count, exceeded: true };
643
+ } else {
644
+ const result = this.encodeOrdinaryWithLimit(part.text, limit - count);
645
+ count += result.count;
646
+ if (result.exceeded) {
647
+ return { count, exceeded: true };
648
+ }
649
+ }
650
+ }
651
+ } else {
652
+ return this.encodeOrdinaryWithLimit(text, limit);
653
+ }
654
+ return { count, exceeded: false };
655
+ }
656
+ /**
657
+ * Incremental encoding with early exit.
658
+ * CRITICAL: Uses RegExp.exec() loop instead of text.match() to avoid
659
+ * allocating all pieces upfront. This enables true early exit.
660
+ */
661
+ encodeOrdinaryWithLimit(text, limit) {
662
+ if (!text) return { count: 0, exceeded: false };
663
+ if (limit < 0) return { count: 0, exceeded: true };
664
+ let count = 0;
665
+ const regex = new RegExp(
666
+ this.tokenSplitRegex.source,
667
+ this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
668
+ );
669
+ let match;
670
+ while ((match = regex.exec(text)) !== null) {
671
+ const piece = match[0];
672
+ if (piece.length === 0) {
673
+ regex.lastIndex++;
674
+ continue;
675
+ }
676
+ const cached = this.getFromCache(piece);
677
+ if (cached) {
678
+ count += cached.length;
679
+ if (count > limit) return { count, exceeded: true };
680
+ continue;
681
+ }
682
+ const pieceBytes = this.textEncoder.encode(piece);
683
+ const key = bytesToLatin1(pieceBytes);
684
+ const directRank = this.encoder.get(key);
685
+ if (directRank !== void 0) {
686
+ count += 1;
687
+ this.addToCache(piece, [directRank]);
688
+ if (count > limit) return { count, exceeded: true };
689
+ continue;
690
+ }
691
+ const pieceTokens = this.mergeBytePairs(pieceBytes);
692
+ count += pieceTokens.length;
693
+ this.addToCache(piece, pieceTokens);
694
+ if (count > limit) return { count, exceeded: true };
695
+ }
696
+ return { count, exceeded: false };
697
+ }
619
698
  /**
620
699
  * Core BPE merge algorithm.
621
700
  */
@@ -401892,7 +401971,8 @@ function getTokenizer(encoding) {
401892
401971
  }
401893
401972
  return {
401894
401973
  encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
401895
- decode: (tokens) => tokenizer.decodeTokens(tokens)
401974
+ decode: (tokens) => tokenizer.decodeTokens(tokens),
401975
+ encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
401896
401976
  };
401897
401977
  }
401898
401978
  function resolveEncoding(options) {
@@ -401932,6 +402012,39 @@ function decode(tokens, options) {
401932
402012
  const api = getTokenizer(encoding);
401933
402013
  return api.decode(tokens);
401934
402014
  }
402015
+ function validateTokenLimit(tokenLimit) {
402016
+ if (!Number.isFinite(tokenLimit)) {
402017
+ throw new Error("tokenLimit must be a finite number");
402018
+ }
402019
+ if (!Number.isInteger(tokenLimit)) {
402020
+ throw new Error("tokenLimit must be an integer");
402021
+ }
402022
+ if (tokenLimit < 0) {
402023
+ throw new Error("tokenLimit must be non-negative");
402024
+ }
402025
+ }
402026
+ function rejectNonOpenAIModel(model) {
402027
+ if (!model) return;
402028
+ if (model.startsWith("claude-")) {
402029
+ throw new Error(
402030
+ `Model "${model}" is an Anthropic model. isWithinTokenLimit only supports OpenAI models. Use the Anthropic API's count_tokens endpoint via estimateAsync() instead.`
402031
+ );
402032
+ }
402033
+ if (model.startsWith("gemini-")) {
402034
+ throw new Error(
402035
+ `Model "${model}" is a Google model. isWithinTokenLimit only supports OpenAI models. Use the Gemini API's countTokens endpoint via estimateAsync() instead.`
402036
+ );
402037
+ }
402038
+ }
402039
+ function isWithinTokenLimit(text, tokenLimit, options) {
402040
+ validateTokenLimit(tokenLimit);
402041
+ rejectNonOpenAIModel(options?.model);
402042
+ const encoding = resolveEncoding(options);
402043
+ const api = getTokenizer(encoding);
402044
+ const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
402045
+ const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
402046
+ return result.exceeded ? false : result.count;
402047
+ }
401935
402048
 
401936
402049
  // src/token-counter.ts
401937
402050
  function isNonOpenAIModel(model) {
@@ -405208,6 +405321,125 @@ function countChatCompletionTokens(input) {
405208
405321
  }
405209
405322
  return result;
405210
405323
  }
405324
+ function validateTokenLimit2(tokenLimit) {
405325
+ if (!Number.isFinite(tokenLimit)) {
405326
+ throw new Error("tokenLimit must be a finite number");
405327
+ }
405328
+ if (!Number.isInteger(tokenLimit)) {
405329
+ throw new Error("tokenLimit must be an integer");
405330
+ }
405331
+ if (tokenLimit < 0) {
405332
+ throw new Error("tokenLimit must be non-negative");
405333
+ }
405334
+ }
405335
+ function isChatWithinTokenLimit(input) {
405336
+ const { messages, model, tokenLimit, encoding, functions, function_call } = input;
405337
+ validateTokenLimit2(tokenLimit);
405338
+ validateNoToolsApi(input);
405339
+ validateMessages(messages);
405340
+ validateOpenAIModel(model, encoding);
405341
+ const resolvedEncoding = encoding ?? getOpenAIEncoding({ model });
405342
+ const api = getTokenizer(resolvedEncoding);
405343
+ let count = COMPLETION_REQUEST_TOKEN_OVERHEAD;
405344
+ if (count > tokenLimit) return false;
405345
+ const hasFunctions = Boolean(functions?.length);
405346
+ const hasSystemMessage = messages.some((m) => m.role === "system");
405347
+ if (hasFunctions && functions) {
405348
+ const formatted = formatFunctionDefinitions(functions);
405349
+ const funcResult = api.encodeTextWithLimit(
405350
+ formatted,
405351
+ tokenLimit - count,
405352
+ "skip"
405353
+ );
405354
+ if (funcResult.exceeded) return false;
405355
+ let funcOverhead = funcResult.count + FUNCTION_DEFINITION_TOKEN_OVERHEAD;
405356
+ if (hasSystemMessage) {
405357
+ funcOverhead -= SYSTEM_FUNCTION_TOKEN_DEDUCTION;
405358
+ }
405359
+ count += funcOverhead;
405360
+ if (count > tokenLimit) return false;
405361
+ }
405362
+ if (function_call && function_call !== "auto") {
405363
+ if (function_call === "none") {
405364
+ count += FUNCTION_CALL_NONE_TOKEN_OVERHEAD;
405365
+ } else if (typeof function_call === "object" && function_call.name) {
405366
+ const fcNameResult = api.encodeTextWithLimit(
405367
+ function_call.name,
405368
+ tokenLimit - count,
405369
+ "skip"
405370
+ );
405371
+ if (fcNameResult.exceeded) return false;
405372
+ count += fcNameResult.count + FUNCTION_CALL_NAME_TOKEN_OVERHEAD;
405373
+ }
405374
+ if (count > tokenLimit) return false;
405375
+ }
405376
+ let systemPadded = false;
405377
+ for (const message of messages) {
405378
+ let overhead = MESSAGE_TOKEN_OVERHEAD;
405379
+ if (message.role) {
405380
+ const roleResult = api.encodeTextWithLimit(
405381
+ message.role,
405382
+ tokenLimit - count,
405383
+ "skip"
405384
+ );
405385
+ if (roleResult.exceeded) return false;
405386
+ count += roleResult.count;
405387
+ }
405388
+ let content = message.content ?? "";
405389
+ if (hasFunctions && message.role === "system" && !systemPadded) {
405390
+ if (content && !content.endsWith("\n")) {
405391
+ content = content + "\n";
405392
+ }
405393
+ systemPadded = true;
405394
+ }
405395
+ if (content) {
405396
+ const contentResult = api.encodeTextWithLimit(
405397
+ content,
405398
+ tokenLimit - count,
405399
+ "skip"
405400
+ );
405401
+ if (contentResult.exceeded) return false;
405402
+ count += contentResult.count;
405403
+ }
405404
+ if (message.name) {
405405
+ const nameResult = api.encodeTextWithLimit(
405406
+ message.name,
405407
+ tokenLimit - count,
405408
+ "skip"
405409
+ );
405410
+ if (nameResult.exceeded) return false;
405411
+ count += nameResult.count;
405412
+ overhead += MESSAGE_NAME_TOKEN_OVERHEAD;
405413
+ }
405414
+ if (message.function_call) {
405415
+ if (message.function_call.name) {
405416
+ const fcNameResult = api.encodeTextWithLimit(
405417
+ message.function_call.name,
405418
+ tokenLimit - count,
405419
+ "skip"
405420
+ );
405421
+ if (fcNameResult.exceeded) return false;
405422
+ count += fcNameResult.count;
405423
+ }
405424
+ if (message.function_call.arguments) {
405425
+ const fcArgsResult = api.encodeTextWithLimit(
405426
+ message.function_call.arguments,
405427
+ tokenLimit - count,
405428
+ "skip"
405429
+ );
405430
+ if (fcArgsResult.exceeded) return false;
405431
+ count += fcArgsResult.count;
405432
+ }
405433
+ overhead += FUNCTION_CALL_METADATA_TOKEN_OVERHEAD;
405434
+ }
405435
+ if (message.role === "function") {
405436
+ overhead -= FUNCTION_ROLE_TOKEN_DISCOUNT;
405437
+ }
405438
+ count += overhead;
405439
+ if (count > tokenLimit) return false;
405440
+ }
405441
+ return count;
405442
+ }
405211
405443
  // Annotate the CommonJS export names for ESM import in node:
405212
405444
  0 && (module.exports = {
405213
405445
  DEFAULT_MODELS,
@@ -405237,6 +405469,8 @@ function countChatCompletionTokens(input) {
405237
405469
  getOpenAIEncoding,
405238
405470
  getSentencePieceTokenizer,
405239
405471
  getTotalCost,
405472
+ isChatWithinTokenLimit,
405473
+ isWithinTokenLimit,
405240
405474
  loadSentencePieceTokenizer,
405241
405475
  parseModelProto
405242
405476
  });
package/dist/index.d.cts CHANGED
@@ -55,6 +55,58 @@ declare function encode(text: string, options?: EncodeOptions): number[];
55
55
  * Decode OpenAI token IDs into text using tiktoken-compatible BPE encoding.
56
56
  */
57
57
  declare function decode(tokens: Iterable<number>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): string;
58
+ /**
59
+ * Options for isWithinTokenLimit.
60
+ */
61
+ interface IsWithinTokenLimitOptions {
62
+ /**
63
+ * Explicit OpenAI encoding override.
64
+ * When provided, this takes precedence over `model`.
65
+ */
66
+ encoding?: OpenAIEncoding;
67
+ /**
68
+ * OpenAI model ID used to select the appropriate encoding.
69
+ * Note: Non-OpenAI models (claude-*, gemini-*) are rejected.
70
+ */
71
+ model?: string;
72
+ /**
73
+ * How special tokens are handled.
74
+ * - `none_raise` (default): throw if special tokens appear
75
+ * - `none`: treat special tokens as regular text
76
+ * - `all`: allow special tokens and encode them as special token IDs
77
+ */
78
+ allowSpecial?: SpecialTokenHandling;
79
+ }
80
+ /**
81
+ * Check if text is within a token limit, with early exit optimization.
82
+ *
83
+ * Returns `false` if the token count exceeds the limit, otherwise returns the
84
+ * actual token count. This is significantly faster than full tokenization when
85
+ * the limit is exceeded early in the text.
86
+ *
87
+ * @param text - The text to check
88
+ * @param tokenLimit - Maximum allowed tokens (must be non-negative finite integer)
89
+ * @param options - Encoding options
90
+ * @returns `false` if exceeded, or the actual token count if within limit
91
+ * @throws Error if tokenLimit is invalid (NaN, Infinity, negative, non-integer)
92
+ * @throws Error if model is a known non-OpenAI model (claude-*, gemini-*)
93
+ *
94
+ * @example
95
+ * ```typescript
96
+ * // Returns token count if within limit
97
+ * const count = isWithinTokenLimit('Hello, world!', 100, { model: 'gpt-4o' });
98
+ * if (count !== false) {
99
+ * console.log(`Text has ${count} tokens`);
100
+ * }
101
+ *
102
+ * // Returns false if exceeds limit
103
+ * const result = isWithinTokenLimit(longText, 10, { model: 'gpt-4o' });
104
+ * if (result === false) {
105
+ * console.log('Text exceeds 10 tokens');
106
+ * }
107
+ * ```
108
+ */
109
+ declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: IsWithinTokenLimitOptions): false | number;
58
110
 
59
111
  /**
60
112
  * Configuration for a specific LLM model.
@@ -542,6 +594,52 @@ declare function countTokens(input: TokenCountInput): TokenCountOutput;
542
594
  * ```
543
595
  */
544
596
  declare function countChatCompletionTokens(input: ChatCompletionTokenCountInput): ChatCompletionTokenCountOutput;
597
+ /**
598
+ * Input for isChatWithinTokenLimit.
599
+ * Object-style input to match countChatCompletionTokens API.
600
+ */
601
+ interface IsChatWithinTokenLimitInput {
602
+ messages: ChatMessage[];
603
+ model: string;
604
+ tokenLimit: number;
605
+ encoding?: OpenAIEncoding;
606
+ functions?: FunctionDefinition[];
607
+ function_call?: FunctionCallOption;
608
+ }
609
+ /**
610
+ * Check if chat messages are within a token limit, with early exit optimization.
611
+ *
612
+ * Uses object-style input to match countChatCompletionTokens API.
613
+ * Returns `false` if the token count exceeds the limit, otherwise returns
614
+ * the actual token count.
615
+ *
616
+ * This is significantly faster than full tokenization when the limit is
617
+ * exceeded early in the input.
618
+ *
619
+ * @throws {Error} If tokenLimit is invalid (NaN, Infinity, negative, non-integer)
620
+ * @throws {Error} If model is not an OpenAI model (unless encoding override provided)
621
+ * @throws {Error} If tools, tool_choice, tool_calls, or tool_call_id are present
622
+ * @throws {Error} If any message has non-string content (arrays, numbers, objects)
623
+ *
624
+ * @example
625
+ * ```typescript
626
+ * const result = isChatWithinTokenLimit({
627
+ * messages: [
628
+ * { role: 'system', content: 'You are a helpful assistant.' },
629
+ * { role: 'user', content: 'Hello!' }
630
+ * ],
631
+ * model: 'gpt-4o',
632
+ * tokenLimit: 100,
633
+ * });
634
+ *
635
+ * if (result === false) {
636
+ * console.log('Messages exceed token limit');
637
+ * } else {
638
+ * console.log(`Messages use ${result} tokens`);
639
+ * }
640
+ * ```
641
+ */
642
+ declare function isChatWithinTokenLimit(input: IsChatWithinTokenLimitInput): false | number;
545
643
 
546
644
  interface AnthropicCountTokensParams {
547
645
  /** Claude model id, e.g. `claude-sonnet-4-5` */
@@ -818,4 +916,4 @@ declare function clearModelCache(): void;
818
916
  */
819
917
  declare function parseModelProto(buffer: Uint8Array): ModelProto;
820
918
 
821
- export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, loadSentencePieceTokenizer, parseModelProto };
919
+ export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
package/dist/index.d.ts CHANGED
@@ -55,6 +55,58 @@ declare function encode(text: string, options?: EncodeOptions): number[];
55
55
  * Decode OpenAI token IDs into text using tiktoken-compatible BPE encoding.
56
56
  */
57
57
  declare function decode(tokens: Iterable<number>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): string;
58
+ /**
59
+ * Options for isWithinTokenLimit.
60
+ */
61
+ interface IsWithinTokenLimitOptions {
62
+ /**
63
+ * Explicit OpenAI encoding override.
64
+ * When provided, this takes precedence over `model`.
65
+ */
66
+ encoding?: OpenAIEncoding;
67
+ /**
68
+ * OpenAI model ID used to select the appropriate encoding.
69
+ * Note: Non-OpenAI models (claude-*, gemini-*) are rejected.
70
+ */
71
+ model?: string;
72
+ /**
73
+ * How special tokens are handled.
74
+ * - `none_raise` (default): throw if special tokens appear
75
+ * - `none`: treat special tokens as regular text
76
+ * - `all`: allow special tokens and encode them as special token IDs
77
+ */
78
+ allowSpecial?: SpecialTokenHandling;
79
+ }
80
+ /**
81
+ * Check if text is within a token limit, with early exit optimization.
82
+ *
83
+ * Returns `false` if the token count exceeds the limit, otherwise returns the
84
+ * actual token count. This is significantly faster than full tokenization when
85
+ * the limit is exceeded early in the text.
86
+ *
87
+ * @param text - The text to check
88
+ * @param tokenLimit - Maximum allowed tokens (must be non-negative finite integer)
89
+ * @param options - Encoding options
90
+ * @returns `false` if exceeded, or the actual token count if within limit
91
+ * @throws Error if tokenLimit is invalid (NaN, Infinity, negative, non-integer)
92
+ * @throws Error if model is a known non-OpenAI model (claude-*, gemini-*)
93
+ *
94
+ * @example
95
+ * ```typescript
96
+ * // Returns token count if within limit
97
+ * const count = isWithinTokenLimit('Hello, world!', 100, { model: 'gpt-4o' });
98
+ * if (count !== false) {
99
+ * console.log(`Text has ${count} tokens`);
100
+ * }
101
+ *
102
+ * // Returns false if exceeds limit
103
+ * const result = isWithinTokenLimit(longText, 10, { model: 'gpt-4o' });
104
+ * if (result === false) {
105
+ * console.log('Text exceeds 10 tokens');
106
+ * }
107
+ * ```
108
+ */
109
+ declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: IsWithinTokenLimitOptions): false | number;
58
110
 
59
111
  /**
60
112
  * Configuration for a specific LLM model.
@@ -542,6 +594,52 @@ declare function countTokens(input: TokenCountInput): TokenCountOutput;
542
594
  * ```
543
595
  */
544
596
  declare function countChatCompletionTokens(input: ChatCompletionTokenCountInput): ChatCompletionTokenCountOutput;
597
+ /**
598
+ * Input for isChatWithinTokenLimit.
599
+ * Object-style input to match countChatCompletionTokens API.
600
+ */
601
+ interface IsChatWithinTokenLimitInput {
602
+ messages: ChatMessage[];
603
+ model: string;
604
+ tokenLimit: number;
605
+ encoding?: OpenAIEncoding;
606
+ functions?: FunctionDefinition[];
607
+ function_call?: FunctionCallOption;
608
+ }
609
+ /**
610
+ * Check if chat messages are within a token limit, with early exit optimization.
611
+ *
612
+ * Uses object-style input to match countChatCompletionTokens API.
613
+ * Returns `false` if the token count exceeds the limit, otherwise returns
614
+ * the actual token count.
615
+ *
616
+ * This is significantly faster than full tokenization when the limit is
617
+ * exceeded early in the input.
618
+ *
619
+ * @throws {Error} If tokenLimit is invalid (NaN, Infinity, negative, non-integer)
620
+ * @throws {Error} If model is not an OpenAI model (unless encoding override provided)
621
+ * @throws {Error} If tools, tool_choice, tool_calls, or tool_call_id are present
622
+ * @throws {Error} If any message has non-string content (arrays, numbers, objects)
623
+ *
624
+ * @example
625
+ * ```typescript
626
+ * const result = isChatWithinTokenLimit({
627
+ * messages: [
628
+ * { role: 'system', content: 'You are a helpful assistant.' },
629
+ * { role: 'user', content: 'Hello!' }
630
+ * ],
631
+ * model: 'gpt-4o',
632
+ * tokenLimit: 100,
633
+ * });
634
+ *
635
+ * if (result === false) {
636
+ * console.log('Messages exceed token limit');
637
+ * } else {
638
+ * console.log(`Messages use ${result} tokens`);
639
+ * }
640
+ * ```
641
+ */
642
+ declare function isChatWithinTokenLimit(input: IsChatWithinTokenLimitInput): false | number;
545
643
 
546
644
  interface AnthropicCountTokensParams {
547
645
  /** Claude model id, e.g. `claude-sonnet-4-5` */
@@ -818,4 +916,4 @@ declare function clearModelCache(): void;
818
916
  */
819
917
  declare function parseModelProto(buffer: Uint8Array): ModelProto;
820
918
 
821
- export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, loadSentencePieceTokenizer, parseModelProto };
919
+ export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
package/dist/index.js CHANGED
@@ -552,6 +552,83 @@ var BPETokenizer = class {
552
552
  }
553
553
  return tokens;
554
554
  }
555
+ /**
556
+ * Encode text with a token limit, returning early if the limit is exceeded.
557
+ * This is optimized for fast token-limit validation without full tokenization.
558
+ *
559
+ * @param text - The text to encode
560
+ * @param limit - Maximum number of tokens allowed
561
+ * @param allowedSpecial - Controls special token handling (same as encodeText)
562
+ * @returns Object with count and exceeded flag
563
+ */
564
+ encodeTextWithLimit(text, limit, allowedSpecial) {
565
+ if (!text) return { count: 0, exceeded: false };
566
+ if (limit < 0) return { count: 0, exceeded: true };
567
+ if (allowedSpecial === "skip") {
568
+ return this.encodeOrdinaryWithLimit(text, limit);
569
+ }
570
+ let count = 0;
571
+ if (this.specialTokenMap.size > 0) {
572
+ const parts = this.splitOnSpecialTokens(text, allowedSpecial);
573
+ for (const part of parts) {
574
+ if (part.isSpecial) {
575
+ count += 1;
576
+ if (count > limit) return { count, exceeded: true };
577
+ } else {
578
+ const result = this.encodeOrdinaryWithLimit(part.text, limit - count);
579
+ count += result.count;
580
+ if (result.exceeded) {
581
+ return { count, exceeded: true };
582
+ }
583
+ }
584
+ }
585
+ } else {
586
+ return this.encodeOrdinaryWithLimit(text, limit);
587
+ }
588
+ return { count, exceeded: false };
589
+ }
590
+ /**
591
+ * Incremental encoding with early exit.
592
+ * CRITICAL: Uses RegExp.exec() loop instead of text.match() to avoid
593
+ * allocating all pieces upfront. This enables true early exit.
594
+ */
595
+ encodeOrdinaryWithLimit(text, limit) {
596
+ if (!text) return { count: 0, exceeded: false };
597
+ if (limit < 0) return { count: 0, exceeded: true };
598
+ let count = 0;
599
+ const regex = new RegExp(
600
+ this.tokenSplitRegex.source,
601
+ this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
602
+ );
603
+ let match;
604
+ while ((match = regex.exec(text)) !== null) {
605
+ const piece = match[0];
606
+ if (piece.length === 0) {
607
+ regex.lastIndex++;
608
+ continue;
609
+ }
610
+ const cached = this.getFromCache(piece);
611
+ if (cached) {
612
+ count += cached.length;
613
+ if (count > limit) return { count, exceeded: true };
614
+ continue;
615
+ }
616
+ const pieceBytes = this.textEncoder.encode(piece);
617
+ const key = bytesToLatin1(pieceBytes);
618
+ const directRank = this.encoder.get(key);
619
+ if (directRank !== void 0) {
620
+ count += 1;
621
+ this.addToCache(piece, [directRank]);
622
+ if (count > limit) return { count, exceeded: true };
623
+ continue;
624
+ }
625
+ const pieceTokens = this.mergeBytePairs(pieceBytes);
626
+ count += pieceTokens.length;
627
+ this.addToCache(piece, pieceTokens);
628
+ if (count > limit) return { count, exceeded: true };
629
+ }
630
+ return { count, exceeded: false };
631
+ }
555
632
  /**
556
633
  * Core BPE merge algorithm.
557
634
  */
@@ -401828,7 +401905,8 @@ function getTokenizer(encoding) {
401828
401905
  }
401829
401906
  return {
401830
401907
  encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
401831
- decode: (tokens) => tokenizer.decodeTokens(tokens)
401908
+ decode: (tokens) => tokenizer.decodeTokens(tokens),
401909
+ encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
401832
401910
  };
401833
401911
  }
401834
401912
  function resolveEncoding(options) {
@@ -401868,6 +401946,39 @@ function decode(tokens, options) {
401868
401946
  const api = getTokenizer(encoding);
401869
401947
  return api.decode(tokens);
401870
401948
  }
401949
/**
 * Validates that `tokenLimit` is a usable token budget: a finite,
 * non-negative integer.
 *
 * @throws {Error} when the limit is NaN/Infinity, fractional, or negative.
 */
function validateTokenLimit(tokenLimit) {
  // Ordered checks: a NaN limit reports "finite" before "integer".
  const checks = [
    [(n) => Number.isFinite(n), "tokenLimit must be a finite number"],
    [(n) => Number.isInteger(n), "tokenLimit must be an integer"],
    [(n) => n >= 0, "tokenLimit must be non-negative"],
  ];
  for (const [ok, message] of checks) {
    if (!ok(tokenLimit)) {
      throw new Error(message);
    }
  }
}
401960
/**
 * Throws when `model` is a known non-OpenAI model name (claude-* or
 * gemini-*), pointing the caller at the vendor's own counting endpoint.
 * Falsy/absent model names pass through unchanged.
 *
 * @throws {Error} for Anthropic (claude-*) and Google (gemini-*) models.
 */
function rejectNonOpenAIModel(model) {
  if (!model) return;
  const vendorPrefixes = [
    [
      "claude-",
      `Model "${model}" is an Anthropic model. isWithinTokenLimit only supports OpenAI models. Use the Anthropic API's count_tokens endpoint via estimateAsync() instead.`,
    ],
    [
      "gemini-",
      `Model "${model}" is a Google model. isWithinTokenLimit only supports OpenAI models. Use the Gemini API's countTokens endpoint via estimateAsync() instead.`,
    ],
  ];
  for (const [prefix, message] of vendorPrefixes) {
    if (model.startsWith(prefix)) {
      throw new Error(message);
    }
  }
}
401973
/**
 * Checks text against a token budget with early exit.
 *
 * Returns the exact token count when the text fits within `tokenLimit`,
 * or `false` as soon as the count passes the limit — avoiding full
 * tokenization of large texts.
 *
 * @throws {Error} for an invalid `tokenLimit` or a known non-OpenAI model.
 */
function isWithinTokenLimit(text, tokenLimit, options) {
  validateTokenLimit(tokenLimit);
  rejectNonOpenAIModel(options?.model);
  const api = getTokenizer(resolveEncoding(options));
  const specialHandling = resolveAllowedSpecial(options?.allowSpecial);
  const { count, exceeded } = api.encodeTextWithLimit(text, tokenLimit, specialHandling);
  if (exceeded) return false;
  return count;
}
401871
401982
 
401872
401983
  // src/token-counter.ts
401873
401984
  function isNonOpenAIModel(model) {
@@ -405144,6 +405255,125 @@ function countChatCompletionTokens(input) {
405144
405255
  }
405145
405256
  return result;
405146
405257
  }
405258
/**
 * Validates the `tokenLimit` budget for chat-level limit checks: it must
 * be a finite, non-negative integer.
 *
 * NOTE(review): duplicated from the tokenizer module by the bundler —
 * keep the messages in sync with the other copy.
 *
 * @throws {Error} when the limit is NaN/Infinity, fractional, or negative.
 */
function validateTokenLimit2(tokenLimit) {
  if (!Number.isFinite(tokenLimit)) throw new Error("tokenLimit must be a finite number");
  if (!Number.isInteger(tokenLimit)) throw new Error("tokenLimit must be an integer");
  if (tokenLimit < 0) throw new Error("tokenLimit must be non-negative");
}
+ function isChatWithinTokenLimit(input) {
405270
+ const { messages, model, tokenLimit, encoding, functions, function_call } = input;
405271
+ validateTokenLimit2(tokenLimit);
405272
+ validateNoToolsApi(input);
405273
+ validateMessages(messages);
405274
+ validateOpenAIModel(model, encoding);
405275
+ const resolvedEncoding = encoding ?? getOpenAIEncoding({ model });
405276
+ const api = getTokenizer(resolvedEncoding);
405277
+ let count = COMPLETION_REQUEST_TOKEN_OVERHEAD;
405278
+ if (count > tokenLimit) return false;
405279
+ const hasFunctions = Boolean(functions?.length);
405280
+ const hasSystemMessage = messages.some((m) => m.role === "system");
405281
+ if (hasFunctions && functions) {
405282
+ const formatted = formatFunctionDefinitions(functions);
405283
+ const funcResult = api.encodeTextWithLimit(
405284
+ formatted,
405285
+ tokenLimit - count,
405286
+ "skip"
405287
+ );
405288
+ if (funcResult.exceeded) return false;
405289
+ let funcOverhead = funcResult.count + FUNCTION_DEFINITION_TOKEN_OVERHEAD;
405290
+ if (hasSystemMessage) {
405291
+ funcOverhead -= SYSTEM_FUNCTION_TOKEN_DEDUCTION;
405292
+ }
405293
+ count += funcOverhead;
405294
+ if (count > tokenLimit) return false;
405295
+ }
405296
+ if (function_call && function_call !== "auto") {
405297
+ if (function_call === "none") {
405298
+ count += FUNCTION_CALL_NONE_TOKEN_OVERHEAD;
405299
+ } else if (typeof function_call === "object" && function_call.name) {
405300
+ const fcNameResult = api.encodeTextWithLimit(
405301
+ function_call.name,
405302
+ tokenLimit - count,
405303
+ "skip"
405304
+ );
405305
+ if (fcNameResult.exceeded) return false;
405306
+ count += fcNameResult.count + FUNCTION_CALL_NAME_TOKEN_OVERHEAD;
405307
+ }
405308
+ if (count > tokenLimit) return false;
405309
+ }
405310
+ let systemPadded = false;
405311
+ for (const message of messages) {
405312
+ let overhead = MESSAGE_TOKEN_OVERHEAD;
405313
+ if (message.role) {
405314
+ const roleResult = api.encodeTextWithLimit(
405315
+ message.role,
405316
+ tokenLimit - count,
405317
+ "skip"
405318
+ );
405319
+ if (roleResult.exceeded) return false;
405320
+ count += roleResult.count;
405321
+ }
405322
+ let content = message.content ?? "";
405323
+ if (hasFunctions && message.role === "system" && !systemPadded) {
405324
+ if (content && !content.endsWith("\n")) {
405325
+ content = content + "\n";
405326
+ }
405327
+ systemPadded = true;
405328
+ }
405329
+ if (content) {
405330
+ const contentResult = api.encodeTextWithLimit(
405331
+ content,
405332
+ tokenLimit - count,
405333
+ "skip"
405334
+ );
405335
+ if (contentResult.exceeded) return false;
405336
+ count += contentResult.count;
405337
+ }
405338
+ if (message.name) {
405339
+ const nameResult = api.encodeTextWithLimit(
405340
+ message.name,
405341
+ tokenLimit - count,
405342
+ "skip"
405343
+ );
405344
+ if (nameResult.exceeded) return false;
405345
+ count += nameResult.count;
405346
+ overhead += MESSAGE_NAME_TOKEN_OVERHEAD;
405347
+ }
405348
+ if (message.function_call) {
405349
+ if (message.function_call.name) {
405350
+ const fcNameResult = api.encodeTextWithLimit(
405351
+ message.function_call.name,
405352
+ tokenLimit - count,
405353
+ "skip"
405354
+ );
405355
+ if (fcNameResult.exceeded) return false;
405356
+ count += fcNameResult.count;
405357
+ }
405358
+ if (message.function_call.arguments) {
405359
+ const fcArgsResult = api.encodeTextWithLimit(
405360
+ message.function_call.arguments,
405361
+ tokenLimit - count,
405362
+ "skip"
405363
+ );
405364
+ if (fcArgsResult.exceeded) return false;
405365
+ count += fcArgsResult.count;
405366
+ }
405367
+ overhead += FUNCTION_CALL_METADATA_TOKEN_OVERHEAD;
405368
+ }
405369
+ if (message.role === "function") {
405370
+ overhead -= FUNCTION_ROLE_TOKEN_DISCOUNT;
405371
+ }
405372
+ count += overhead;
405373
+ if (count > tokenLimit) return false;
405374
+ }
405375
+ return count;
405376
+ }
405147
405377
  export {
405148
405378
  DEFAULT_MODELS,
405149
405379
  LAST_UPDATED,
@@ -405172,6 +405402,8 @@ export {
405172
405402
  getOpenAIEncoding,
405173
405403
  getSentencePieceTokenizer,
405174
405404
  getTotalCost,
405405
+ isChatWithinTokenLimit,
405406
+ isWithinTokenLimit,
405175
405407
  loadSentencePieceTokenizer,
405176
405408
  parseModelProto
405177
405409
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-token-estimator",
3
- "version": "1.4.0",
3
+ "version": "1.5.0",
4
4
  "description": "Estimate and count tokens (incl. exact OpenAI BPE) and input costs for LLM API calls",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",