ai-token-estimator 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +114 -0
- package/dist/index.cjs +253 -1
- package/dist/index.d.cts +105 -1
- package/dist/index.d.ts +105 -1
- package/dist/index.js +249 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -12,6 +12,8 @@ The best way to estimate **tokens + input cost** for LLM calls — with **exact
  - **Exact OpenAI tokenization** (tiktoken-compatible BPE): `encode()` / `decode()` / `openai_exact`
  - **Chat-aware tokenization**: `encodeChat()` returns exact token IDs for chat messages using ChatML format
+ - **Fast token limit checking**: `isWithinTokenLimit()` / `isChatWithinTokenLimit()` with early-exit optimization (up to 1000x faster for large texts)
+ - **Generator-based streaming**: `encodeGenerator()` / `encodeChatGenerator()` / `decodeGenerator()` / `decodeAsyncGenerator()` for memory-efficient tokenization
  - **OpenAI chat completion token counting** (legacy `functions` API): `countChatCompletionTokens()` with optional per-message breakdown
  - **Pure TypeScript SentencePiece tokenizer** (no native dependencies):
    - Supports `.model` files (protobuf format)
@@ -91,6 +93,43 @@ const { totalTokens } = countChatCompletionTokens({
  });
  ```

+ ### Fast token limit checking (early exit)
+
+ ```ts
+ import { isWithinTokenLimit, isChatWithinTokenLimit } from 'ai-token-estimator';
+
+ // Plain text - returns token count or false if exceeded
+ const count = isWithinTokenLimit(longText, 4096, { model: 'gpt-4o' });
+ if (count === false) console.log('Text exceeds limit');
+
+ // Chat messages - same early-exit optimization
+ const chatCount = isChatWithinTokenLimit({
+   messages: [{ role: 'user', content: longText }],
+   model: 'gpt-4o',
+   tokenLimit: 4096,
+ });
+ ```
+
+ ### Generator-based streaming tokenization
+
+ ```ts
+ import { encodeGenerator, decodeAsyncGenerator } from 'ai-token-estimator';
+
+ // Stream-encode large text (memory efficient)
+ let tokenCount = 0;
+ for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+   tokenCount += tokenChunk.length;
+   // Process chunk...
+ }
+
+ // Decode streaming LLM response
+ async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+   for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+     process.stdout.write(text);
+   }
+ }
+ ```
+
  ### Local SentencePiece token counting

  ```ts
@@ -700,6 +739,81 @@ interface IsChatWithinTokenLimitInput {
  - `Error` if tools, tool_choice, tool_calls, or tool_call_id are present
  - `Error` if any message has non-string content

+ ### Generator APIs
+
+ Generator-based APIs for memory-efficient streaming tokenization.
+
+ #### `encodeGenerator(text, options?): Generator<number[], number, undefined>`
+
+ Encode text yielding token chunks. Memory-efficient for large inputs.
+
+ - **Yields:** `number[]` — token IDs per regex-matched piece (word/punctuation)
+ - **Returns:** `number` — total token count when iteration completes
+
+ ```typescript
+ import { encodeGenerator } from 'ai-token-estimator';
+
+ // Stream-encode large text
+ let tokenCount = 0;
+ for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+   tokenCount += tokenChunk.length;
+ }
+
+ // Or get total count from return value
+ const gen = encodeGenerator(text, { model: 'gpt-4o' });
+ let result = gen.next();
+ while (!result.done) result = gen.next();
+ console.log('Total tokens:', result.value);
+ ```
+
+ #### `encodeChatGenerator(messages, options?): Generator<number[], number, undefined>`
+
+ Encode chat messages yielding token chunks per message component.
+
+ - **Yields:** `number[]` — token IDs per component (special tokens, role, content chunks, etc.)
+ - **Returns:** `number` — total token count
+
+ ```typescript
+ import { encodeChatGenerator } from 'ai-token-estimator';
+
+ const messages = [
+   { role: 'system', content: 'You are helpful.' },
+   { role: 'user', content: 'Hello!' }
+ ];
+
+ for (const tokenChunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
+   console.log('Chunk:', tokenChunk);
+ }
+ ```
+
+ #### `decodeGenerator(tokens, options?): Generator<string, void, void>`
+
+ Decode tokens yielding text chunks. Uses TextDecoder streaming mode — may yield empty strings when buffering incomplete UTF-8 sequences.
+
+ ```typescript
+ import { encode, decodeGenerator } from 'ai-token-estimator';
+
+ const tokens = encode('Hello, world!', { model: 'gpt-4o' });
+ for (const textChunk of decodeGenerator(tokens, { model: 'gpt-4o' })) {
+   process.stdout.write(textChunk);
+ }
+ ```
+
+ #### `decodeAsyncGenerator(tokens, options?): AsyncGenerator<string, void, void>`
+
+ Decode async token stream yielding text chunks. Accepts `AsyncIterable<number | number[]>` for flexibility with streaming APIs.
+
+ ```typescript
+ import { decodeAsyncGenerator } from 'ai-token-estimator';
+
+ // Decode streaming LLM response
+ async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+   for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+     process.stdout.write(text);
+   }
+ }
+ ```
+
  ### `getModelConfig(model: string): ModelConfig`

  Returns the configuration for a specific model. Throws if the model is not found.
package/dist/index.cjs
CHANGED
@@ -41,10 +41,14 @@ __export(index_exports, {
    countSentencePieceTokensAsync: () => countSentencePieceTokensAsync,
    countTokens: () => countTokens,
    decode: () => decode,
+   decodeAsyncGenerator: () => decodeAsyncGenerator,
+   decodeGenerator: () => decodeGenerator,
    decodeSentencePiece: () => decodeSentencePiece,
    decodeSentencePieceAsync: () => decodeSentencePieceAsync,
    encode: () => encode,
    encodeChat: () => encodeChat,
+   encodeChatGenerator: () => encodeChatGenerator,
+   encodeGenerator: () => encodeGenerator,
    encodeSentencePiece: () => encodeSentencePiece,
    encodeSentencePieceAsync: () => encodeSentencePieceAsync,
    ensureSentencePieceModel: () => ensureSentencePieceModel,
@@ -778,6 +782,7 @@ var BPETokenizer = class {
    }
    /**
     * Add an entry to the cache, evicting LRU entries if necessary.
+    * Freezes the array to prevent mutation by consumers (especially generator yields).
     */
    addToCache(key, value) {
      if (this.cacheCapacity <= 0) return;
@@ -790,6 +795,7 @@ var BPETokenizer = class {
          removed++;
        }
      }
+     Object.freeze(value);
      this.tokenCache.set(key, value);
    }
    /**
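For context on the `Object.freeze` addition: the BPE cache stores one token array per text piece, and the new generator methods yield those cached arrays directly to callers instead of copying them into a flat result. A standalone sketch (not the package's code) of the mutation hazard this guards against:

```ts
// Simplified stand-in for an LRU token cache: every cache hit returns the
// same array instance, so a consumer that mutates a yielded chunk would
// silently corrupt every later encode of the same piece. Freezing the
// cached array turns that mutation into an error instead.
const cache = new Map<string, readonly number[]>();

function tokensFor(piece: string): readonly number[] {
  let tokens = cache.get(piece);
  if (!tokens) {
    tokens = Object.freeze([piece.length]); // placeholder "tokenization"
    cache.set(piece, tokens);
  }
  return tokens;
}

const chunk = tokensFor('hello') as number[];
// chunk.push(42); // throws in strict mode; without freeze it would poison the cache
```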
@@ -830,6 +836,158 @@ var BPETokenizer = class {
    clearCache() {
      this.tokenCache.clear();
    }
+   // ===========================================================================
+   // Generator Methods
+   // ===========================================================================
+   /**
+    * Generator version of encodeText. Yields token arrays per regex-matched piece.
+    * Returns total token count.
+    *
+    * @param text - The text to encode
+    * @param allowedSpecial - Controls special token handling (same as encodeText)
+    * @returns Generator that yields token arrays and returns total count
+    */
+   *encodeTextGenerator(text, allowedSpecial) {
+     if (!text) return 0;
+     let totalTokens = 0;
+     if (allowedSpecial === "skip") {
+       const gen = this.encodeOrdinaryGenerator(text);
+       let result = gen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = gen.next();
+       }
+       return totalTokens;
+     }
+     if (this.specialTokenMap.size > 0) {
+       const parts = this.splitOnSpecialTokens(text, allowedSpecial);
+       for (const part of parts) {
+         if (part.isSpecial) {
+           const tokenId = this.specialTokenMap.get(part.text);
+           yield [tokenId];
+           totalTokens += 1;
+         } else {
+           const gen = this.encodeOrdinaryGenerator(part.text);
+           let result = gen.next();
+           while (!result.done) {
+             yield result.value;
+             totalTokens += result.value.length;
+             result = gen.next();
+           }
+         }
+       }
+     } else {
+       const gen = this.encodeOrdinaryGenerator(text);
+       let result = gen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = gen.next();
+       }
+     }
+     return totalTokens;
+   }
+   /**
+    * Generator version of encodeOrdinary. Yields token arrays per regex piece.
+    * Uses same cache logic as encodeOrdinary.
+    */
+   *encodeOrdinaryGenerator(text) {
+     if (!text) return;
+     const regex = new RegExp(
+       this.tokenSplitRegex.source,
+       this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
+     );
+     let match;
+     while ((match = regex.exec(text)) !== null) {
+       const piece = match[0];
+       if (piece.length === 0) {
+         regex.lastIndex++;
+         continue;
+       }
+       const cached = this.getFromCache(piece);
+       if (cached) {
+         yield cached;
+         continue;
+       }
+       const pieceBytes = this.textEncoder.encode(piece);
+       const key = bytesToLatin1(pieceBytes);
+       const directRank = this.encoder.get(key);
+       if (directRank !== void 0) {
+         const tokens = [directRank];
+         this.addToCache(piece, tokens);
+         yield tokens;
+         continue;
+       }
+       const pieceTokens = this.mergeBytePairs(pieceBytes);
+       this.addToCache(piece, pieceTokens);
+       yield pieceTokens;
+     }
+   }
+   /**
+    * Generator version of decodeTokens. Yields text chunks.
+    * Uses TextDecoder streaming mode to handle partial UTF-8 correctly.
+    * May yield empty strings when buffering incomplete sequences.
+    *
+    * Streaming semantics:
+    * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+    * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+    */
+   *decodeTokensGenerator(tokens) {
+     const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+     for (const token of tokens) {
+       const specialToken = this.specialTokenDecoder.get(token);
+       if (specialToken !== void 0) {
+         const flushed = streamingDecoder.decode(new Uint8Array(0));
+         if (flushed) yield flushed;
+         yield specialToken;
+         continue;
+       }
+       const tokenBytes = this.decoder.get(token);
+       if (!tokenBytes) {
+         throw new Error(
+           `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+         );
+       }
+       const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+       yield decoded;
+     }
+     const final = streamingDecoder.decode();
+     if (final) yield final;
+   }
+   /**
+    * Async generator version of decodeTokens.
+    * Accepts AsyncIterable<number | number[]> for flexibility.
+    *
+    * Streaming semantics:
+    * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+    * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+    */
+   async *decodeTokensAsyncGenerator(tokens) {
+     const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+     for await (const tokenOrChunk of tokens) {
+       const tokenArray = typeof tokenOrChunk === "number" ? [tokenOrChunk] : tokenOrChunk;
+       for (const token of tokenArray) {
+         const specialToken = this.specialTokenDecoder.get(token);
+         if (specialToken !== void 0) {
+           const flushed = streamingDecoder.decode(new Uint8Array(0));
+           if (flushed) yield flushed;
+           yield specialToken;
+           continue;
+         }
+         const tokenBytes = this.decoder.get(token);
+         if (!tokenBytes) {
+           throw new Error(
+             `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+           );
+         }
+         const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+         yield decoded;
+       }
+     }
+     const final = streamingDecoder.decode();
+     if (final) yield final;
+   }
  };

  // src/bpe/special-tokens.ts
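The streaming semantics described in the comments above rest on standard `TextDecoder` behavior rather than anything package-specific. A self-contained sketch showing why a token-by-token decode can yield empty strings until a multi-byte UTF-8 sequence completes:

```ts
// 'é' encodes to two UTF-8 bytes (0xC3 0xA9). Feeding them one at a time,
// as decodeTokensGenerator does per token, buffers the partial sequence.
const bytes = new TextEncoder().encode('é');
const decoder = new TextDecoder('utf-8', { fatal: false });

console.log(JSON.stringify(decoder.decode(bytes.subarray(0, 1), { stream: true }))); // "" (buffered)
console.log(JSON.stringify(decoder.decode(bytes.subarray(1), { stream: true })));    // "é"
console.log(JSON.stringify(decoder.decode()));                                       // "" (final flush, nothing pending)
```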
@@ -401981,7 +402139,11 @@ function getTokenizer(encoding) {
    return {
      encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
      decode: (tokens) => tokenizer.decodeTokens(tokens),
-     encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
+     encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial),
+     // Generator methods - delegate to BPETokenizer
+     encodeGenerator: (text, allowedSpecial) => tokenizer.encodeTextGenerator(text, allowedSpecial),
+     decodeGenerator: (tokens) => tokenizer.decodeTokensGenerator(tokens),
+     decodeAsyncGenerator: (tokens) => tokenizer.decodeTokensAsyncGenerator(tokens)
    };
  }
  function resolveEncoding(options) {
@@ -402054,6 +402216,22 @@ function isWithinTokenLimit(text, tokenLimit, options) {
    const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
    return result.exceeded ? false : result.count;
  }
+ function encodeGenerator(text, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
+   return api.encodeGenerator(text, allowedSpecial);
+ }
+ function* decodeGenerator(tokens, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   yield* api.decodeGenerator(tokens);
+ }
+ async function* decodeAsyncGenerator(tokens, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   yield* api.decodeAsyncGenerator(tokens);
+ }

  // src/token-counter.ts
  function isNonOpenAIModel(model) {
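Since these wrappers delegate to the same tokenizer as `encode`, concatenating the generator's chunks should reproduce the flat token array. A small hedged check against the public API (expected results noted in comments, not verified here):

```ts
import { encode, encodeGenerator, decode } from 'ai-token-estimator';

const text = 'Streaming and non-streaming paths should agree.';
const flat = encode(text, { model: 'gpt-4o' });

const streamed: number[] = [];
for (const chunk of encodeGenerator(text, { model: 'gpt-4o' })) {
  streamed.push(...chunk);
}

console.log(streamed.length === flat.length);                 // expected: true
console.log(decode(streamed, { model: 'gpt-4o' }) === text);  // expected: true
```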
@@ -405574,6 +405752,76 @@ function formatFunctionCall(fc) {
    if (fc.arguments) parts.push(fc.arguments);
    return parts.join("\n");
  }
+ function getRoleString(message) {
+   if (message.role === "function" && message.name) {
+     return message.name;
+   } else if (message.name) {
+     return `${message.role}:${message.name}`;
+   }
+   return message.role;
+ }
+ function* encodeChatGenerator(messages, options) {
+   const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+   validateChatModel(model, encodingOverride);
+   const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+   if (encoding === "o200k_harmony") {
+     console.warn(
+       "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+     );
+   }
+   const chatTokens = getChatTokens(encoding);
+   if (!chatTokens) {
+     throw new Error(
+       `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+     );
+   }
+   const { imStart, imEnd, imSep } = chatTokens;
+   let totalTokens = 0;
+   for (const message of messages) {
+     validateMessage(message);
+     yield [imStart];
+     totalTokens += 1;
+     const roleStr = getRoleString(message);
+     const roleTokens = encode(roleStr, { encoding, allowSpecial: "none" });
+     yield roleTokens;
+     totalTokens += roleTokens.length;
+     yield [imSep];
+     totalTokens += 1;
+     if (message.content) {
+       const contentGen = encodeGenerator(message.content, {
+         encoding,
+         allowSpecial: "none"
+       });
+       let result = contentGen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = contentGen.next();
+       }
+     }
+     if (message.function_call) {
+       const fcContent = formatFunctionCall(message.function_call);
+       const fcTokens = encode(fcContent, { encoding, allowSpecial: "none" });
+       yield fcTokens;
+       totalTokens += fcTokens.length;
+     }
+     yield [imEnd];
+     totalTokens += 1;
+   }
+   if (primeAssistant) {
+     yield [imStart];
+     totalTokens += 1;
+     const assistantTokens = encode("assistant", {
+       encoding,
+       allowSpecial: "none"
+     });
+     yield assistantTokens;
+     totalTokens += assistantTokens.length;
+     yield [imSep];
+     totalTokens += 1;
+   }
+   return totalTokens;
+ }
  // Annotate the CommonJS export names for ESM import in node:
  0 && (module.exports = {
    DEFAULT_MODELS,
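Given the yield order implemented above (`im_start`, role, separator, content chunks, `im_end`, then optional assistant priming), flattening the chunks should agree with `encodeChat` for the same input. An illustrative sketch using only the public API (the equivalence is expected from the structure above, not asserted by the package):

```ts
import { encodeChat, encodeChatGenerator, type ChatMessage } from 'ai-token-estimator';

const messages: ChatMessage[] = [{ role: 'user', content: 'Hello!' }];

const chunks: number[][] = [];
for (const chunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
  chunks.push([...chunk]); // copy, since cached chunks are frozen
}

// Expected shape per message: [im_start], role tokens, [im_sep], content
// chunks, [im_end], followed by the assistant priming sequence by default.
console.log(chunks.flat().length === encodeChat(messages, { model: 'gpt-4o' }).length); // expected: true
```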
@@ -405587,10 +405835,14 @@ function formatFunctionCall(fc) {
    countSentencePieceTokensAsync,
    countTokens,
    decode,
+   decodeAsyncGenerator,
+   decodeGenerator,
    decodeSentencePiece,
    decodeSentencePieceAsync,
    encode,
    encodeChat,
+   encodeChatGenerator,
+   encodeGenerator,
    encodeSentencePiece,
    encodeSentencePieceAsync,
    ensureSentencePieceModel,
package/dist/index.d.cts
CHANGED
@@ -107,6 +107,72 @@ interface IsWithinTokenLimitOptions {
   * ```
   */
  declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: IsWithinTokenLimitOptions): false | number;
+ /**
+  * Encode text yielding token chunks. Memory-efficient for large inputs.
+  *
+  * Yields token arrays per regex-matched piece (word/punctuation), not per token.
+  * Returns total token count when iteration completes.
+  *
+  * @param text - The text to encode
+  * @param options - Encoding options
+  * @returns Generator that yields token arrays per piece, returns total count
+  *
+  * @example
+  * ```typescript
+  * // Stream-encode large text
+  * let tokenCount = 0;
+  * for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+  *   tokenCount += tokenChunk.length;
+  * }
+  *
+  * // Or get total count from return value
+  * const gen = encodeGenerator(text, { model: 'gpt-4o' });
+  * let result = gen.next();
+  * while (!result.done) result = gen.next();
+  * console.log('Total tokens:', result.value);
+  * ```
+  */
+ declare function encodeGenerator(text: string, options?: EncodeOptions): Generator<number[], number, undefined>;
+ /**
+  * Decode tokens yielding text chunks.
+  * Uses TextDecoder streaming mode - may yield empty strings when buffering
+  * incomplete UTF-8 sequences.
+  *
+  * @param tokens - Token IDs to decode
+  * @param options - Decoding options
+  * @returns Generator that yields text chunks
+  *
+  * @example
+  * ```typescript
+  * const tokens = encode('Hello, world!', { model: 'gpt-4o' });
+  * for (const textChunk of decodeGenerator(tokens, { model: 'gpt-4o' })) {
+  *   process.stdout.write(textChunk);
+  * }
+  * ```
+  */
+ declare function decodeGenerator(tokens: Iterable<number>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): Generator<string, void, void>;
+ /**
+  * Decode async token stream yielding text chunks.
+  * Accepts single tokens or token arrays for flexibility with streaming APIs.
+  *
+  * Uses TextDecoder streaming mode - may yield empty strings when buffering
+  * incomplete UTF-8 sequences.
+  *
+  * @param tokens - Async iterable of token IDs (numbers or number arrays)
+  * @param options - Decoding options
+  * @returns AsyncGenerator that yields text chunks
+  *
+  * @example
+  * ```typescript
+  * // Decode streaming LLM response
+  * async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+  *   for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+  *     process.stdout.write(text);
+  *   }
+  * }
+  * ```
+  */
+ declare function decodeAsyncGenerator(tokens: AsyncIterable<number | number[]>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): AsyncGenerator<string, void, void>;

  /**
   * Configuration for a specific LLM model.
@@ -688,6 +754,44 @@ interface EncodeChatOptions {
   * ```
   */
  declare function encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[];
+ /**
+  * Generator version of encodeChat. Yields token arrays per message component.
+  * Returns total token count.
+  *
+  * Yields tokens in the following order per message:
+  * - [imStart] (1 token)
+  * - role tokens
+  * - [imSep] (1 token)
+  * - content tokens (if present, yielded in chunks)
+  * - function_call tokens (if present)
+  * - [imEnd] (1 token)
+  *
+  * If primeAssistant is true (default), also yields assistant priming tokens at the end.
+  *
+  * @param messages - Array or iterable of chat messages
+  * @param options - Encoding options
+  * @returns Generator that yields token arrays per component, returns total count
+  *
+  * @example
+  * ```typescript
+  * const messages = [
+  *   { role: 'system', content: 'You are helpful.' },
+  *   { role: 'user', content: 'Hello!' }
+  * ];
+  *
+  * // Stream-encode messages
+  * for (const tokenChunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
+  *   console.log('Chunk:', tokenChunk);
+  * }
+  *
+  * // Get total count from return value
+  * const gen = encodeChatGenerator(messages, { model: 'gpt-4o' });
+  * let result = gen.next();
+  * while (!result.done) result = gen.next();
+  * console.log('Total tokens:', result.value);
+  * ```
+  */
+ declare function encodeChatGenerator(messages: ChatMessage[] | Iterable<ChatMessage>, options?: EncodeChatOptions): Generator<number[], number, undefined>;

  interface AnthropicCountTokensParams {
    /** Claude model id, e.g. `claude-sonnet-4-5` */
@@ -964,4 +1068,4 @@ declare function clearModelCache(): void;
   */
  declare function parseModelProto(buffer: Uint8Array): ModelProto;

- export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
+ export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeAsyncGenerator, decodeGenerator, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeChatGenerator, encodeGenerator, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
package/dist/index.d.ts
CHANGED
@@ -107,6 +107,72 @@ interface IsWithinTokenLimitOptions {
   * ```
   */
  declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: IsWithinTokenLimitOptions): false | number;
+ /**
+  * Encode text yielding token chunks. Memory-efficient for large inputs.
+  *
+  * Yields token arrays per regex-matched piece (word/punctuation), not per token.
+  * Returns total token count when iteration completes.
+  *
+  * @param text - The text to encode
+  * @param options - Encoding options
+  * @returns Generator that yields token arrays per piece, returns total count
+  *
+  * @example
+  * ```typescript
+  * // Stream-encode large text
+  * let tokenCount = 0;
+  * for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+  *   tokenCount += tokenChunk.length;
+  * }
+  *
+  * // Or get total count from return value
+  * const gen = encodeGenerator(text, { model: 'gpt-4o' });
+  * let result = gen.next();
+  * while (!result.done) result = gen.next();
+  * console.log('Total tokens:', result.value);
+  * ```
+  */
+ declare function encodeGenerator(text: string, options?: EncodeOptions): Generator<number[], number, undefined>;
+ /**
+  * Decode tokens yielding text chunks.
+  * Uses TextDecoder streaming mode - may yield empty strings when buffering
+  * incomplete UTF-8 sequences.
+  *
+  * @param tokens - Token IDs to decode
+  * @param options - Decoding options
+  * @returns Generator that yields text chunks
+  *
+  * @example
+  * ```typescript
+  * const tokens = encode('Hello, world!', { model: 'gpt-4o' });
+  * for (const textChunk of decodeGenerator(tokens, { model: 'gpt-4o' })) {
+  *   process.stdout.write(textChunk);
+  * }
+  * ```
+  */
+ declare function decodeGenerator(tokens: Iterable<number>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): Generator<string, void, void>;
+ /**
+  * Decode async token stream yielding text chunks.
+  * Accepts single tokens or token arrays for flexibility with streaming APIs.
+  *
+  * Uses TextDecoder streaming mode - may yield empty strings when buffering
+  * incomplete UTF-8 sequences.
+  *
+  * @param tokens - Async iterable of token IDs (numbers or number arrays)
+  * @param options - Decoding options
+  * @returns AsyncGenerator that yields text chunks
+  *
+  * @example
+  * ```typescript
+  * // Decode streaming LLM response
+  * async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+  *   for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+  *     process.stdout.write(text);
+  *   }
+  * }
+  * ```
+  */
+ declare function decodeAsyncGenerator(tokens: AsyncIterable<number | number[]>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): AsyncGenerator<string, void, void>;

  /**
   * Configuration for a specific LLM model.
@@ -688,6 +754,44 @@ interface EncodeChatOptions {
   * ```
   */
  declare function encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[];
+ /**
+  * Generator version of encodeChat. Yields token arrays per message component.
+  * Returns total token count.
+  *
+  * Yields tokens in the following order per message:
+  * - [imStart] (1 token)
+  * - role tokens
+  * - [imSep] (1 token)
+  * - content tokens (if present, yielded in chunks)
+  * - function_call tokens (if present)
+  * - [imEnd] (1 token)
+  *
+  * If primeAssistant is true (default), also yields assistant priming tokens at the end.
+  *
+  * @param messages - Array or iterable of chat messages
+  * @param options - Encoding options
+  * @returns Generator that yields token arrays per component, returns total count
+  *
+  * @example
+  * ```typescript
+  * const messages = [
+  *   { role: 'system', content: 'You are helpful.' },
+  *   { role: 'user', content: 'Hello!' }
+  * ];
+  *
+  * // Stream-encode messages
+  * for (const tokenChunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
+  *   console.log('Chunk:', tokenChunk);
+  * }
+  *
+  * // Get total count from return value
+  * const gen = encodeChatGenerator(messages, { model: 'gpt-4o' });
+  * let result = gen.next();
+  * while (!result.done) result = gen.next();
+  * console.log('Total tokens:', result.value);
+  * ```
+  */
+ declare function encodeChatGenerator(messages: ChatMessage[] | Iterable<ChatMessage>, options?: EncodeChatOptions): Generator<number[], number, undefined>;

  interface AnthropicCountTokensParams {
    /** Claude model id, e.g. `claude-sonnet-4-5` */
@@ -964,4 +1068,4 @@ declare function clearModelCache(): void;
   */
  declare function parseModelProto(buffer: Uint8Array): ModelProto;

- export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
+ export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeAsyncGenerator, decodeGenerator, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeChatGenerator, encodeGenerator, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
package/dist/index.js
CHANGED
@@ -711,6 +711,7 @@ var BPETokenizer = class {
    }
    /**
     * Add an entry to the cache, evicting LRU entries if necessary.
+    * Freezes the array to prevent mutation by consumers (especially generator yields).
     */
    addToCache(key, value) {
      if (this.cacheCapacity <= 0) return;
@@ -723,6 +724,7 @@ var BPETokenizer = class {
          removed++;
        }
      }
+     Object.freeze(value);
      this.tokenCache.set(key, value);
    }
    /**
@@ -763,6 +765,158 @@ var BPETokenizer = class {
    clearCache() {
      this.tokenCache.clear();
    }
+   // ===========================================================================
+   // Generator Methods
+   // ===========================================================================
+   /**
+    * Generator version of encodeText. Yields token arrays per regex-matched piece.
+    * Returns total token count.
+    *
+    * @param text - The text to encode
+    * @param allowedSpecial - Controls special token handling (same as encodeText)
+    * @returns Generator that yields token arrays and returns total count
+    */
+   *encodeTextGenerator(text, allowedSpecial) {
+     if (!text) return 0;
+     let totalTokens = 0;
+     if (allowedSpecial === "skip") {
+       const gen = this.encodeOrdinaryGenerator(text);
+       let result = gen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = gen.next();
+       }
+       return totalTokens;
+     }
+     if (this.specialTokenMap.size > 0) {
+       const parts = this.splitOnSpecialTokens(text, allowedSpecial);
+       for (const part of parts) {
+         if (part.isSpecial) {
+           const tokenId = this.specialTokenMap.get(part.text);
+           yield [tokenId];
+           totalTokens += 1;
+         } else {
+           const gen = this.encodeOrdinaryGenerator(part.text);
+           let result = gen.next();
+           while (!result.done) {
+             yield result.value;
+             totalTokens += result.value.length;
+             result = gen.next();
+           }
+         }
+       }
+     } else {
+       const gen = this.encodeOrdinaryGenerator(text);
+       let result = gen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = gen.next();
+       }
+     }
+     return totalTokens;
+   }
+   /**
+    * Generator version of encodeOrdinary. Yields token arrays per regex piece.
+    * Uses same cache logic as encodeOrdinary.
+    */
+   *encodeOrdinaryGenerator(text) {
+     if (!text) return;
+     const regex = new RegExp(
+       this.tokenSplitRegex.source,
+       this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
+     );
+     let match;
+     while ((match = regex.exec(text)) !== null) {
+       const piece = match[0];
+       if (piece.length === 0) {
+         regex.lastIndex++;
+         continue;
+       }
+       const cached = this.getFromCache(piece);
+       if (cached) {
+         yield cached;
+         continue;
+       }
+       const pieceBytes = this.textEncoder.encode(piece);
+       const key = bytesToLatin1(pieceBytes);
+       const directRank = this.encoder.get(key);
+       if (directRank !== void 0) {
+         const tokens = [directRank];
+         this.addToCache(piece, tokens);
+         yield tokens;
+         continue;
+       }
+       const pieceTokens = this.mergeBytePairs(pieceBytes);
+       this.addToCache(piece, pieceTokens);
+       yield pieceTokens;
+     }
+   }
+   /**
+    * Generator version of decodeTokens. Yields text chunks.
+    * Uses TextDecoder streaming mode to handle partial UTF-8 correctly.
+    * May yield empty strings when buffering incomplete sequences.
+    *
+    * Streaming semantics:
+    * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+    * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+    */
+   *decodeTokensGenerator(tokens) {
+     const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+     for (const token of tokens) {
+       const specialToken = this.specialTokenDecoder.get(token);
+       if (specialToken !== void 0) {
+         const flushed = streamingDecoder.decode(new Uint8Array(0));
+         if (flushed) yield flushed;
+         yield specialToken;
+         continue;
+       }
+       const tokenBytes = this.decoder.get(token);
+       if (!tokenBytes) {
+         throw new Error(
+           `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+         );
+       }
+       const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+       yield decoded;
+     }
+     const final = streamingDecoder.decode();
+     if (final) yield final;
+   }
+   /**
+    * Async generator version of decodeTokens.
+    * Accepts AsyncIterable<number | number[]> for flexibility.
+    *
+    * Streaming semantics:
+    * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+    * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+    */
+   async *decodeTokensAsyncGenerator(tokens) {
+     const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+     for await (const tokenOrChunk of tokens) {
+       const tokenArray = typeof tokenOrChunk === "number" ? [tokenOrChunk] : tokenOrChunk;
+       for (const token of tokenArray) {
+         const specialToken = this.specialTokenDecoder.get(token);
+         if (specialToken !== void 0) {
+           const flushed = streamingDecoder.decode(new Uint8Array(0));
+           if (flushed) yield flushed;
+           yield specialToken;
+           continue;
+         }
+         const tokenBytes = this.decoder.get(token);
+         if (!tokenBytes) {
+           throw new Error(
+             `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+           );
+         }
+         const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+         yield decoded;
+       }
+     }
+     const final = streamingDecoder.decode();
+     if (final) yield final;
+   }
  };

  // src/bpe/special-tokens.ts
@@ -401914,7 +402068,11 @@ function getTokenizer(encoding) {
    return {
      encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
      decode: (tokens) => tokenizer.decodeTokens(tokens),
-     encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
+     encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial),
+     // Generator methods - delegate to BPETokenizer
+     encodeGenerator: (text, allowedSpecial) => tokenizer.encodeTextGenerator(text, allowedSpecial),
+     decodeGenerator: (tokens) => tokenizer.decodeTokensGenerator(tokens),
+     decodeAsyncGenerator: (tokens) => tokenizer.decodeTokensAsyncGenerator(tokens)
    };
  }
  function resolveEncoding(options) {
@@ -401987,6 +402145,22 @@ function isWithinTokenLimit(text, tokenLimit, options) {
    const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
    return result.exceeded ? false : result.count;
  }
+ function encodeGenerator(text, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
+   return api.encodeGenerator(text, allowedSpecial);
+ }
+ function* decodeGenerator(tokens, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   yield* api.decodeGenerator(tokens);
+ }
+ async function* decodeAsyncGenerator(tokens, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   yield* api.decodeAsyncGenerator(tokens);
+ }

  // src/token-counter.ts
  function isNonOpenAIModel(model) {
@@ -405507,6 +405681,76 @@ function formatFunctionCall(fc) {
    if (fc.arguments) parts.push(fc.arguments);
    return parts.join("\n");
  }
+ function getRoleString(message) {
+   if (message.role === "function" && message.name) {
+     return message.name;
+   } else if (message.name) {
+     return `${message.role}:${message.name}`;
+   }
+   return message.role;
+ }
+ function* encodeChatGenerator(messages, options) {
+   const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+   validateChatModel(model, encodingOverride);
+   const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+   if (encoding === "o200k_harmony") {
+     console.warn(
+       "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+     );
+   }
+   const chatTokens = getChatTokens(encoding);
+   if (!chatTokens) {
+     throw new Error(
+       `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+     );
+   }
+   const { imStart, imEnd, imSep } = chatTokens;
+   let totalTokens = 0;
+   for (const message of messages) {
+     validateMessage(message);
+     yield [imStart];
+     totalTokens += 1;
+     const roleStr = getRoleString(message);
+     const roleTokens = encode(roleStr, { encoding, allowSpecial: "none" });
+     yield roleTokens;
+     totalTokens += roleTokens.length;
+     yield [imSep];
+     totalTokens += 1;
+     if (message.content) {
+       const contentGen = encodeGenerator(message.content, {
+         encoding,
+         allowSpecial: "none"
+       });
+       let result = contentGen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = contentGen.next();
+       }
+     }
+     if (message.function_call) {
+       const fcContent = formatFunctionCall(message.function_call);
+       const fcTokens = encode(fcContent, { encoding, allowSpecial: "none" });
+       yield fcTokens;
+       totalTokens += fcTokens.length;
+     }
+     yield [imEnd];
+     totalTokens += 1;
+   }
+   if (primeAssistant) {
+     yield [imStart];
+     totalTokens += 1;
+     const assistantTokens = encode("assistant", {
+       encoding,
+       allowSpecial: "none"
+     });
+     yield assistantTokens;
+     totalTokens += assistantTokens.length;
+     yield [imSep];
+     totalTokens += 1;
+   }
+   return totalTokens;
+ }
  export {
    DEFAULT_MODELS,
    LAST_UPDATED,
@@ -405519,10 +405763,14 @@ export {
    countSentencePieceTokensAsync,
    countTokens,
    decode,
+   decodeAsyncGenerator,
+   decodeGenerator,
    decodeSentencePiece,
    decodeSentencePieceAsync,
    encode,
    encodeChat,
+   encodeChatGenerator,
+   encodeGenerator,
    encodeSentencePiece,
    encodeSentencePieceAsync,
    ensureSentencePieceModel,