ai-token-estimator 1.5.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -0
- package/dist/index.cjs +388 -1
- package/dist/index.d.cts +153 -1
- package/dist/index.d.ts +153 -1
- package/dist/index.js +383 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -11,6 +11,9 @@ The best way to estimate **tokens + input cost** for LLM calls — with **exact
 ## Features
 
 - **Exact OpenAI tokenization** (tiktoken-compatible BPE): `encode()` / `decode()` / `openai_exact`
+- **Chat-aware tokenization**: `encodeChat()` returns exact token IDs for chat messages using ChatML format
+- **Fast token limit checking**: `isWithinTokenLimit()` / `isChatWithinTokenLimit()` with early-exit optimization (up to 1000x faster for large texts)
+- **Generator-based streaming**: `encodeGenerator()` / `encodeChatGenerator()` / `decodeGenerator()` / `decodeAsyncGenerator()` for memory-efficient tokenization
 - **OpenAI chat completion token counting** (legacy `functions` API): `countChatCompletionTokens()` with optional per-message breakdown
 - **Pure TypeScript SentencePiece tokenizer** (no native dependencies):
   - Supports `.model` files (protobuf format)
@@ -64,6 +67,21 @@ console.log(countTokens({ text: 'Hello, world!', model: 'gpt-5.1' }));
 
 ## Quick Recipes
 
+### Encode chat messages to tokens (ChatML format)
+
+```ts
+import { encodeChat, decode } from 'ai-token-estimator';
+
+const tokens = encodeChat([
+  { role: 'system', content: 'You are helpful.' },
+  { role: 'user', content: 'Hello!' }
+], { model: 'gpt-4o' });
+
+console.log(tokens); // [200264, 9125, 200266, 2610, 525, 11190, 13, 200265, ...]
+console.log(decode(tokens, { encoding: 'o200k_base' }));
+// <|im_start|>system<|im_sep|>You are helpful.<|im_end|>...
+```
+
 ### OpenAI chat completion tokens (legacy functions API)
 
 ```ts
@@ -75,6 +93,43 @@ const { totalTokens } = countChatCompletionTokens({
 });
 ```
 
+### Fast token limit checking (early exit)
+
+```ts
+import { isWithinTokenLimit, isChatWithinTokenLimit } from 'ai-token-estimator';
+
+// Plain text - returns token count or false if exceeded
+const count = isWithinTokenLimit(longText, 4096, { model: 'gpt-4o' });
+if (count === false) console.log('Text exceeds limit');
+
+// Chat messages - same early-exit optimization
+const chatCount = isChatWithinTokenLimit({
+  messages: [{ role: 'user', content: longText }],
+  model: 'gpt-4o',
+  tokenLimit: 4096,
+});
+```
+
+### Generator-based streaming tokenization
+
+```ts
+import { encodeGenerator, decodeAsyncGenerator } from 'ai-token-estimator';
+
+// Stream-encode large text (memory efficient)
+let tokenCount = 0;
+for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+  tokenCount += tokenChunk.length;
+  // Process chunk...
+}
+
+// Decode streaming LLM response
+async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+  for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+    process.stdout.write(text);
+  }
+}
+```
+
 ### Local SentencePiece token counting
 
 ```ts
@@ -561,6 +616,48 @@ Encodes text into **OpenAI token IDs** using tiktoken-compatible BPE tokenization.
 
 Decodes OpenAI token IDs back into text using the selected encoding/model.
 
+### `encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[]`
+
+Encodes chat messages into **exact token IDs** using ChatML format. Returns the ChatML message prompt tokens (messages + optional assistant priming), including special delimiter tokens (`<|im_start|>`, `<|im_sep|>`, `<|im_end|>`).
+
+```ts
+import { encodeChat, decode } from 'ai-token-estimator';
+
+const tokens = encodeChat([
+  { role: 'system', content: 'You are helpful.' },
+  { role: 'user', content: 'Hello!' }
+], { model: 'gpt-4o' });
+
+// Tokens include ChatML structure:
+// <|im_start|>system<|im_sep|>You are helpful.<|im_end|>
+// <|im_start|>user<|im_sep|>Hello!<|im_end|>
+// <|im_start|>assistant<|im_sep|> (priming)
+```
+
+**Parameters:**
+
+```typescript
+interface EncodeChatOptions {
+  model?: string;            // OpenAI model (e.g., 'gpt-4o')
+  encoding?: OpenAIEncoding; // Explicit encoding override
+  primeAssistant?: boolean;  // Append assistant priming (default: true)
+}
+```
+
+**Supported encodings:**
+- `cl100k_base` (GPT-4, GPT-3.5-turbo)
+- `o200k_base` (GPT-4o, GPT-4o-mini)
+- `o200k_harmony` (experimental)
+
+**Limitations:**
+- **OpenAI models only** — throws for claude-*, gemini-*
+- **Legacy functions API only** — throws for tool_calls, tool_call_id
+- **Text content only** — throws for multimodal content (arrays)
+
+**Note on function_call:** Messages with `function_call` are encoded with the function name and arguments as content. The token count differs from `countChatCompletionTokens()` because the latter includes `FUNCTION_CALL_METADATA_TOKEN_OVERHEAD` (3 tokens) for API accounting. The exact difference depends on whether both name and arguments are present (a 2-token difference due to the newline separator) or only one field is present (a 3-token difference).
+
+**Note on o200k_harmony:** Support for the `o200k_harmony` encoding is experimental. The token structure may not match actual API behavior.
+
 ### `isWithinTokenLimit(text, tokenLimit, options?): false | number`
 
 Checks if text is within a token limit with **early exit optimization**. Returns `false` if the limit is exceeded, or the actual token count if within limit.
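As a small illustration of the options documented above (a sketch with made-up message content, not taken from the README): `primeAssistant: false` drops the trailing `<|im_start|>assistant<|im_sep|>` priming, and a `function_call` is encoded as name plus arguments joined by a newline.

```ts
import { encodeChat } from 'ai-token-estimator';

const messages = [
  { role: 'user', content: 'What is the weather in Paris?' },
  {
    role: 'assistant',
    content: null,
    function_call: { name: 'get_weather', arguments: '{"city":"Paris"}' },
  },
];

const primed = encodeChat(messages, { model: 'gpt-4o' });
const unprimed = encodeChat(messages, { model: 'gpt-4o', primeAssistant: false });

// Priming appends <|im_start|>, the tokens for "assistant", and <|im_sep|>,
// so the difference is typically 3 tokens.
console.log(primed.length - unprimed.length);
```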
@@ -642,6 +739,81 @@ interface IsChatWithinTokenLimitInput {
 - `Error` if tools, tool_choice, tool_calls, or tool_call_id are present
 - `Error` if any message has non-string content
 
+### Generator APIs
+
+Generator-based APIs for memory-efficient streaming tokenization.
+
+#### `encodeGenerator(text, options?): Generator<number[], number, undefined>`
+
+Encodes text, yielding token chunks. Memory-efficient for large inputs.
+
+- **Yields:** `number[]` — token IDs per regex-matched piece (word/punctuation)
+- **Returns:** `number` — total token count when iteration completes
+
+```typescript
+import { encodeGenerator } from 'ai-token-estimator';
+
+// Stream-encode large text
+let tokenCount = 0;
+for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+  tokenCount += tokenChunk.length;
+}
+
+// Or get the total count from the return value
+const gen = encodeGenerator(text, { model: 'gpt-4o' });
+let result = gen.next();
+while (!result.done) result = gen.next();
+console.log('Total tokens:', result.value);
+```
+
+#### `encodeChatGenerator(messages, options?): Generator<number[], number, undefined>`
+
+Encodes chat messages, yielding token chunks per message component.
+
+- **Yields:** `number[]` — token IDs per component (special tokens, role, content chunks, etc.)
+- **Returns:** `number` — total token count
+
+```typescript
+import { encodeChatGenerator } from 'ai-token-estimator';
+
+const messages = [
+  { role: 'system', content: 'You are helpful.' },
+  { role: 'user', content: 'Hello!' }
+];
+
+for (const tokenChunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
+  console.log('Chunk:', tokenChunk);
+}
+```
+
+#### `decodeGenerator(tokens, options?): Generator<string, void, void>`
+
+Decodes tokens, yielding text chunks. Uses TextDecoder streaming mode and may yield empty strings while buffering incomplete UTF-8 sequences.
+
+```typescript
+import { encode, decodeGenerator } from 'ai-token-estimator';
+
+const tokens = encode('Hello, world!', { model: 'gpt-4o' });
+for (const textChunk of decodeGenerator(tokens, { model: 'gpt-4o' })) {
+  process.stdout.write(textChunk);
+}
+```
+
+#### `decodeAsyncGenerator(tokens, options?): AsyncGenerator<string, void, void>`
+
+Decodes an async token stream, yielding text chunks. Accepts `AsyncIterable<number | number[]>` for flexibility with streaming APIs.
+
+```typescript
+import { decodeAsyncGenerator } from 'ai-token-estimator';
+
+// Decode a streaming LLM response
+async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+  for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+    process.stdout.write(text);
+  }
+}
+```
+
 ### `getModelConfig(model: string): ModelConfig`
 
 Returns the configuration for a specific model. Throws if the model is not found.
package/dist/index.cjs
CHANGED
@@ -41,9 +41,14 @@ __export(index_exports, {
   countSentencePieceTokensAsync: () => countSentencePieceTokensAsync,
   countTokens: () => countTokens,
   decode: () => decode,
+  decodeAsyncGenerator: () => decodeAsyncGenerator,
+  decodeGenerator: () => decodeGenerator,
   decodeSentencePiece: () => decodeSentencePiece,
   decodeSentencePieceAsync: () => decodeSentencePieceAsync,
   encode: () => encode,
+  encodeChat: () => encodeChat,
+  encodeChatGenerator: () => encodeChatGenerator,
+  encodeGenerator: () => encodeGenerator,
   encodeSentencePiece: () => encodeSentencePiece,
   encodeSentencePieceAsync: () => encodeSentencePieceAsync,
   ensureSentencePieceModel: () => ensureSentencePieceModel,
@@ -777,6 +782,7 @@ var BPETokenizer = class {
   }
   /**
    * Add an entry to the cache, evicting LRU entries if necessary.
+   * Freezes the array to prevent mutation by consumers (especially generator yields).
    */
   addToCache(key, value) {
     if (this.cacheCapacity <= 0) return;
@@ -789,6 +795,7 @@ var BPETokenizer = class {
         removed++;
       }
     }
+    Object.freeze(value);
    this.tokenCache.set(key, value);
  }
  /**
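Because cached token arrays are now frozen, and the generator paths added below yield cache entries directly, a consumer that wants to mutate a yielded chunk should copy it first. A minimal sketch of that pattern, using the public `encodeGenerator` wrapper added in this release:

```ts
import { encodeGenerator } from 'ai-token-estimator';

const collected: number[][] = [];
for (const chunk of encodeGenerator('hello hello hello', { model: 'gpt-4o' })) {
  // Yielded chunks can be frozen cache entries, so chunk.push(...) would throw
  // in strict mode; copy before mutating.
  collected.push([...chunk]);
}
```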
@@ -829,6 +836,158 @@ var BPETokenizer = class {
   clearCache() {
     this.tokenCache.clear();
   }
+  // ===========================================================================
+  // Generator Methods
+  // ===========================================================================
+  /**
+   * Generator version of encodeText. Yields token arrays per regex-matched piece.
+   * Returns total token count.
+   *
+   * @param text - The text to encode
+   * @param allowedSpecial - Controls special token handling (same as encodeText)
+   * @returns Generator that yields token arrays and returns total count
+   */
+  *encodeTextGenerator(text, allowedSpecial) {
+    if (!text) return 0;
+    let totalTokens = 0;
+    if (allowedSpecial === "skip") {
+      const gen = this.encodeOrdinaryGenerator(text);
+      let result = gen.next();
+      while (!result.done) {
+        yield result.value;
+        totalTokens += result.value.length;
+        result = gen.next();
+      }
+      return totalTokens;
+    }
+    if (this.specialTokenMap.size > 0) {
+      const parts = this.splitOnSpecialTokens(text, allowedSpecial);
+      for (const part of parts) {
+        if (part.isSpecial) {
+          const tokenId = this.specialTokenMap.get(part.text);
+          yield [tokenId];
+          totalTokens += 1;
+        } else {
+          const gen = this.encodeOrdinaryGenerator(part.text);
+          let result = gen.next();
+          while (!result.done) {
+            yield result.value;
+            totalTokens += result.value.length;
+            result = gen.next();
+          }
+        }
+      }
+    } else {
+      const gen = this.encodeOrdinaryGenerator(text);
+      let result = gen.next();
+      while (!result.done) {
+        yield result.value;
+        totalTokens += result.value.length;
+        result = gen.next();
+      }
+    }
+    return totalTokens;
+  }
+  /**
+   * Generator version of encodeOrdinary. Yields token arrays per regex piece.
+   * Uses same cache logic as encodeOrdinary.
+   */
+  *encodeOrdinaryGenerator(text) {
+    if (!text) return;
+    const regex = new RegExp(
+      this.tokenSplitRegex.source,
+      this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
+    );
+    let match;
+    while ((match = regex.exec(text)) !== null) {
+      const piece = match[0];
+      if (piece.length === 0) {
+        regex.lastIndex++;
+        continue;
+      }
+      const cached = this.getFromCache(piece);
+      if (cached) {
+        yield cached;
+        continue;
+      }
+      const pieceBytes = this.textEncoder.encode(piece);
+      const key = bytesToLatin1(pieceBytes);
+      const directRank = this.encoder.get(key);
+      if (directRank !== void 0) {
+        const tokens = [directRank];
+        this.addToCache(piece, tokens);
+        yield tokens;
+        continue;
+      }
+      const pieceTokens = this.mergeBytePairs(pieceBytes);
+      this.addToCache(piece, pieceTokens);
+      yield pieceTokens;
+    }
+  }
+  /**
+   * Generator version of decodeTokens. Yields text chunks.
+   * Uses TextDecoder streaming mode to handle partial UTF-8 correctly.
+   * May yield empty strings when buffering incomplete sequences.
+   *
+   * Streaming semantics:
+   * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+   * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+   */
+  *decodeTokensGenerator(tokens) {
+    const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+    for (const token of tokens) {
+      const specialToken = this.specialTokenDecoder.get(token);
+      if (specialToken !== void 0) {
+        const flushed = streamingDecoder.decode(new Uint8Array(0));
+        if (flushed) yield flushed;
+        yield specialToken;
+        continue;
+      }
+      const tokenBytes = this.decoder.get(token);
+      if (!tokenBytes) {
+        throw new Error(
+          `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+        );
+      }
+      const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+      yield decoded;
+    }
+    const final = streamingDecoder.decode();
+    if (final) yield final;
+  }
+  /**
+   * Async generator version of decodeTokens.
+   * Accepts AsyncIterable<number | number[]> for flexibility.
+   *
+   * Streaming semantics:
+   * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+   * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+   */
+  async *decodeTokensAsyncGenerator(tokens) {
+    const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+    for await (const tokenOrChunk of tokens) {
+      const tokenArray = typeof tokenOrChunk === "number" ? [tokenOrChunk] : tokenOrChunk;
+      for (const token of tokenArray) {
+        const specialToken = this.specialTokenDecoder.get(token);
+        if (specialToken !== void 0) {
+          const flushed = streamingDecoder.decode(new Uint8Array(0));
+          if (flushed) yield flushed;
+          yield specialToken;
+          continue;
+        }
+        const tokenBytes = this.decoder.get(token);
+        if (!tokenBytes) {
+          throw new Error(
+            `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+          );
+        }
+        const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+        yield decoded;
+      }
+    }
+    const final = streamingDecoder.decode();
+    if (final) yield final;
+  }
 };
 
 // src/bpe/special-tokens.ts
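For reference, the TextDecoder streaming semantics these generators rely on can be seen in isolation. This standalone sketch (not part of the package) shows how `{ stream: true }` buffers a split multi-byte character and how the final flush call emits nothing once the sequence has completed:

```ts
// '€' is three UTF-8 bytes (0xE2 0x82 0xAC); feed them in two pieces.
const bytes = new TextEncoder().encode('€');
const dec = new TextDecoder('utf-8', { fatal: false });

console.log(JSON.stringify(dec.decode(bytes.slice(0, 2), { stream: true }))); // "" (buffered)
console.log(JSON.stringify(dec.decode(bytes.slice(2), { stream: true })));    // "€"
console.log(JSON.stringify(dec.decode()));                                    // "" (flush, nothing pending)
```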
@@ -849,10 +1008,18 @@ var CL100K_BASE_SPECIAL_TOKENS = [
   ["<|fim_prefix|>", 100258],
   ["<|fim_middle|>", 100259],
   ["<|fim_suffix|>", 100260],
+  // ChatML tokens for chat completion
+  ["<|im_start|>", 100264],
+  ["<|im_end|>", 100265],
+  ["<|im_sep|>", 100266],
   ["<|endofprompt|>", 100276]
 ];
 var O200K_BASE_SPECIAL_TOKENS = [
   ["<|endoftext|>", 199999],
+  // ChatML tokens for chat completion
+  ["<|im_start|>", 200264],
+  ["<|im_end|>", 200265],
+  ["<|im_sep|>", 200266],
   ["<|endofprompt|>", 200018]
 ];
 function buildO200kHarmonySpecialTokens() {
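A quick way to sanity-check the newly registered ChatML delimiter IDs is to decode them directly. A small sketch using the package's public `decode()`:

```ts
import { decode } from 'ai-token-estimator';

// The IDs added above map to the ChatML delimiters in each encoding.
console.log(decode([200264, 200266, 200265], { encoding: 'o200k_base' }));
// <|im_start|><|im_sep|><|im_end|>
console.log(decode([100264, 100266, 100265], { encoding: 'cl100k_base' }));
// <|im_start|><|im_sep|><|im_end|>
```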
@@ -401972,7 +402139,11 @@ function getTokenizer(encoding) {
   return {
     encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
     decode: (tokens) => tokenizer.decodeTokens(tokens),
-    encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
+    encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial),
+    // Generator methods - delegate to BPETokenizer
+    encodeGenerator: (text, allowedSpecial) => tokenizer.encodeTextGenerator(text, allowedSpecial),
+    decodeGenerator: (tokens) => tokenizer.decodeTokensGenerator(tokens),
+    decodeAsyncGenerator: (tokens) => tokenizer.decodeTokensAsyncGenerator(tokens)
   };
 }
 function resolveEncoding(options) {
@@ -402045,6 +402216,22 @@ function isWithinTokenLimit(text, tokenLimit, options) {
   const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
   return result.exceeded ? false : result.count;
 }
+function encodeGenerator(text, options) {
+  const encoding = resolveEncoding(options);
+  const api = getTokenizer(encoding);
+  const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
+  return api.encodeGenerator(text, allowedSpecial);
+}
+function* decodeGenerator(tokens, options) {
+  const encoding = resolveEncoding(options);
+  const api = getTokenizer(encoding);
+  yield* api.decodeGenerator(tokens);
+}
+async function* decodeAsyncGenerator(tokens, options) {
+  const encoding = resolveEncoding(options);
+  const api = getTokenizer(encoding);
+  yield* api.decodeAsyncGenerator(tokens);
+}
 
 // src/token-counter.ts
 function isNonOpenAIModel(model) {
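Since `encodeGenerator` yields `number[]` chunks and `decodeAsyncGenerator` accepts `AsyncIterable<number | number[]>`, the two new wrappers compose into a streaming round trip. A sketch (the helper names here are illustrative, not part of the package):

```ts
import { encodeGenerator, decodeAsyncGenerator } from 'ai-token-estimator';

// Wrap the sync generator in an async iterable, as a streaming source would be.
async function* tokenChunks(text: string) {
  yield* encodeGenerator(text, { model: 'gpt-4o' });
}

async function roundTrip(text: string): Promise<string> {
  let out = '';
  for await (const piece of decodeAsyncGenerator(tokenChunks(text), { model: 'gpt-4o' })) {
    out += piece;
  }
  return out;
}

roundTrip('Hello, streaming world!').then((s) => console.log(s === 'Hello, streaming world!')); // true
```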
@@ -405440,6 +405627,201 @@ function isChatWithinTokenLimit(input) {
   }
   return count;
 }
+
+// src/encode-chat.ts
+var CHAT_TOKENS = {
+  cl100k_base: { imStart: 100264, imEnd: 100265, imSep: 100266 },
+  o200k_base: { imStart: 200264, imEnd: 200265, imSep: 200266 }
+};
+var HARMONY_TOKENS = {
+  start: 200006,
+  end: 200007,
+  message: 200008
+};
+function encodeChat(messages, options) {
+  const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+  validateChatModel(model, encodingOverride);
+  const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+  if (encoding === "o200k_harmony") {
+    console.warn(
+      "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+    );
+  }
+  const chatTokens = getChatTokens(encoding);
+  if (!chatTokens) {
+    throw new Error(
+      `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+    );
+  }
+  const { imStart, imEnd, imSep } = chatTokens;
+  const tokens = [];
+  for (const message of messages) {
+    validateMessage(message);
+    tokens.push(imStart);
+    let roleStr;
+    if (message.role === "function" && message.name) {
+      roleStr = message.name;
+    } else if (message.name) {
+      roleStr = `${message.role}:${message.name}`;
+    } else {
+      roleStr = message.role;
+    }
+    tokens.push(...encode(roleStr, { encoding, allowSpecial: "none" }));
+    tokens.push(imSep);
+    if (message.content) {
+      tokens.push(
+        ...encode(message.content, { encoding, allowSpecial: "none" })
+      );
+    }
+    if (message.function_call) {
+      const fcContent = formatFunctionCall(message.function_call);
+      tokens.push(...encode(fcContent, { encoding, allowSpecial: "none" }));
+    }
+    tokens.push(imEnd);
+  }
+  if (primeAssistant) {
+    tokens.push(imStart);
+    tokens.push(...encode("assistant", { encoding, allowSpecial: "none" }));
+    tokens.push(imSep);
+  }
+  return tokens;
+}
+function validateChatModel(model, encodingOverride) {
+  if (model) {
+    if (isAnthropicModel(model)) {
+      throw new Error(
+        `Model "${model}" is an Anthropic model. encodeChat only supports OpenAI models.`
+      );
+    }
+    if (isGoogleModel(model)) {
+      throw new Error(
+        `Model "${model}" is a Google model. encodeChat only supports OpenAI models.`
+      );
+    }
+    if (isKnownModel(model) && !isChatModel(model)) {
+      throw new Error(
+        `Model "${model}" is not a chat completion model. encodeChat only supports chat models (e.g., gpt-4o, gpt-3.5-turbo).`
+      );
+    }
+  }
+  if (encodingOverride) {
+    return;
+  }
+  if (!model) {
+    throw new Error(
+      "Either model or encoding must be provided. Provide a known OpenAI chat model (e.g., gpt-4o) or an explicit encoding (e.g., o200k_base)."
+    );
+  }
+  if (!isChatModel(model)) {
+    throw new Error(
+      `Model "${model}" is not recognized as an OpenAI chat model. If this is a new OpenAI model, provide the encoding option explicitly (e.g., encoding: "o200k_base").`
+    );
+  }
+}
+function validateMessage(message) {
+  const msgAny = message;
+  if ("tool_calls" in msgAny && msgAny.tool_calls !== void 0) {
+    throw new Error(
+      "tool_calls is not supported. Use function_call with the legacy functions API."
+    );
+  }
+  if ("tool_call_id" in msgAny && msgAny.tool_call_id !== void 0) {
+    throw new Error(
+      "tool_call_id is not supported. Use the legacy functions API."
+    );
+  }
+  if (message.content !== null && message.content !== void 0 && typeof message.content !== "string") {
+    throw new Error(
+      "Multimodal content (arrays) is not supported. Only text content is supported."
+    );
+  }
+}
+function getChatTokens(encoding) {
+  if (encoding === "o200k_harmony") {
+    return {
+      imStart: HARMONY_TOKENS.start,
+      imEnd: HARMONY_TOKENS.end,
+      imSep: HARMONY_TOKENS.message
+    };
+  }
+  return CHAT_TOKENS[encoding] ?? null;
+}
+function formatFunctionCall(fc) {
+  const parts = [];
+  if (fc.name) parts.push(fc.name);
+  if (fc.arguments) parts.push(fc.arguments);
+  return parts.join("\n");
+}
+function getRoleString(message) {
+  if (message.role === "function" && message.name) {
+    return message.name;
+  } else if (message.name) {
+    return `${message.role}:${message.name}`;
+  }
+  return message.role;
+}
+function* encodeChatGenerator(messages, options) {
+  const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+  validateChatModel(model, encodingOverride);
+  const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+  if (encoding === "o200k_harmony") {
+    console.warn(
+      "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+    );
+  }
+  const chatTokens = getChatTokens(encoding);
+  if (!chatTokens) {
+    throw new Error(
+      `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+    );
+  }
+  const { imStart, imEnd, imSep } = chatTokens;
+  let totalTokens = 0;
+  for (const message of messages) {
+    validateMessage(message);
+    yield [imStart];
+    totalTokens += 1;
+    const roleStr = getRoleString(message);
+    const roleTokens = encode(roleStr, { encoding, allowSpecial: "none" });
+    yield roleTokens;
+    totalTokens += roleTokens.length;
+    yield [imSep];
+    totalTokens += 1;
+    if (message.content) {
+      const contentGen = encodeGenerator(message.content, {
+        encoding,
+        allowSpecial: "none"
+      });
+      let result = contentGen.next();
+      while (!result.done) {
+        yield result.value;
+        totalTokens += result.value.length;
+        result = contentGen.next();
+      }
+    }
+    if (message.function_call) {
+      const fcContent = formatFunctionCall(message.function_call);
+      const fcTokens = encode(fcContent, { encoding, allowSpecial: "none" });
+      yield fcTokens;
+      totalTokens += fcTokens.length;
+    }
+    yield [imEnd];
+    totalTokens += 1;
+  }
+  if (primeAssistant) {
+    yield [imStart];
+    totalTokens += 1;
+    const assistantTokens = encode("assistant", {
+      encoding,
+      allowSpecial: "none"
+    });
+    yield assistantTokens;
+    totalTokens += assistantTokens.length;
+    yield [imSep];
+    totalTokens += 1;
+  }
+  return totalTokens;
+}
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
   DEFAULT_MODELS,
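To see the role formatting implemented by `getRoleString` above from the public API, a sketch with made-up messages: a `function`-role message is tagged with its `name`, while any other named message becomes `role:name`.

```ts
import { encodeChat, decode } from 'ai-token-estimator';

const tokens = encodeChat(
  [
    { role: 'user', name: 'alice', content: 'Hi!' },
    { role: 'function', name: 'get_weather', content: '{"temp":20}' },
  ],
  { model: 'gpt-4o', primeAssistant: false }
);

console.log(decode(tokens, { encoding: 'o200k_base' }));
// Expected, roughly:
// <|im_start|>user:alice<|im_sep|>Hi!<|im_end|><|im_start|>get_weather<|im_sep|>{"temp":20}<|im_end|>
```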
@@ -405453,9 +405835,14 @@ function isChatWithinTokenLimit(input) {
   countSentencePieceTokensAsync,
   countTokens,
   decode,
+  decodeAsyncGenerator,
+  decodeGenerator,
   decodeSentencePiece,
   decodeSentencePieceAsync,
   encode,
+  encodeChat,
+  encodeChatGenerator,
+  encodeGenerator,
   encodeSentencePiece,
   encodeSentencePieceAsync,
   ensureSentencePieceModel,