ai-token-estimator 1.5.0 → 1.7.0

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
package/README.md CHANGED
@@ -11,6 +11,9 @@ The best way to estimate **tokens + input cost** for LLM calls — with **exact
11
11
  ## Features
12
12
 
13
13
  - **Exact OpenAI tokenization** (tiktoken-compatible BPE): `encode()` / `decode()` / `openai_exact`
14
+ - **Chat-aware tokenization**: `encodeChat()` returns exact token IDs for chat messages using ChatML format
15
+ - **Fast token limit checking**: `isWithinTokenLimit()` / `isChatWithinTokenLimit()` with early-exit optimization (up to 1000x faster for large texts)
16
+ - **Generator-based streaming**: `encodeGenerator()` / `encodeChatGenerator()` / `decodeGenerator()` / `decodeAsyncGenerator()` for memory-efficient tokenization
14
17
  - **OpenAI chat completion token counting** (legacy `functions` API): `countChatCompletionTokens()` with optional per-message breakdown
15
18
  - **Pure TypeScript SentencePiece tokenizer** (no native dependencies):
16
19
  - Supports `.model` files (protobuf format)
@@ -64,6 +67,21 @@ console.log(countTokens({ text: 'Hello, world!', model: 'gpt-5.1' }));
64
67
 
65
68
  ## Quick Recipes
66
69
 
70
+ ### Encode chat messages to tokens (ChatML format)
71
+
72
+ ```ts
73
+ import { encodeChat, decode } from 'ai-token-estimator';
74
+
75
+ const tokens = encodeChat([
76
+ { role: 'system', content: 'You are helpful.' },
77
+ { role: 'user', content: 'Hello!' }
78
+ ], { model: 'gpt-4o' });
79
+
80
+ console.log(tokens); // [200264, 9125, 200266, 2610, 525, 11190, 13, 200265, ...]
81
+ console.log(decode(tokens, { encoding: 'o200k_base' }));
82
+ // <|im_start|>system<|im_sep|>You are helpful.<|im_end|>...
83
+ ```
84
+
67
85
  ### OpenAI chat completion tokens (legacy functions API)
68
86
 
69
87
  ```ts
@@ -75,6 +93,43 @@ const { totalTokens } = countChatCompletionTokens({
75
93
  });
76
94
  ```
77
95
 
96
+ ### Fast token limit checking (early exit)
97
+
98
+ ```ts
99
+ import { isWithinTokenLimit, isChatWithinTokenLimit } from 'ai-token-estimator';
100
+
101
+ // Plain text - returns token count or false if exceeded
102
+ const count = isWithinTokenLimit(longText, 4096, { model: 'gpt-4o' });
103
+ if (count === false) console.log('Text exceeds limit');
104
+
105
+ // Chat messages - same early-exit optimization
106
+ const chatCount = isChatWithinTokenLimit({
107
+ messages: [{ role: 'user', content: longText }],
108
+ model: 'gpt-4o',
109
+ tokenLimit: 4096,
110
+ });
111
+ ```
112
+
113
+ ### Generator-based streaming tokenization
114
+
115
+ ```ts
116
+ import { encodeGenerator, decodeAsyncGenerator } from 'ai-token-estimator';
117
+
118
+ // Stream-encode large text (memory efficient)
119
+ let tokenCount = 0;
120
+ for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
121
+ tokenCount += tokenChunk.length;
122
+ // Process chunk...
123
+ }
124
+
125
+ // Decode streaming LLM response
126
+ async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
127
+ for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
128
+ process.stdout.write(text);
129
+ }
130
+ }
131
+ ```
132
+
78
133
  ### Local SentencePiece token counting
79
134
 
80
135
  ```ts
@@ -561,6 +616,48 @@ Encodes text into **OpenAI token IDs** using tiktoken-compatible BPE tokenizatio
561
616
 
562
617
  Decodes OpenAI token IDs back into text using the selected encoding/model.
563
618
 
619
+ ### `encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[]`
620
+
621
+ Encodes chat messages into **exact token IDs** using the ChatML format. Returns the tokens for the full ChatML prompt (all messages plus optional assistant priming), including the special delimiter tokens (`<|im_start|>`, `<|im_sep|>`, `<|im_end|>`).
622
+
623
+ ```ts
624
+ import { encodeChat, decode } from 'ai-token-estimator';
625
+
626
+ const tokens = encodeChat([
627
+ { role: 'system', content: 'You are helpful.' },
628
+ { role: 'user', content: 'Hello!' }
629
+ ], { model: 'gpt-4o' });
630
+
631
+ // Tokens include ChatML structure:
632
+ // <|im_start|>system<|im_sep|>You are helpful.<|im_end|>
633
+ // <|im_start|>user<|im_sep|>Hello!<|im_end|>
634
+ // <|im_start|>assistant<|im_sep|> (priming)
635
+ ```
636
+
637
+ **Parameters:**
638
+
639
+ ```typescript
640
+ interface EncodeChatOptions {
641
+ model?: string; // OpenAI model (e.g., 'gpt-4o')
642
+ encoding?: OpenAIEncoding; // Explicit encoding override
643
+ primeAssistant?: boolean; // Append assistant priming (default: true)
644
+ }
645
+ ```
646
+
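+ A minimal sketch of these options in use. The `primeAssistant` flag controls the trailing `<|im_start|>assistant<|im_sep|>` priming; the 3-token difference below assumes `"assistant"` encodes to a single token:
+
+ ```ts
+ import { encodeChat } from 'ai-token-estimator';
+
+ // Explicit encoding override instead of a model name
+ const primed = encodeChat([{ role: 'user', content: 'Hello!' }], { encoding: 'o200k_base' });
+
+ // Skip the trailing assistant priming tokens
+ const unprimed = encodeChat([{ role: 'user', content: 'Hello!' }], {
+   encoding: 'o200k_base',
+   primeAssistant: false,
+ });
+
+ // Priming appends <|im_start|> + "assistant" + <|im_sep|>
+ console.log(primed.length - unprimed.length); // 3, assuming "assistant" is one token
+ ```
+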
647
+ **Supported encodings:**
648
+ - `cl100k_base` (GPT-4, GPT-3.5-turbo)
649
+ - `o200k_base` (GPT-4o, GPT-4o-mini)
650
+ - `o200k_harmony` (experimental)
651
+
652
+ **Limitations:**
653
+ - **OpenAI models only** — throws for claude-*, gemini-*
654
+ - **Legacy functions API only** — throws for tool_calls, tool_call_id
655
+ - **Text content only** — throws for multimodal content (arrays)
656
+
657
+ **Note on function_call:** Messages with `function_call` are encoded with the function name and arguments as content. The token count differs from `countChatCompletionTokens()` because the latter includes `FUNCTION_CALL_METADATA_TOKEN_OVERHEAD` (3 tokens) for API accounting. The exact difference depends on whether both name and arguments are present (a 2-token difference, due to the newline separator) or only one field is present (a 3-token difference).
658
+
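+ For illustration, a minimal sketch of a `function_call` message. The message shape (with `content` omitted) is an assumption about the legacy `functions` API typing, and the decoded output in the comments is illustrative:
+
+ ```ts
+ import { encodeChat, decode } from 'ai-token-estimator';
+
+ const tokens = encodeChat(
+   [{ role: 'assistant', function_call: { name: 'get_weather', arguments: '{"city":"Paris"}' } }],
+   { model: 'gpt-4o' }
+ );
+
+ // The function name and arguments become the message body, joined by a newline:
+ // <|im_start|>assistant<|im_sep|>get_weather
+ // {"city":"Paris"}<|im_end|>...
+ console.log(decode(tokens, { encoding: 'o200k_base' }));
+
+ // countChatCompletionTokens() reports a slightly higher total for the same
+ // message because of the metadata overhead described in the note above.
+ ```
+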
659
+ **Note on o200k_harmony:** Support for `o200k_harmony` encoding is experimental. The token structure may not match actual API behavior.
660
+
564
661
  ### `isWithinTokenLimit(text, tokenLimit, options?): false | number`
565
662
 
566
663
  Checks if text is within a token limit with **early exit optimization**. Returns `false` if the limit is exceeded, or the actual token count if within limit.
@@ -642,6 +739,81 @@ interface IsChatWithinTokenLimitInput {
642
739
  - `Error` if tools, tool_choice, tool_calls, or tool_call_id are present
643
740
  - `Error` if any message has non-string content
644
741
 
742
+ ### Generator APIs
743
+
744
+ Generator-based APIs for memory-efficient streaming tokenization.
745
+
746
+ #### `encodeGenerator(text, options?): Generator<number[], number, undefined>`
747
+
748
+ Encodes text, yielding token chunks. Memory-efficient for large inputs.
749
+
750
+ - **Yields:** `number[]` — token IDs per regex-matched piece (word/punctuation)
751
+ - **Returns:** `number` — total token count when iteration completes
752
+
753
+ ```typescript
754
+ import { encodeGenerator } from 'ai-token-estimator';
755
+
756
+ // Stream-encode large text
757
+ let tokenCount = 0;
758
+ for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
759
+ tokenCount += tokenChunk.length;
760
+ }
761
+
762
+ // Or get total count from return value
763
+ const gen = encodeGenerator(text, { model: 'gpt-4o' });
764
+ let result = gen.next();
765
+ while (!result.done) result = gen.next();
766
+ console.log('Total tokens:', result.value);
767
+ ```
768
+
769
+ #### `encodeChatGenerator(messages, options?): Generator<number[], number, undefined>`
770
+
771
+ Encodes chat messages, yielding token chunks per message component.
772
+
773
+ - **Yields:** `number[]` — token IDs per component (special tokens, role, content chunks, etc.)
774
+ - **Returns:** `number` — total token count
775
+
776
+ ```typescript
777
+ import { encodeChatGenerator } from 'ai-token-estimator';
778
+
779
+ const messages = [
780
+ { role: 'system', content: 'You are helpful.' },
781
+ { role: 'user', content: 'Hello!' }
782
+ ];
783
+
784
+ for (const tokenChunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
785
+ console.log('Chunk:', tokenChunk);
786
+ }
787
+ ```
788
+
789
+ #### `decodeGenerator(tokens, options?): Generator<string, void, void>`
790
+
791
+ Decodes tokens, yielding text chunks. Uses TextDecoder in streaming mode, so it may yield empty strings while buffering incomplete UTF-8 sequences.
792
+
793
+ ```typescript
794
+ import { encode, decodeGenerator } from 'ai-token-estimator';
795
+
796
+ const tokens = encode('Hello, world!', { model: 'gpt-4o' });
797
+ for (const textChunk of decodeGenerator(tokens, { model: 'gpt-4o' })) {
798
+ process.stdout.write(textChunk);
799
+ }
800
+ ```
801
+
802
+ #### `decodeAsyncGenerator(tokens, options?): AsyncGenerator<string, void, void>`
803
+
804
+ Decodes an async token stream, yielding text chunks. Accepts `AsyncIterable<number | number[]>` for flexibility with streaming APIs.
805
+
806
+ ```typescript
807
+ import { decodeAsyncGenerator } from 'ai-token-estimator';
808
+
809
+ // Decode streaming LLM response
810
+ async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
811
+ for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
812
+ process.stdout.write(text);
813
+ }
814
+ }
815
+ ```
816
+
645
817
  ### `getModelConfig(model: string): ModelConfig`
646
818
 
647
819
  Returns the configuration for a specific model. Throws if the model is not found.
package/dist/index.cjs CHANGED
@@ -41,9 +41,14 @@ __export(index_exports, {
41
41
  countSentencePieceTokensAsync: () => countSentencePieceTokensAsync,
42
42
  countTokens: () => countTokens,
43
43
  decode: () => decode,
44
+ decodeAsyncGenerator: () => decodeAsyncGenerator,
45
+ decodeGenerator: () => decodeGenerator,
44
46
  decodeSentencePiece: () => decodeSentencePiece,
45
47
  decodeSentencePieceAsync: () => decodeSentencePieceAsync,
46
48
  encode: () => encode,
49
+ encodeChat: () => encodeChat,
50
+ encodeChatGenerator: () => encodeChatGenerator,
51
+ encodeGenerator: () => encodeGenerator,
47
52
  encodeSentencePiece: () => encodeSentencePiece,
48
53
  encodeSentencePieceAsync: () => encodeSentencePieceAsync,
49
54
  ensureSentencePieceModel: () => ensureSentencePieceModel,
@@ -777,6 +782,7 @@ var BPETokenizer = class {
777
782
  }
778
783
  /**
779
784
  * Add an entry to the cache, evicting LRU entries if necessary.
785
+ * Freezes the array to prevent mutation by consumers (especially generator yields).
780
786
  */
781
787
  addToCache(key, value) {
782
788
  if (this.cacheCapacity <= 0) return;
@@ -789,6 +795,7 @@ var BPETokenizer = class {
789
795
  removed++;
790
796
  }
791
797
  }
798
+ Object.freeze(value);
792
799
  this.tokenCache.set(key, value);
793
800
  }
794
801
  /**
@@ -829,6 +836,158 @@ var BPETokenizer = class {
829
836
  clearCache() {
830
837
  this.tokenCache.clear();
831
838
  }
839
+ // ===========================================================================
840
+ // Generator Methods
841
+ // ===========================================================================
842
+ /**
843
+ * Generator version of encodeText. Yields token arrays per regex-matched piece.
844
+ * Returns total token count.
845
+ *
846
+ * @param text - The text to encode
847
+ * @param allowedSpecial - Controls special token handling (same as encodeText)
848
+ * @returns Generator that yields token arrays and returns total count
849
+ */
850
+ *encodeTextGenerator(text, allowedSpecial) {
851
+ if (!text) return 0;
852
+ let totalTokens = 0;
853
+ if (allowedSpecial === "skip") {
854
+ const gen = this.encodeOrdinaryGenerator(text);
855
+ let result = gen.next();
856
+ while (!result.done) {
857
+ yield result.value;
858
+ totalTokens += result.value.length;
859
+ result = gen.next();
860
+ }
861
+ return totalTokens;
862
+ }
863
+ if (this.specialTokenMap.size > 0) {
864
+ const parts = this.splitOnSpecialTokens(text, allowedSpecial);
865
+ for (const part of parts) {
866
+ if (part.isSpecial) {
867
+ const tokenId = this.specialTokenMap.get(part.text);
868
+ yield [tokenId];
869
+ totalTokens += 1;
870
+ } else {
871
+ const gen = this.encodeOrdinaryGenerator(part.text);
872
+ let result = gen.next();
873
+ while (!result.done) {
874
+ yield result.value;
875
+ totalTokens += result.value.length;
876
+ result = gen.next();
877
+ }
878
+ }
879
+ }
880
+ } else {
881
+ const gen = this.encodeOrdinaryGenerator(text);
882
+ let result = gen.next();
883
+ while (!result.done) {
884
+ yield result.value;
885
+ totalTokens += result.value.length;
886
+ result = gen.next();
887
+ }
888
+ }
889
+ return totalTokens;
890
+ }
891
+ /**
892
+ * Generator version of encodeOrdinary. Yields token arrays per regex piece.
893
+ * Uses same cache logic as encodeOrdinary.
894
+ */
895
+ *encodeOrdinaryGenerator(text) {
896
+ if (!text) return;
897
+ const regex = new RegExp(
898
+ this.tokenSplitRegex.source,
899
+ this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
900
+ );
901
+ let match;
902
+ while ((match = regex.exec(text)) !== null) {
903
+ const piece = match[0];
904
+ if (piece.length === 0) {
905
+ regex.lastIndex++;
906
+ continue;
907
+ }
908
+ const cached = this.getFromCache(piece);
909
+ if (cached) {
910
+ yield cached;
911
+ continue;
912
+ }
913
+ const pieceBytes = this.textEncoder.encode(piece);
914
+ const key = bytesToLatin1(pieceBytes);
915
+ const directRank = this.encoder.get(key);
916
+ if (directRank !== void 0) {
917
+ const tokens = [directRank];
918
+ this.addToCache(piece, tokens);
919
+ yield tokens;
920
+ continue;
921
+ }
922
+ const pieceTokens = this.mergeBytePairs(pieceBytes);
923
+ this.addToCache(piece, pieceTokens);
924
+ yield pieceTokens;
925
+ }
926
+ }
927
+ /**
928
+ * Generator version of decodeTokens. Yields text chunks.
929
+ * Uses TextDecoder streaming mode to handle partial UTF-8 correctly.
930
+ * May yield empty strings when buffering incomplete sequences.
931
+ *
932
+ * Streaming semantics:
933
+ * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
934
+ * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
935
+ */
936
+ *decodeTokensGenerator(tokens) {
937
+ const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
938
+ for (const token of tokens) {
939
+ const specialToken = this.specialTokenDecoder.get(token);
940
+ if (specialToken !== void 0) {
941
+ const flushed = streamingDecoder.decode(new Uint8Array(0));
942
+ if (flushed) yield flushed;
943
+ yield specialToken;
944
+ continue;
945
+ }
946
+ const tokenBytes = this.decoder.get(token);
947
+ if (!tokenBytes) {
948
+ throw new Error(
949
+ `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
950
+ );
951
+ }
952
+ const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
953
+ yield decoded;
954
+ }
955
+ const final = streamingDecoder.decode();
956
+ if (final) yield final;
957
+ }
958
+ /**
959
+ * Async generator version of decodeTokens.
960
+ * Accepts AsyncIterable<number | number[]> for flexibility.
961
+ *
962
+ * Streaming semantics:
963
+ * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
964
+ * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
965
+ */
966
+ async *decodeTokensAsyncGenerator(tokens) {
967
+ const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
968
+ for await (const tokenOrChunk of tokens) {
969
+ const tokenArray = typeof tokenOrChunk === "number" ? [tokenOrChunk] : tokenOrChunk;
970
+ for (const token of tokenArray) {
971
+ const specialToken = this.specialTokenDecoder.get(token);
972
+ if (specialToken !== void 0) {
973
+ const flushed = streamingDecoder.decode(new Uint8Array(0));
974
+ if (flushed) yield flushed;
975
+ yield specialToken;
976
+ continue;
977
+ }
978
+ const tokenBytes = this.decoder.get(token);
979
+ if (!tokenBytes) {
980
+ throw new Error(
981
+ `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
982
+ );
983
+ }
984
+ const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
985
+ yield decoded;
986
+ }
987
+ }
988
+ const final = streamingDecoder.decode();
989
+ if (final) yield final;
990
+ }
832
991
  };
833
992
 
834
993
  // src/bpe/special-tokens.ts
@@ -849,10 +1008,18 @@ var CL100K_BASE_SPECIAL_TOKENS = [
849
1008
  ["<|fim_prefix|>", 100258],
850
1009
  ["<|fim_middle|>", 100259],
851
1010
  ["<|fim_suffix|>", 100260],
1011
+ // ChatML tokens for chat completion
1012
+ ["<|im_start|>", 100264],
1013
+ ["<|im_end|>", 100265],
1014
+ ["<|im_sep|>", 100266],
852
1015
  ["<|endofprompt|>", 100276]
853
1016
  ];
854
1017
  var O200K_BASE_SPECIAL_TOKENS = [
855
1018
  ["<|endoftext|>", 199999],
1019
+ // ChatML tokens for chat completion
1020
+ ["<|im_start|>", 200264],
1021
+ ["<|im_end|>", 200265],
1022
+ ["<|im_sep|>", 200266],
856
1023
  ["<|endofprompt|>", 200018]
857
1024
  ];
858
1025
  function buildO200kHarmonySpecialTokens() {
@@ -401972,7 +402139,11 @@ function getTokenizer(encoding) {
401972
402139
  return {
401973
402140
  encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
401974
402141
  decode: (tokens) => tokenizer.decodeTokens(tokens),
401975
- encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
402142
+ encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial),
402143
+ // Generator methods - delegate to BPETokenizer
402144
+ encodeGenerator: (text, allowedSpecial) => tokenizer.encodeTextGenerator(text, allowedSpecial),
402145
+ decodeGenerator: (tokens) => tokenizer.decodeTokensGenerator(tokens),
402146
+ decodeAsyncGenerator: (tokens) => tokenizer.decodeTokensAsyncGenerator(tokens)
401976
402147
  };
401977
402148
  }
401978
402149
  function resolveEncoding(options) {
@@ -402045,6 +402216,22 @@ function isWithinTokenLimit(text, tokenLimit, options) {
402045
402216
  const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
402046
402217
  return result.exceeded ? false : result.count;
402047
402218
  }
402219
+ function encodeGenerator(text, options) {
402220
+ const encoding = resolveEncoding(options);
402221
+ const api = getTokenizer(encoding);
402222
+ const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
402223
+ return api.encodeGenerator(text, allowedSpecial);
402224
+ }
402225
+ function* decodeGenerator(tokens, options) {
402226
+ const encoding = resolveEncoding(options);
402227
+ const api = getTokenizer(encoding);
402228
+ yield* api.decodeGenerator(tokens);
402229
+ }
402230
+ async function* decodeAsyncGenerator(tokens, options) {
402231
+ const encoding = resolveEncoding(options);
402232
+ const api = getTokenizer(encoding);
402233
+ yield* api.decodeAsyncGenerator(tokens);
402234
+ }
402048
402235
 
402049
402236
  // src/token-counter.ts
402050
402237
  function isNonOpenAIModel(model) {
@@ -405440,6 +405627,201 @@ function isChatWithinTokenLimit(input) {
405440
405627
  }
405441
405628
  return count;
405442
405629
  }
405630
+
405631
+ // src/encode-chat.ts
405632
+ var CHAT_TOKENS = {
405633
+ cl100k_base: { imStart: 100264, imEnd: 100265, imSep: 100266 },
405634
+ o200k_base: { imStart: 200264, imEnd: 200265, imSep: 200266 }
405635
+ };
405636
+ var HARMONY_TOKENS = {
405637
+ start: 200006,
405638
+ end: 200007,
405639
+ message: 200008
405640
+ };
405641
+ function encodeChat(messages, options) {
405642
+ const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
405643
+ validateChatModel(model, encodingOverride);
405644
+ const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
405645
+ if (encoding === "o200k_harmony") {
405646
+ console.warn(
405647
+ "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
405648
+ );
405649
+ }
405650
+ const chatTokens = getChatTokens(encoding);
405651
+ if (!chatTokens) {
405652
+ throw new Error(
405653
+ `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
405654
+ );
405655
+ }
405656
+ const { imStart, imEnd, imSep } = chatTokens;
405657
+ const tokens = [];
405658
+ for (const message of messages) {
405659
+ validateMessage(message);
405660
+ tokens.push(imStart);
405661
+ let roleStr;
405662
+ if (message.role === "function" && message.name) {
405663
+ roleStr = message.name;
405664
+ } else if (message.name) {
405665
+ roleStr = `${message.role}:${message.name}`;
405666
+ } else {
405667
+ roleStr = message.role;
405668
+ }
405669
+ tokens.push(...encode(roleStr, { encoding, allowSpecial: "none" }));
405670
+ tokens.push(imSep);
405671
+ if (message.content) {
405672
+ tokens.push(
405673
+ ...encode(message.content, { encoding, allowSpecial: "none" })
405674
+ );
405675
+ }
405676
+ if (message.function_call) {
405677
+ const fcContent = formatFunctionCall(message.function_call);
405678
+ tokens.push(...encode(fcContent, { encoding, allowSpecial: "none" }));
405679
+ }
405680
+ tokens.push(imEnd);
405681
+ }
405682
+ if (primeAssistant) {
405683
+ tokens.push(imStart);
405684
+ tokens.push(...encode("assistant", { encoding, allowSpecial: "none" }));
405685
+ tokens.push(imSep);
405686
+ }
405687
+ return tokens;
405688
+ }
405689
+ function validateChatModel(model, encodingOverride) {
405690
+ if (model) {
405691
+ if (isAnthropicModel(model)) {
405692
+ throw new Error(
405693
+ `Model "${model}" is an Anthropic model. encodeChat only supports OpenAI models.`
405694
+ );
405695
+ }
405696
+ if (isGoogleModel(model)) {
405697
+ throw new Error(
405698
+ `Model "${model}" is a Google model. encodeChat only supports OpenAI models.`
405699
+ );
405700
+ }
405701
+ if (isKnownModel(model) && !isChatModel(model)) {
405702
+ throw new Error(
405703
+ `Model "${model}" is not a chat completion model. encodeChat only supports chat models (e.g., gpt-4o, gpt-3.5-turbo).`
405704
+ );
405705
+ }
405706
+ }
405707
+ if (encodingOverride) {
405708
+ return;
405709
+ }
405710
+ if (!model) {
405711
+ throw new Error(
405712
+ "Either model or encoding must be provided. Provide a known OpenAI chat model (e.g., gpt-4o) or an explicit encoding (e.g., o200k_base)."
405713
+ );
405714
+ }
405715
+ if (!isChatModel(model)) {
405716
+ throw new Error(
405717
+ `Model "${model}" is not recognized as an OpenAI chat model. If this is a new OpenAI model, provide the encoding option explicitly (e.g., encoding: "o200k_base").`
405718
+ );
405719
+ }
405720
+ }
405721
+ function validateMessage(message) {
405722
+ const msgAny = message;
405723
+ if ("tool_calls" in msgAny && msgAny.tool_calls !== void 0) {
405724
+ throw new Error(
405725
+ "tool_calls is not supported. Use function_call with the legacy functions API."
405726
+ );
405727
+ }
405728
+ if ("tool_call_id" in msgAny && msgAny.tool_call_id !== void 0) {
405729
+ throw new Error(
405730
+ "tool_call_id is not supported. Use the legacy functions API."
405731
+ );
405732
+ }
405733
+ if (message.content !== null && message.content !== void 0 && typeof message.content !== "string") {
405734
+ throw new Error(
405735
+ "Multimodal content (arrays) is not supported. Only text content is supported."
405736
+ );
405737
+ }
405738
+ }
405739
+ function getChatTokens(encoding) {
405740
+ if (encoding === "o200k_harmony") {
405741
+ return {
405742
+ imStart: HARMONY_TOKENS.start,
405743
+ imEnd: HARMONY_TOKENS.end,
405744
+ imSep: HARMONY_TOKENS.message
405745
+ };
405746
+ }
405747
+ return CHAT_TOKENS[encoding] ?? null;
405748
+ }
405749
+ function formatFunctionCall(fc) {
405750
+ const parts = [];
405751
+ if (fc.name) parts.push(fc.name);
405752
+ if (fc.arguments) parts.push(fc.arguments);
405753
+ return parts.join("\n");
405754
+ }
405755
+ function getRoleString(message) {
405756
+ if (message.role === "function" && message.name) {
405757
+ return message.name;
405758
+ } else if (message.name) {
405759
+ return `${message.role}:${message.name}`;
405760
+ }
405761
+ return message.role;
405762
+ }
405763
+ function* encodeChatGenerator(messages, options) {
405764
+ const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
405765
+ validateChatModel(model, encodingOverride);
405766
+ const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
405767
+ if (encoding === "o200k_harmony") {
405768
+ console.warn(
405769
+ "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
405770
+ );
405771
+ }
405772
+ const chatTokens = getChatTokens(encoding);
405773
+ if (!chatTokens) {
405774
+ throw new Error(
405775
+ `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
405776
+ );
405777
+ }
405778
+ const { imStart, imEnd, imSep } = chatTokens;
405779
+ let totalTokens = 0;
405780
+ for (const message of messages) {
405781
+ validateMessage(message);
405782
+ yield [imStart];
405783
+ totalTokens += 1;
405784
+ const roleStr = getRoleString(message);
405785
+ const roleTokens = encode(roleStr, { encoding, allowSpecial: "none" });
405786
+ yield roleTokens;
405787
+ totalTokens += roleTokens.length;
405788
+ yield [imSep];
405789
+ totalTokens += 1;
405790
+ if (message.content) {
405791
+ const contentGen = encodeGenerator(message.content, {
405792
+ encoding,
405793
+ allowSpecial: "none"
405794
+ });
405795
+ let result = contentGen.next();
405796
+ while (!result.done) {
405797
+ yield result.value;
405798
+ totalTokens += result.value.length;
405799
+ result = contentGen.next();
405800
+ }
405801
+ }
405802
+ if (message.function_call) {
405803
+ const fcContent = formatFunctionCall(message.function_call);
405804
+ const fcTokens = encode(fcContent, { encoding, allowSpecial: "none" });
405805
+ yield fcTokens;
405806
+ totalTokens += fcTokens.length;
405807
+ }
405808
+ yield [imEnd];
405809
+ totalTokens += 1;
405810
+ }
405811
+ if (primeAssistant) {
405812
+ yield [imStart];
405813
+ totalTokens += 1;
405814
+ const assistantTokens = encode("assistant", {
405815
+ encoding,
405816
+ allowSpecial: "none"
405817
+ });
405818
+ yield assistantTokens;
405819
+ totalTokens += assistantTokens.length;
405820
+ yield [imSep];
405821
+ totalTokens += 1;
405822
+ }
405823
+ return totalTokens;
405824
+ }
405443
405825
  // Annotate the CommonJS export names for ESM import in node:
405444
405826
  0 && (module.exports = {
405445
405827
  DEFAULT_MODELS,
@@ -405453,9 +405835,14 @@ function isChatWithinTokenLimit(input) {
405453
405835
  countSentencePieceTokensAsync,
405454
405836
  countTokens,
405455
405837
  decode,
405838
+ decodeAsyncGenerator,
405839
+ decodeGenerator,
405456
405840
  decodeSentencePiece,
405457
405841
  decodeSentencePieceAsync,
405458
405842
  encode,
405843
+ encodeChat,
405844
+ encodeChatGenerator,
405845
+ encodeGenerator,
405459
405846
  encodeSentencePiece,
405460
405847
  encodeSentencePieceAsync,
405461
405848
  ensureSentencePieceModel,