ai-token-estimator 1.5.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -0
- package/dist/index.cjs +388 -1
- package/dist/index.d.cts +153 -1
- package/dist/index.d.ts +153 -1
- package/dist/index.js +383 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -11,6 +11,9 @@ The best way to estimate **tokens + input cost** for LLM calls — with **exact
 ## Features
 
 - **Exact OpenAI tokenization** (tiktoken-compatible BPE): `encode()` / `decode()` / `openai_exact`
+- **Chat-aware tokenization**: `encodeChat()` returns exact token IDs for chat messages using ChatML format
+- **Fast token limit checking**: `isWithinTokenLimit()` / `isChatWithinTokenLimit()` with early-exit optimization (up to 1000x faster for large texts)
+- **Generator-based streaming**: `encodeGenerator()` / `encodeChatGenerator()` / `decodeGenerator()` / `decodeAsyncGenerator()` for memory-efficient tokenization
 - **OpenAI chat completion token counting** (legacy `functions` API): `countChatCompletionTokens()` with optional per-message breakdown
 - **Pure TypeScript SentencePiece tokenizer** (no native dependencies):
   - Supports `.model` files (protobuf format)
@@ -64,6 +67,21 @@ console.log(countTokens({ text: 'Hello, world!', model: 'gpt-5.1' }));
 
 ## Quick Recipes
 
+### Encode chat messages to tokens (ChatML format)
+
+```ts
+import { encodeChat, decode } from 'ai-token-estimator';
+
+const tokens = encodeChat([
+  { role: 'system', content: 'You are helpful.' },
+  { role: 'user', content: 'Hello!' }
+], { model: 'gpt-4o' });
+
+console.log(tokens); // [200264, 9125, 200266, 2610, 525, 11190, 13, 200265, ...]
+console.log(decode(tokens, { encoding: 'o200k_base' }));
+// <|im_start|>system<|im_sep|>You are helpful.<|im_end|>...
+```
+
 ### OpenAI chat completion tokens (legacy functions API)
 
 ```ts
@@ -75,6 +93,43 @@ const { totalTokens } = countChatCompletionTokens({
 });
 ```
 
+### Fast token limit checking (early exit)
+
+```ts
+import { isWithinTokenLimit, isChatWithinTokenLimit } from 'ai-token-estimator';
+
+// Plain text - returns token count or false if exceeded
+const count = isWithinTokenLimit(longText, 4096, { model: 'gpt-4o' });
+if (count === false) console.log('Text exceeds limit');
+
+// Chat messages - same early-exit optimization
+const chatCount = isChatWithinTokenLimit({
+  messages: [{ role: 'user', content: longText }],
+  model: 'gpt-4o',
+  tokenLimit: 4096,
+});
+```
+
+### Generator-based streaming tokenization
+
+```ts
+import { encodeGenerator, decodeAsyncGenerator } from 'ai-token-estimator';
+
+// Stream-encode large text (memory efficient)
+let tokenCount = 0;
+for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+  tokenCount += tokenChunk.length;
+  // Process chunk...
+}
+
+// Decode streaming LLM response
+async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+  for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+    process.stdout.write(text);
+  }
+}
+```
+
 ### Local SentencePiece token counting
 
 ```ts
@@ -561,6 +616,48 @@ Encodes text into **OpenAI token IDs** using tiktoken-compatible BPE tokenization.
 
 Decodes OpenAI token IDs back into text using the selected encoding/model.
 
+### `encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[]`
+
+Encodes chat messages into **exact token IDs** using ChatML format. Returns the ChatML message prompt tokens (messages + optional assistant priming), including special delimiter tokens (`<|im_start|>`, `<|im_sep|>`, `<|im_end|>`).
+
+```ts
+import { encodeChat, decode } from 'ai-token-estimator';
+
+const tokens = encodeChat([
+  { role: 'system', content: 'You are helpful.' },
+  { role: 'user', content: 'Hello!' }
+], { model: 'gpt-4o' });
+
+// Tokens include ChatML structure:
+// <|im_start|>system<|im_sep|>You are helpful.<|im_end|>
+// <|im_start|>user<|im_sep|>Hello!<|im_end|>
+// <|im_start|>assistant<|im_sep|> (priming)
+```
+
+**Parameters:**
+
+```typescript
+interface EncodeChatOptions {
+  model?: string;            // OpenAI model (e.g., 'gpt-4o')
+  encoding?: OpenAIEncoding; // Explicit encoding override
+  primeAssistant?: boolean;  // Append assistant priming (default: true)
+}
+```
+
+**Supported encodings:**
+- `cl100k_base` (GPT-4, GPT-3.5-turbo)
+- `o200k_base` (GPT-4o, GPT-4o-mini)
+- `o200k_harmony` (experimental)
+
+**Limitations:**
+- **OpenAI models only** — throws for claude-*, gemini-*
+- **Legacy functions API only** — throws for tool_calls, tool_call_id
+- **Text content only** — throws for multimodal content (arrays)
+
+**Note on function_call:** Messages with `function_call` are encoded with the function name and arguments as content. The token count differs from `countChatCompletionTokens()` because the latter includes `FUNCTION_CALL_METADATA_TOKEN_OVERHEAD` (3 tokens) for API accounting. The exact difference depends on whether both name and arguments are present (a 2-token difference due to the newline separator) or only one field is present (a 3-token difference).
+
+**Note on o200k_harmony:** Support for the `o200k_harmony` encoding is experimental. The token structure may not match actual API behavior.
+
 ### `isWithinTokenLimit(text, tokenLimit, options?): false | number`
 
 Checks if text is within a token limit with **early exit optimization**. Returns `false` if the limit is exceeded, or the actual token count if within limit.
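As a small illustration of the options documented above (a sketch with made-up message content, not taken from the README): `primeAssistant: false` drops the trailing `<|im_start|>assistant<|im_sep|>` priming, and a `function_call` is encoded as name plus arguments joined by a newline.

```ts
import { encodeChat } from 'ai-token-estimator';

const messages = [
  { role: 'user', content: 'What is the weather in Paris?' },
  {
    role: 'assistant',
    content: null,
    function_call: { name: 'get_weather', arguments: '{"city":"Paris"}' },
  },
];

const primed = encodeChat(messages, { model: 'gpt-4o' });
const unprimed = encodeChat(messages, { model: 'gpt-4o', primeAssistant: false });

// Priming appends <|im_start|>, the tokens for "assistant", and <|im_sep|>,
// so the difference is typically 3 tokens.
console.log(primed.length - unprimed.length);
```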
@@ -642,6 +739,81 @@ interface IsChatWithinTokenLimitInput {
 - `Error` if tools, tool_choice, tool_calls, or tool_call_id are present
 - `Error` if any message has non-string content
 
+### Generator APIs
+
+Generator-based APIs for memory-efficient streaming tokenization.
+
+#### `encodeGenerator(text, options?): Generator<number[], number, undefined>`
+
+Encodes text, yielding token chunks. Memory-efficient for large inputs.
+
+- **Yields:** `number[]` — token IDs per regex-matched piece (word/punctuation)
+- **Returns:** `number` — total token count when iteration completes
+
+```typescript
+import { encodeGenerator } from 'ai-token-estimator';
+
+// Stream-encode large text
+let tokenCount = 0;
+for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+  tokenCount += tokenChunk.length;
+}
+
+// Or get the total count from the return value
+const gen = encodeGenerator(text, { model: 'gpt-4o' });
+let result = gen.next();
+while (!result.done) result = gen.next();
+console.log('Total tokens:', result.value);
+```
+
+#### `encodeChatGenerator(messages, options?): Generator<number[], number, undefined>`
+
+Encodes chat messages, yielding token chunks per message component.
+
+- **Yields:** `number[]` — token IDs per component (special tokens, role, content chunks, etc.)
+- **Returns:** `number` — total token count
+
+```typescript
+import { encodeChatGenerator } from 'ai-token-estimator';
+
+const messages = [
+  { role: 'system', content: 'You are helpful.' },
+  { role: 'user', content: 'Hello!' }
+];
+
+for (const tokenChunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
+  console.log('Chunk:', tokenChunk);
+}
+```
+
+#### `decodeGenerator(tokens, options?): Generator<string, void, void>`
+
+Decodes tokens, yielding text chunks. Uses TextDecoder streaming mode and may yield empty strings while buffering incomplete UTF-8 sequences.
+
+```typescript
+import { encode, decodeGenerator } from 'ai-token-estimator';
+
+const tokens = encode('Hello, world!', { model: 'gpt-4o' });
+for (const textChunk of decodeGenerator(tokens, { model: 'gpt-4o' })) {
+  process.stdout.write(textChunk);
+}
+```
+
+#### `decodeAsyncGenerator(tokens, options?): AsyncGenerator<string, void, void>`
+
+Decodes an async token stream, yielding text chunks. Accepts `AsyncIterable<number | number[]>` for flexibility with streaming APIs.
+
+```typescript
+import { decodeAsyncGenerator } from 'ai-token-estimator';
+
+// Decode a streaming LLM response
+async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+  for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+    process.stdout.write(text);
+  }
+}
+```
+
 ### `getModelConfig(model: string): ModelConfig`
 
 Returns the configuration for a specific model. Throws if the model is not found.
package/dist/index.cjs
CHANGED
@@ -41,9 +41,14 @@ __export(index_exports, {
   countSentencePieceTokensAsync: () => countSentencePieceTokensAsync,
   countTokens: () => countTokens,
   decode: () => decode,
+  decodeAsyncGenerator: () => decodeAsyncGenerator,
+  decodeGenerator: () => decodeGenerator,
   decodeSentencePiece: () => decodeSentencePiece,
   decodeSentencePieceAsync: () => decodeSentencePieceAsync,
   encode: () => encode,
+  encodeChat: () => encodeChat,
+  encodeChatGenerator: () => encodeChatGenerator,
+  encodeGenerator: () => encodeGenerator,
   encodeSentencePiece: () => encodeSentencePiece,
   encodeSentencePieceAsync: () => encodeSentencePieceAsync,
   ensureSentencePieceModel: () => ensureSentencePieceModel,
@@ -777,6 +782,7 @@ var BPETokenizer = class {
   }
   /**
    * Add an entry to the cache, evicting LRU entries if necessary.
+   * Freezes the array to prevent mutation by consumers (especially generator yields).
    */
   addToCache(key, value) {
     if (this.cacheCapacity <= 0) return;
@@ -789,6 +795,7 @@ var BPETokenizer = class {
         removed++;
       }
     }
+    Object.freeze(value);
    this.tokenCache.set(key, value);
  }
  /**
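Because cached token arrays are now frozen, and the generator paths added below yield cache entries directly, a consumer that wants to mutate a yielded chunk should copy it first. A minimal sketch of that pattern, using the public `encodeGenerator` wrapper added in this release:

```ts
import { encodeGenerator } from 'ai-token-estimator';

const collected: number[][] = [];
for (const chunk of encodeGenerator('hello hello hello', { model: 'gpt-4o' })) {
  // Yielded chunks can be frozen cache entries, so chunk.push(...) would throw
  // in strict mode; copy before mutating.
  collected.push([...chunk]);
}
```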
@@ -829,6 +836,158 @@ var BPETokenizer = class {
   clearCache() {
     this.tokenCache.clear();
   }
+  // ===========================================================================
+  // Generator Methods
+  // ===========================================================================
+  /**
+   * Generator version of encodeText. Yields token arrays per regex-matched piece.
+   * Returns total token count.
+   *
+   * @param text - The text to encode
+   * @param allowedSpecial - Controls special token handling (same as encodeText)
+   * @returns Generator that yields token arrays and returns total count
+   */
+  *encodeTextGenerator(text, allowedSpecial) {
+    if (!text) return 0;
+    let totalTokens = 0;
+    if (allowedSpecial === "skip") {
+      const gen = this.encodeOrdinaryGenerator(text);
+      let result = gen.next();
+      while (!result.done) {
+        yield result.value;
+        totalTokens += result.value.length;
+        result = gen.next();
+      }
+      return totalTokens;
+    }
+    if (this.specialTokenMap.size > 0) {
+      const parts = this.splitOnSpecialTokens(text, allowedSpecial);
+      for (const part of parts) {
+        if (part.isSpecial) {
+          const tokenId = this.specialTokenMap.get(part.text);
+          yield [tokenId];
+          totalTokens += 1;
+        } else {
+          const gen = this.encodeOrdinaryGenerator(part.text);
+          let result = gen.next();
+          while (!result.done) {
+            yield result.value;
+            totalTokens += result.value.length;
+            result = gen.next();
+          }
+        }
+      }
+    } else {
+      const gen = this.encodeOrdinaryGenerator(text);
+      let result = gen.next();
+      while (!result.done) {
+        yield result.value;
+        totalTokens += result.value.length;
+        result = gen.next();
+      }
+    }
+    return totalTokens;
+  }
+  /**
+   * Generator version of encodeOrdinary. Yields token arrays per regex piece.
+   * Uses same cache logic as encodeOrdinary.
+   */
+  *encodeOrdinaryGenerator(text) {
+    if (!text) return;
+    const regex = new RegExp(
+      this.tokenSplitRegex.source,
+      this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
+    );
+    let match;
+    while ((match = regex.exec(text)) !== null) {
+      const piece = match[0];
+      if (piece.length === 0) {
+        regex.lastIndex++;
+        continue;
+      }
+      const cached = this.getFromCache(piece);
+      if (cached) {
+        yield cached;
+        continue;
+      }
+      const pieceBytes = this.textEncoder.encode(piece);
+      const key = bytesToLatin1(pieceBytes);
+      const directRank = this.encoder.get(key);
+      if (directRank !== void 0) {
+        const tokens = [directRank];
+        this.addToCache(piece, tokens);
+        yield tokens;
+        continue;
+      }
+      const pieceTokens = this.mergeBytePairs(pieceBytes);
+      this.addToCache(piece, pieceTokens);
+      yield pieceTokens;
+    }
+  }
+  /**
+   * Generator version of decodeTokens. Yields text chunks.
+   * Uses TextDecoder streaming mode to handle partial UTF-8 correctly.
+   * May yield empty strings when buffering incomplete sequences.
+   *
+   * Streaming semantics:
+   * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+   * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+   */
+  *decodeTokensGenerator(tokens) {
+    const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+    for (const token of tokens) {
+      const specialToken = this.specialTokenDecoder.get(token);
+      if (specialToken !== void 0) {
+        const flushed = streamingDecoder.decode(new Uint8Array(0));
+        if (flushed) yield flushed;
+        yield specialToken;
+        continue;
+      }
+      const tokenBytes = this.decoder.get(token);
+      if (!tokenBytes) {
+        throw new Error(
+          `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+        );
+      }
+      const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+      yield decoded;
+    }
+    const final = streamingDecoder.decode();
+    if (final) yield final;
+  }
+  /**
+   * Async generator version of decodeTokens.
+   * Accepts AsyncIterable<number | number[]> for flexibility.
+   *
+   * Streaming semantics:
+   * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+   * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+   */
+  async *decodeTokensAsyncGenerator(tokens) {
+    const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+    for await (const tokenOrChunk of tokens) {
+      const tokenArray = typeof tokenOrChunk === "number" ? [tokenOrChunk] : tokenOrChunk;
+      for (const token of tokenArray) {
+        const specialToken = this.specialTokenDecoder.get(token);
+        if (specialToken !== void 0) {
+          const flushed = streamingDecoder.decode(new Uint8Array(0));
+          if (flushed) yield flushed;
+          yield specialToken;
+          continue;
+        }
+        const tokenBytes = this.decoder.get(token);
+        if (!tokenBytes) {
+          throw new Error(
+            `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+          );
+        }
+        const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+        yield decoded;
+      }
+    }
+    const final = streamingDecoder.decode();
+    if (final) yield final;
+  }
 };
 
 // src/bpe/special-tokens.ts
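For reference, the TextDecoder streaming semantics these generators rely on can be seen in isolation. This standalone sketch (not part of the package) shows how `{ stream: true }` buffers a split multi-byte character and how the final flush call emits nothing once the sequence has completed:

```ts
// '€' is three UTF-8 bytes (0xE2 0x82 0xAC); feed them in two pieces.
const bytes = new TextEncoder().encode('€');
const dec = new TextDecoder('utf-8', { fatal: false });

console.log(JSON.stringify(dec.decode(bytes.slice(0, 2), { stream: true }))); // "" (buffered)
console.log(JSON.stringify(dec.decode(bytes.slice(2), { stream: true })));    // "€"
console.log(JSON.stringify(dec.decode()));                                    // "" (flush, nothing pending)
```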
@@ -849,10 +1008,18 @@ var CL100K_BASE_SPECIAL_TOKENS = [
   ["<|fim_prefix|>", 100258],
   ["<|fim_middle|>", 100259],
   ["<|fim_suffix|>", 100260],
+  // ChatML tokens for chat completion
+  ["<|im_start|>", 100264],
+  ["<|im_end|>", 100265],
+  ["<|im_sep|>", 100266],
   ["<|endofprompt|>", 100276]
 ];
 var O200K_BASE_SPECIAL_TOKENS = [
   ["<|endoftext|>", 199999],
+  // ChatML tokens for chat completion
+  ["<|im_start|>", 200264],
+  ["<|im_end|>", 200265],
+  ["<|im_sep|>", 200266],
   ["<|endofprompt|>", 200018]
 ];
 function buildO200kHarmonySpecialTokens() {
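A quick way to sanity-check the newly registered ChatML delimiter IDs is to decode them directly. A small sketch using the package's public `decode()`:

```ts
import { decode } from 'ai-token-estimator';

// The IDs added above map to the ChatML delimiters in each encoding.
console.log(decode([200264, 200266, 200265], { encoding: 'o200k_base' }));
// <|im_start|><|im_sep|><|im_end|>
console.log(decode([100264, 100266, 100265], { encoding: 'cl100k_base' }));
// <|im_start|><|im_sep|><|im_end|>
```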
@@ -401972,7 +402139,11 @@ function getTokenizer(encoding) {
   return {
     encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
     decode: (tokens) => tokenizer.decodeTokens(tokens),
-    encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
+    encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial),
+    // Generator methods - delegate to BPETokenizer
+    encodeGenerator: (text, allowedSpecial) => tokenizer.encodeTextGenerator(text, allowedSpecial),
+    decodeGenerator: (tokens) => tokenizer.decodeTokensGenerator(tokens),
+    decodeAsyncGenerator: (tokens) => tokenizer.decodeTokensAsyncGenerator(tokens)
   };
 }
 function resolveEncoding(options) {
@@ -402045,6 +402216,22 @@ function isWithinTokenLimit(text, tokenLimit, options) {
   const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
   return result.exceeded ? false : result.count;
 }
+function encodeGenerator(text, options) {
+  const encoding = resolveEncoding(options);
+  const api = getTokenizer(encoding);
+  const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
+  return api.encodeGenerator(text, allowedSpecial);
+}
+function* decodeGenerator(tokens, options) {
+  const encoding = resolveEncoding(options);
+  const api = getTokenizer(encoding);
+  yield* api.decodeGenerator(tokens);
+}
+async function* decodeAsyncGenerator(tokens, options) {
+  const encoding = resolveEncoding(options);
+  const api = getTokenizer(encoding);
+  yield* api.decodeAsyncGenerator(tokens);
+}
 
 // src/token-counter.ts
 function isNonOpenAIModel(model) {
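Since `encodeGenerator` yields `number[]` chunks and `decodeAsyncGenerator` accepts `AsyncIterable<number | number[]>`, the two new wrappers compose into a streaming round trip. A sketch (the helper names here are illustrative, not part of the package):

```ts
import { encodeGenerator, decodeAsyncGenerator } from 'ai-token-estimator';

// Wrap the sync generator in an async iterable, as a streaming source would be.
async function* tokenChunks(text: string) {
  yield* encodeGenerator(text, { model: 'gpt-4o' });
}

async function roundTrip(text: string): Promise<string> {
  let out = '';
  for await (const piece of decodeAsyncGenerator(tokenChunks(text), { model: 'gpt-4o' })) {
    out += piece;
  }
  return out;
}

roundTrip('Hello, streaming world!').then((s) => console.log(s === 'Hello, streaming world!')); // true
```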
@@ -405440,6 +405627,201 @@ function isChatWithinTokenLimit(input) {
   }
   return count;
 }
+
+// src/encode-chat.ts
+var CHAT_TOKENS = {
+  cl100k_base: { imStart: 100264, imEnd: 100265, imSep: 100266 },
+  o200k_base: { imStart: 200264, imEnd: 200265, imSep: 200266 }
+};
+var HARMONY_TOKENS = {
+  start: 200006,
+  end: 200007,
+  message: 200008
+};
+function encodeChat(messages, options) {
+  const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+  validateChatModel(model, encodingOverride);
+  const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+  if (encoding === "o200k_harmony") {
+    console.warn(
+      "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+    );
+  }
+  const chatTokens = getChatTokens(encoding);
+  if (!chatTokens) {
+    throw new Error(
+      `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+    );
+  }
+  const { imStart, imEnd, imSep } = chatTokens;
+  const tokens = [];
+  for (const message of messages) {
+    validateMessage(message);
+    tokens.push(imStart);
+    let roleStr;
+    if (message.role === "function" && message.name) {
+      roleStr = message.name;
+    } else if (message.name) {
+      roleStr = `${message.role}:${message.name}`;
+    } else {
+      roleStr = message.role;
+    }
+    tokens.push(...encode(roleStr, { encoding, allowSpecial: "none" }));
+    tokens.push(imSep);
+    if (message.content) {
+      tokens.push(
+        ...encode(message.content, { encoding, allowSpecial: "none" })
+      );
+    }
+    if (message.function_call) {
+      const fcContent = formatFunctionCall(message.function_call);
+      tokens.push(...encode(fcContent, { encoding, allowSpecial: "none" }));
+    }
+    tokens.push(imEnd);
+  }
+  if (primeAssistant) {
+    tokens.push(imStart);
+    tokens.push(...encode("assistant", { encoding, allowSpecial: "none" }));
+    tokens.push(imSep);
+  }
+  return tokens;
+}
+function validateChatModel(model, encodingOverride) {
+  if (model) {
+    if (isAnthropicModel(model)) {
+      throw new Error(
+        `Model "${model}" is an Anthropic model. encodeChat only supports OpenAI models.`
+      );
+    }
+    if (isGoogleModel(model)) {
+      throw new Error(
+        `Model "${model}" is a Google model. encodeChat only supports OpenAI models.`
+      );
+    }
+    if (isKnownModel(model) && !isChatModel(model)) {
+      throw new Error(
+        `Model "${model}" is not a chat completion model. encodeChat only supports chat models (e.g., gpt-4o, gpt-3.5-turbo).`
+      );
+    }
+  }
+  if (encodingOverride) {
+    return;
+  }
+  if (!model) {
+    throw new Error(
+      "Either model or encoding must be provided. Provide a known OpenAI chat model (e.g., gpt-4o) or an explicit encoding (e.g., o200k_base)."
+    );
+  }
+  if (!isChatModel(model)) {
+    throw new Error(
+      `Model "${model}" is not recognized as an OpenAI chat model. If this is a new OpenAI model, provide the encoding option explicitly (e.g., encoding: "o200k_base").`
+    );
+  }
+}
+function validateMessage(message) {
+  const msgAny = message;
+  if ("tool_calls" in msgAny && msgAny.tool_calls !== void 0) {
+    throw new Error(
+      "tool_calls is not supported. Use function_call with the legacy functions API."
+    );
+  }
+  if ("tool_call_id" in msgAny && msgAny.tool_call_id !== void 0) {
+    throw new Error(
+      "tool_call_id is not supported. Use the legacy functions API."
+    );
+  }
+  if (message.content !== null && message.content !== void 0 && typeof message.content !== "string") {
+    throw new Error(
+      "Multimodal content (arrays) is not supported. Only text content is supported."
+    );
+  }
+}
+function getChatTokens(encoding) {
+  if (encoding === "o200k_harmony") {
+    return {
+      imStart: HARMONY_TOKENS.start,
+      imEnd: HARMONY_TOKENS.end,
+      imSep: HARMONY_TOKENS.message
+    };
+  }
+  return CHAT_TOKENS[encoding] ?? null;
+}
+function formatFunctionCall(fc) {
+  const parts = [];
+  if (fc.name) parts.push(fc.name);
+  if (fc.arguments) parts.push(fc.arguments);
+  return parts.join("\n");
+}
+function getRoleString(message) {
+  if (message.role === "function" && message.name) {
+    return message.name;
+  } else if (message.name) {
+    return `${message.role}:${message.name}`;
+  }
+  return message.role;
+}
+function* encodeChatGenerator(messages, options) {
+  const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+  validateChatModel(model, encodingOverride);
+  const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+  if (encoding === "o200k_harmony") {
+    console.warn(
+      "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+    );
+  }
+  const chatTokens = getChatTokens(encoding);
+  if (!chatTokens) {
+    throw new Error(
+      `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+    );
+  }
+  const { imStart, imEnd, imSep } = chatTokens;
+  let totalTokens = 0;
+  for (const message of messages) {
+    validateMessage(message);
+    yield [imStart];
+    totalTokens += 1;
+    const roleStr = getRoleString(message);
+    const roleTokens = encode(roleStr, { encoding, allowSpecial: "none" });
+    yield roleTokens;
+    totalTokens += roleTokens.length;
+    yield [imSep];
+    totalTokens += 1;
+    if (message.content) {
+      const contentGen = encodeGenerator(message.content, {
+        encoding,
+        allowSpecial: "none"
+      });
+      let result = contentGen.next();
+      while (!result.done) {
+        yield result.value;
+        totalTokens += result.value.length;
+        result = contentGen.next();
+      }
+    }
+    if (message.function_call) {
+      const fcContent = formatFunctionCall(message.function_call);
+      const fcTokens = encode(fcContent, { encoding, allowSpecial: "none" });
+      yield fcTokens;
+      totalTokens += fcTokens.length;
+    }
+    yield [imEnd];
+    totalTokens += 1;
+  }
+  if (primeAssistant) {
+    yield [imStart];
+    totalTokens += 1;
+    const assistantTokens = encode("assistant", {
+      encoding,
+      allowSpecial: "none"
+    });
+    yield assistantTokens;
+    totalTokens += assistantTokens.length;
+    yield [imSep];
+    totalTokens += 1;
+  }
+  return totalTokens;
+}
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
   DEFAULT_MODELS,
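To see the role formatting implemented by `getRoleString` above from the public API, a sketch with made-up messages: a `function`-role message is tagged with its `name`, while any other named message becomes `role:name`.

```ts
import { encodeChat, decode } from 'ai-token-estimator';

const tokens = encodeChat(
  [
    { role: 'user', name: 'alice', content: 'Hi!' },
    { role: 'function', name: 'get_weather', content: '{"temp":20}' },
  ],
  { model: 'gpt-4o', primeAssistant: false }
);

console.log(decode(tokens, { encoding: 'o200k_base' }));
// Expected, roughly:
// <|im_start|>user:alice<|im_sep|>Hi!<|im_end|><|im_start|>get_weather<|im_sep|>{"temp":20}<|im_end|>
```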
@@ -405453,9 +405835,14 @@ function isChatWithinTokenLimit(input) {
   countSentencePieceTokensAsync,
   countTokens,
   decode,
+  decodeAsyncGenerator,
+  decodeGenerator,
   decodeSentencePiece,
   decodeSentencePieceAsync,
   encode,
+  encodeChat,
+  encodeChatGenerator,
+  encodeGenerator,
   encodeSentencePiece,
   encodeSentencePieceAsync,
   ensureSentencePieceModel,