ai-token-estimator 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +114 -0
- package/dist/index.cjs +253 -1
- package/dist/index.d.cts +105 -1
- package/dist/index.d.ts +105 -1
- package/dist/index.js +249 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -12,6 +12,8 @@ The best way to estimate **tokens + input cost** for LLM calls — with **exact
  - **Exact OpenAI tokenization** (tiktoken-compatible BPE): `encode()` / `decode()` / `openai_exact`
  - **Chat-aware tokenization**: `encodeChat()` returns exact token IDs for chat messages using ChatML format
+ - **Fast token limit checking**: `isWithinTokenLimit()` / `isChatWithinTokenLimit()` with early-exit optimization (up to 1000x faster for large texts)
+ - **Generator-based streaming**: `encodeGenerator()` / `encodeChatGenerator()` / `decodeGenerator()` / `decodeAsyncGenerator()` for memory-efficient tokenization
  - **OpenAI chat completion token counting** (legacy `functions` API): `countChatCompletionTokens()` with optional per-message breakdown
  - **Pure TypeScript SentencePiece tokenizer** (no native dependencies):
    - Supports `.model` files (protobuf format)
@@ -91,6 +93,43 @@ const { totalTokens } = countChatCompletionTokens({
  });
  ```

+ ### Fast token limit checking (early exit)
+
+ ```ts
+ import { isWithinTokenLimit, isChatWithinTokenLimit } from 'ai-token-estimator';
+
+ // Plain text - returns token count or false if exceeded
+ const count = isWithinTokenLimit(longText, 4096, { model: 'gpt-4o' });
+ if (count === false) console.log('Text exceeds limit');
+
+ // Chat messages - same early-exit optimization
+ const chatCount = isChatWithinTokenLimit({
+   messages: [{ role: 'user', content: longText }],
+   model: 'gpt-4o',
+   tokenLimit: 4096,
+ });
+ ```
+
+ ### Generator-based streaming tokenization
+
+ ```ts
+ import { encodeGenerator, decodeAsyncGenerator } from 'ai-token-estimator';
+
+ // Stream-encode large text (memory efficient)
+ let tokenCount = 0;
+ for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+   tokenCount += tokenChunk.length;
+   // Process chunk...
+ }
+
+ // Decode streaming LLM response
+ async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+   for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+     process.stdout.write(text);
+   }
+ }
+ ```
+
  ### Local SentencePiece token counting

  ```ts
@@ -700,6 +739,81 @@ interface IsChatWithinTokenLimitInput {
  - `Error` if tools, tool_choice, tool_calls, or tool_call_id are present
  - `Error` if any message has non-string content

+ ### Generator APIs
+
+ Generator-based APIs for memory-efficient streaming tokenization.
+
+ #### `encodeGenerator(text, options?): Generator<number[], number, undefined>`
+
+ Encode text yielding token chunks. Memory-efficient for large inputs.
+
+ - **Yields:** `number[]` — token IDs per regex-matched piece (word/punctuation)
+ - **Returns:** `number` — total token count when iteration completes
+
+ ```typescript
+ import { encodeGenerator } from 'ai-token-estimator';
+
+ // Stream-encode large text
+ let tokenCount = 0;
+ for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+   tokenCount += tokenChunk.length;
+ }
+
+ // Or get total count from return value
+ const gen = encodeGenerator(text, { model: 'gpt-4o' });
+ let result = gen.next();
+ while (!result.done) result = gen.next();
+ console.log('Total tokens:', result.value);
+ ```
+
+ #### `encodeChatGenerator(messages, options?): Generator<number[], number, undefined>`
+
+ Encode chat messages yielding token chunks per message component.
+
+ - **Yields:** `number[]` — token IDs per component (special tokens, role, content chunks, etc.)
+ - **Returns:** `number` — total token count
+
+ ```typescript
+ import { encodeChatGenerator } from 'ai-token-estimator';
+
+ const messages = [
+   { role: 'system', content: 'You are helpful.' },
+   { role: 'user', content: 'Hello!' }
+ ];
+
+ for (const tokenChunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
+   console.log('Chunk:', tokenChunk);
+ }
+ ```
+
+ #### `decodeGenerator(tokens, options?): Generator<string, void, void>`
+
+ Decode tokens yielding text chunks. Uses TextDecoder streaming mode — may yield empty strings when buffering incomplete UTF-8 sequences.
+
+ ```typescript
+ import { encode, decodeGenerator } from 'ai-token-estimator';
+
+ const tokens = encode('Hello, world!', { model: 'gpt-4o' });
+ for (const textChunk of decodeGenerator(tokens, { model: 'gpt-4o' })) {
+   process.stdout.write(textChunk);
+ }
+ ```
+
+ #### `decodeAsyncGenerator(tokens, options?): AsyncGenerator<string, void, void>`
+
+ Decode async token stream yielding text chunks. Accepts `AsyncIterable<number | number[]>` for flexibility with streaming APIs.
+
+ ```typescript
+ import { decodeAsyncGenerator } from 'ai-token-estimator';
+
+ // Decode streaming LLM response
+ async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+   for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+     process.stdout.write(text);
+   }
+ }
+ ```
+
  ### `getModelConfig(model: string): ModelConfig`

  Returns the configuration for a specific model. Throws if the model is not found.
package/dist/index.cjs
CHANGED
@@ -41,10 +41,14 @@ __export(index_exports, {
    countSentencePieceTokensAsync: () => countSentencePieceTokensAsync,
    countTokens: () => countTokens,
    decode: () => decode,
+   decodeAsyncGenerator: () => decodeAsyncGenerator,
+   decodeGenerator: () => decodeGenerator,
    decodeSentencePiece: () => decodeSentencePiece,
    decodeSentencePieceAsync: () => decodeSentencePieceAsync,
    encode: () => encode,
    encodeChat: () => encodeChat,
+   encodeChatGenerator: () => encodeChatGenerator,
+   encodeGenerator: () => encodeGenerator,
    encodeSentencePiece: () => encodeSentencePiece,
    encodeSentencePieceAsync: () => encodeSentencePieceAsync,
    ensureSentencePieceModel: () => ensureSentencePieceModel,
@@ -778,6 +782,7 @@ var BPETokenizer = class {
    }
    /**
     * Add an entry to the cache, evicting LRU entries if necessary.
+    * Freezes the array to prevent mutation by consumers (especially generator yields).
     */
    addToCache(key, value) {
      if (this.cacheCapacity <= 0) return;
@@ -790,6 +795,7 @@ var BPETokenizer = class {
          removed++;
        }
      }
+     Object.freeze(value);
      this.tokenCache.set(key, value);
    }
    /**
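For context on the `Object.freeze` addition: the BPE cache stores one token array per text piece, and the new generator methods yield those cached arrays directly to callers instead of copying them into a flat result. A standalone sketch (not the package's code) of the mutation hazard this guards against:

```ts
// Simplified stand-in for an LRU token cache: every cache hit returns the
// same array instance, so a consumer that mutates a yielded chunk would
// silently corrupt every later encode of the same piece. Freezing the
// cached array turns that mutation into an error instead.
const cache = new Map<string, readonly number[]>();

function tokensFor(piece: string): readonly number[] {
  let tokens = cache.get(piece);
  if (!tokens) {
    tokens = Object.freeze([piece.length]); // placeholder "tokenization"
    cache.set(piece, tokens);
  }
  return tokens;
}

const chunk = tokensFor('hello') as number[];
// chunk.push(42); // throws in strict mode; without freeze it would poison the cache
```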
@@ -830,6 +836,158 @@ var BPETokenizer = class {
    clearCache() {
      this.tokenCache.clear();
    }
+   // ===========================================================================
+   // Generator Methods
+   // ===========================================================================
+   /**
+    * Generator version of encodeText. Yields token arrays per regex-matched piece.
+    * Returns total token count.
+    *
+    * @param text - The text to encode
+    * @param allowedSpecial - Controls special token handling (same as encodeText)
+    * @returns Generator that yields token arrays and returns total count
+    */
+   *encodeTextGenerator(text, allowedSpecial) {
+     if (!text) return 0;
+     let totalTokens = 0;
+     if (allowedSpecial === "skip") {
+       const gen = this.encodeOrdinaryGenerator(text);
+       let result = gen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = gen.next();
+       }
+       return totalTokens;
+     }
+     if (this.specialTokenMap.size > 0) {
+       const parts = this.splitOnSpecialTokens(text, allowedSpecial);
+       for (const part of parts) {
+         if (part.isSpecial) {
+           const tokenId = this.specialTokenMap.get(part.text);
+           yield [tokenId];
+           totalTokens += 1;
+         } else {
+           const gen = this.encodeOrdinaryGenerator(part.text);
+           let result = gen.next();
+           while (!result.done) {
+             yield result.value;
+             totalTokens += result.value.length;
+             result = gen.next();
+           }
+         }
+       }
+     } else {
+       const gen = this.encodeOrdinaryGenerator(text);
+       let result = gen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = gen.next();
+       }
+     }
+     return totalTokens;
+   }
+   /**
+    * Generator version of encodeOrdinary. Yields token arrays per regex piece.
+    * Uses same cache logic as encodeOrdinary.
+    */
+   *encodeOrdinaryGenerator(text) {
+     if (!text) return;
+     const regex = new RegExp(
+       this.tokenSplitRegex.source,
+       this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
+     );
+     let match;
+     while ((match = regex.exec(text)) !== null) {
+       const piece = match[0];
+       if (piece.length === 0) {
+         regex.lastIndex++;
+         continue;
+       }
+       const cached = this.getFromCache(piece);
+       if (cached) {
+         yield cached;
+         continue;
+       }
+       const pieceBytes = this.textEncoder.encode(piece);
+       const key = bytesToLatin1(pieceBytes);
+       const directRank = this.encoder.get(key);
+       if (directRank !== void 0) {
+         const tokens = [directRank];
+         this.addToCache(piece, tokens);
+         yield tokens;
+         continue;
+       }
+       const pieceTokens = this.mergeBytePairs(pieceBytes);
+       this.addToCache(piece, pieceTokens);
+       yield pieceTokens;
+     }
+   }
+   /**
+    * Generator version of decodeTokens. Yields text chunks.
+    * Uses TextDecoder streaming mode to handle partial UTF-8 correctly.
+    * May yield empty strings when buffering incomplete sequences.
+    *
+    * Streaming semantics:
+    * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+    * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+    */
+   *decodeTokensGenerator(tokens) {
+     const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+     for (const token of tokens) {
+       const specialToken = this.specialTokenDecoder.get(token);
+       if (specialToken !== void 0) {
+         const flushed = streamingDecoder.decode(new Uint8Array(0));
+         if (flushed) yield flushed;
+         yield specialToken;
+         continue;
+       }
+       const tokenBytes = this.decoder.get(token);
+       if (!tokenBytes) {
+         throw new Error(
+           `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+         );
+       }
+       const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+       yield decoded;
+     }
+     const final = streamingDecoder.decode();
+     if (final) yield final;
+   }
+   /**
+    * Async generator version of decodeTokens.
+    * Accepts AsyncIterable<number | number[]> for flexibility.
+    *
+    * Streaming semantics:
+    * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+    * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+    */
+   async *decodeTokensAsyncGenerator(tokens) {
+     const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+     for await (const tokenOrChunk of tokens) {
+       const tokenArray = typeof tokenOrChunk === "number" ? [tokenOrChunk] : tokenOrChunk;
+       for (const token of tokenArray) {
+         const specialToken = this.specialTokenDecoder.get(token);
+         if (specialToken !== void 0) {
+           const flushed = streamingDecoder.decode(new Uint8Array(0));
+           if (flushed) yield flushed;
+           yield specialToken;
+           continue;
+         }
+         const tokenBytes = this.decoder.get(token);
+         if (!tokenBytes) {
+           throw new Error(
+             `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+           );
+         }
+         const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+         yield decoded;
+       }
+     }
+     const final = streamingDecoder.decode();
+     if (final) yield final;
+   }
  };

  // src/bpe/special-tokens.ts
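The streaming semantics described in the comments above rest on standard `TextDecoder` behavior rather than anything package-specific. A self-contained sketch showing why a token-by-token decode can yield empty strings until a multi-byte UTF-8 sequence completes:

```ts
// 'é' encodes to two UTF-8 bytes (0xC3 0xA9). Feeding them one at a time,
// as decodeTokensGenerator does per token, buffers the partial sequence.
const bytes = new TextEncoder().encode('é');
const decoder = new TextDecoder('utf-8', { fatal: false });

console.log(JSON.stringify(decoder.decode(bytes.subarray(0, 1), { stream: true }))); // "" (buffered)
console.log(JSON.stringify(decoder.decode(bytes.subarray(1), { stream: true })));    // "é"
console.log(JSON.stringify(decoder.decode()));                                       // "" (final flush, nothing pending)
```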
@@ -401981,7 +402139,11 @@ function getTokenizer(encoding) {
    return {
      encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
      decode: (tokens) => tokenizer.decodeTokens(tokens),
-     encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
+     encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial),
+     // Generator methods - delegate to BPETokenizer
+     encodeGenerator: (text, allowedSpecial) => tokenizer.encodeTextGenerator(text, allowedSpecial),
+     decodeGenerator: (tokens) => tokenizer.decodeTokensGenerator(tokens),
+     decodeAsyncGenerator: (tokens) => tokenizer.decodeTokensAsyncGenerator(tokens)
    };
  }
  function resolveEncoding(options) {
@@ -402054,6 +402216,22 @@ function isWithinTokenLimit(text, tokenLimit, options) {
    const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
    return result.exceeded ? false : result.count;
  }
+ function encodeGenerator(text, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
+   return api.encodeGenerator(text, allowedSpecial);
+ }
+ function* decodeGenerator(tokens, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   yield* api.decodeGenerator(tokens);
+ }
+ async function* decodeAsyncGenerator(tokens, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   yield* api.decodeAsyncGenerator(tokens);
+ }

  // src/token-counter.ts
  function isNonOpenAIModel(model) {
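Since these wrappers delegate to the same tokenizer as `encode`, concatenating the generator's chunks should reproduce the flat token array. A small hedged check against the public API (expected results noted in comments, not verified here):

```ts
import { encode, encodeGenerator, decode } from 'ai-token-estimator';

const text = 'Streaming and non-streaming paths should agree.';
const flat = encode(text, { model: 'gpt-4o' });

const streamed: number[] = [];
for (const chunk of encodeGenerator(text, { model: 'gpt-4o' })) {
  streamed.push(...chunk);
}

console.log(streamed.length === flat.length);                 // expected: true
console.log(decode(streamed, { model: 'gpt-4o' }) === text);  // expected: true
```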
@@ -405574,6 +405752,76 @@ function formatFunctionCall(fc) {
    if (fc.arguments) parts.push(fc.arguments);
    return parts.join("\n");
  }
+ function getRoleString(message) {
+   if (message.role === "function" && message.name) {
+     return message.name;
+   } else if (message.name) {
+     return `${message.role}:${message.name}`;
+   }
+   return message.role;
+ }
+ function* encodeChatGenerator(messages, options) {
+   const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+   validateChatModel(model, encodingOverride);
+   const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+   if (encoding === "o200k_harmony") {
+     console.warn(
+       "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+     );
+   }
+   const chatTokens = getChatTokens(encoding);
+   if (!chatTokens) {
+     throw new Error(
+       `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+     );
+   }
+   const { imStart, imEnd, imSep } = chatTokens;
+   let totalTokens = 0;
+   for (const message of messages) {
+     validateMessage(message);
+     yield [imStart];
+     totalTokens += 1;
+     const roleStr = getRoleString(message);
+     const roleTokens = encode(roleStr, { encoding, allowSpecial: "none" });
+     yield roleTokens;
+     totalTokens += roleTokens.length;
+     yield [imSep];
+     totalTokens += 1;
+     if (message.content) {
+       const contentGen = encodeGenerator(message.content, {
+         encoding,
+         allowSpecial: "none"
+       });
+       let result = contentGen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = contentGen.next();
+       }
+     }
+     if (message.function_call) {
+       const fcContent = formatFunctionCall(message.function_call);
+       const fcTokens = encode(fcContent, { encoding, allowSpecial: "none" });
+       yield fcTokens;
+       totalTokens += fcTokens.length;
+     }
+     yield [imEnd];
+     totalTokens += 1;
+   }
+   if (primeAssistant) {
+     yield [imStart];
+     totalTokens += 1;
+     const assistantTokens = encode("assistant", {
+       encoding,
+       allowSpecial: "none"
+     });
+     yield assistantTokens;
+     totalTokens += assistantTokens.length;
+     yield [imSep];
+     totalTokens += 1;
+   }
+   return totalTokens;
+ }
  // Annotate the CommonJS export names for ESM import in node:
  0 && (module.exports = {
    DEFAULT_MODELS,
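Given the yield order implemented above (`im_start`, role, separator, content chunks, `im_end`, then optional assistant priming), flattening the chunks should agree with `encodeChat` for the same input. An illustrative sketch using only the public API (the equivalence is expected from the structure above, not asserted by the package):

```ts
import { encodeChat, encodeChatGenerator, type ChatMessage } from 'ai-token-estimator';

const messages: ChatMessage[] = [{ role: 'user', content: 'Hello!' }];

const chunks: number[][] = [];
for (const chunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
  chunks.push([...chunk]); // copy, since cached chunks are frozen
}

// Expected shape per message: [im_start], role tokens, [im_sep], content
// chunks, [im_end], followed by the assistant priming sequence by default.
console.log(chunks.flat().length === encodeChat(messages, { model: 'gpt-4o' }).length); // expected: true
```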
@@ -405587,10 +405835,14 @@ function formatFunctionCall(fc) {
    countSentencePieceTokensAsync,
    countTokens,
    decode,
+   decodeAsyncGenerator,
+   decodeGenerator,
    decodeSentencePiece,
    decodeSentencePieceAsync,
    encode,
    encodeChat,
+   encodeChatGenerator,
+   encodeGenerator,
    encodeSentencePiece,
    encodeSentencePieceAsync,
    ensureSentencePieceModel,
package/dist/index.d.cts
CHANGED
@@ -107,6 +107,72 @@ interface IsWithinTokenLimitOptions {
   * ```
   */
  declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: IsWithinTokenLimitOptions): false | number;
+ /**
+  * Encode text yielding token chunks. Memory-efficient for large inputs.
+  *
+  * Yields token arrays per regex-matched piece (word/punctuation), not per token.
+  * Returns total token count when iteration completes.
+  *
+  * @param text - The text to encode
+  * @param options - Encoding options
+  * @returns Generator that yields token arrays per piece, returns total count
+  *
+  * @example
+  * ```typescript
+  * // Stream-encode large text
+  * let tokenCount = 0;
+  * for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+  *   tokenCount += tokenChunk.length;
+  * }
+  *
+  * // Or get total count from return value
+  * const gen = encodeGenerator(text, { model: 'gpt-4o' });
+  * let result = gen.next();
+  * while (!result.done) result = gen.next();
+  * console.log('Total tokens:', result.value);
+  * ```
+  */
+ declare function encodeGenerator(text: string, options?: EncodeOptions): Generator<number[], number, undefined>;
+ /**
+  * Decode tokens yielding text chunks.
+  * Uses TextDecoder streaming mode - may yield empty strings when buffering
+  * incomplete UTF-8 sequences.
+  *
+  * @param tokens - Token IDs to decode
+  * @param options - Decoding options
+  * @returns Generator that yields text chunks
+  *
+  * @example
+  * ```typescript
+  * const tokens = encode('Hello, world!', { model: 'gpt-4o' });
+  * for (const textChunk of decodeGenerator(tokens, { model: 'gpt-4o' })) {
+  *   process.stdout.write(textChunk);
+  * }
+  * ```
+  */
+ declare function decodeGenerator(tokens: Iterable<number>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): Generator<string, void, void>;
+ /**
+  * Decode async token stream yielding text chunks.
+  * Accepts single tokens or token arrays for flexibility with streaming APIs.
+  *
+  * Uses TextDecoder streaming mode - may yield empty strings when buffering
+  * incomplete UTF-8 sequences.
+  *
+  * @param tokens - Async iterable of token IDs (numbers or number arrays)
+  * @param options - Decoding options
+  * @returns AsyncGenerator that yields text chunks
+  *
+  * @example
+  * ```typescript
+  * // Decode streaming LLM response
+  * async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+  *   for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+  *     process.stdout.write(text);
+  *   }
+  * }
+  * ```
+  */
+ declare function decodeAsyncGenerator(tokens: AsyncIterable<number | number[]>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): AsyncGenerator<string, void, void>;

  /**
   * Configuration for a specific LLM model.
@@ -688,6 +754,44 @@ interface EncodeChatOptions {
   * ```
   */
  declare function encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[];
+ /**
+  * Generator version of encodeChat. Yields token arrays per message component.
+  * Returns total token count.
+  *
+  * Yields tokens in the following order per message:
+  * - [imStart] (1 token)
+  * - role tokens
+  * - [imSep] (1 token)
+  * - content tokens (if present, yielded in chunks)
+  * - function_call tokens (if present)
+  * - [imEnd] (1 token)
+  *
+  * If primeAssistant is true (default), also yields assistant priming tokens at the end.
+  *
+  * @param messages - Array or iterable of chat messages
+  * @param options - Encoding options
+  * @returns Generator that yields token arrays per component, returns total count
+  *
+  * @example
+  * ```typescript
+  * const messages = [
+  *   { role: 'system', content: 'You are helpful.' },
+  *   { role: 'user', content: 'Hello!' }
+  * ];
+  *
+  * // Stream-encode messages
+  * for (const tokenChunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
+  *   console.log('Chunk:', tokenChunk);
+  * }
+  *
+  * // Get total count from return value
+  * const gen = encodeChatGenerator(messages, { model: 'gpt-4o' });
+  * let result = gen.next();
+  * while (!result.done) result = gen.next();
+  * console.log('Total tokens:', result.value);
+  * ```
+  */
+ declare function encodeChatGenerator(messages: ChatMessage[] | Iterable<ChatMessage>, options?: EncodeChatOptions): Generator<number[], number, undefined>;

  interface AnthropicCountTokensParams {
    /** Claude model id, e.g. `claude-sonnet-4-5` */
@@ -964,4 +1068,4 @@ declare function clearModelCache(): void;
   */
  declare function parseModelProto(buffer: Uint8Array): ModelProto;

- export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
+ export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeAsyncGenerator, decodeGenerator, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeChatGenerator, encodeGenerator, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
package/dist/index.d.ts
CHANGED
@@ -107,6 +107,72 @@ interface IsWithinTokenLimitOptions {
   * ```
   */
  declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: IsWithinTokenLimitOptions): false | number;
+ /**
+  * Encode text yielding token chunks. Memory-efficient for large inputs.
+  *
+  * Yields token arrays per regex-matched piece (word/punctuation), not per token.
+  * Returns total token count when iteration completes.
+  *
+  * @param text - The text to encode
+  * @param options - Encoding options
+  * @returns Generator that yields token arrays per piece, returns total count
+  *
+  * @example
+  * ```typescript
+  * // Stream-encode large text
+  * let tokenCount = 0;
+  * for (const tokenChunk of encodeGenerator(hugeText, { model: 'gpt-4o' })) {
+  *   tokenCount += tokenChunk.length;
+  * }
+  *
+  * // Or get total count from return value
+  * const gen = encodeGenerator(text, { model: 'gpt-4o' });
+  * let result = gen.next();
+  * while (!result.done) result = gen.next();
+  * console.log('Total tokens:', result.value);
+  * ```
+  */
+ declare function encodeGenerator(text: string, options?: EncodeOptions): Generator<number[], number, undefined>;
+ /**
+  * Decode tokens yielding text chunks.
+  * Uses TextDecoder streaming mode - may yield empty strings when buffering
+  * incomplete UTF-8 sequences.
+  *
+  * @param tokens - Token IDs to decode
+  * @param options - Decoding options
+  * @returns Generator that yields text chunks
+  *
+  * @example
+  * ```typescript
+  * const tokens = encode('Hello, world!', { model: 'gpt-4o' });
+  * for (const textChunk of decodeGenerator(tokens, { model: 'gpt-4o' })) {
+  *   process.stdout.write(textChunk);
+  * }
+  * ```
+  */
+ declare function decodeGenerator(tokens: Iterable<number>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): Generator<string, void, void>;
+ /**
+  * Decode async token stream yielding text chunks.
+  * Accepts single tokens or token arrays for flexibility with streaming APIs.
+  *
+  * Uses TextDecoder streaming mode - may yield empty strings when buffering
+  * incomplete UTF-8 sequences.
+  *
+  * @param tokens - Async iterable of token IDs (numbers or number arrays)
+  * @param options - Decoding options
+  * @returns AsyncGenerator that yields text chunks
+  *
+  * @example
+  * ```typescript
+  * // Decode streaming LLM response
+  * async function decodeLLMStream(tokenStream: AsyncIterable<number>) {
+  *   for await (const text of decodeAsyncGenerator(tokenStream, { model: 'gpt-4o' })) {
+  *     process.stdout.write(text);
+  *   }
+  * }
+  * ```
+  */
+ declare function decodeAsyncGenerator(tokens: AsyncIterable<number | number[]>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): AsyncGenerator<string, void, void>;

  /**
   * Configuration for a specific LLM model.
@@ -688,6 +754,44 @@ interface EncodeChatOptions {
   * ```
   */
  declare function encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[];
+ /**
+  * Generator version of encodeChat. Yields token arrays per message component.
+  * Returns total token count.
+  *
+  * Yields tokens in the following order per message:
+  * - [imStart] (1 token)
+  * - role tokens
+  * - [imSep] (1 token)
+  * - content tokens (if present, yielded in chunks)
+  * - function_call tokens (if present)
+  * - [imEnd] (1 token)
+  *
+  * If primeAssistant is true (default), also yields assistant priming tokens at the end.
+  *
+  * @param messages - Array or iterable of chat messages
+  * @param options - Encoding options
+  * @returns Generator that yields token arrays per component, returns total count
+  *
+  * @example
+  * ```typescript
+  * const messages = [
+  *   { role: 'system', content: 'You are helpful.' },
+  *   { role: 'user', content: 'Hello!' }
+  * ];
+  *
+  * // Stream-encode messages
+  * for (const tokenChunk of encodeChatGenerator(messages, { model: 'gpt-4o' })) {
+  *   console.log('Chunk:', tokenChunk);
+  * }
+  *
+  * // Get total count from return value
+  * const gen = encodeChatGenerator(messages, { model: 'gpt-4o' });
+  * let result = gen.next();
+  * while (!result.done) result = gen.next();
+  * console.log('Total tokens:', result.value);
+  * ```
+  */
+ declare function encodeChatGenerator(messages: ChatMessage[] | Iterable<ChatMessage>, options?: EncodeChatOptions): Generator<number[], number, undefined>;

  interface AnthropicCountTokensParams {
    /** Claude model id, e.g. `claude-sonnet-4-5` */
@@ -964,4 +1068,4 @@ declare function clearModelCache(): void;
   */
  declare function parseModelProto(buffer: Uint8Array): ModelProto;

- export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
+ export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeAsyncGenerator, decodeGenerator, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeChatGenerator, encodeGenerator, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
package/dist/index.js
CHANGED
@@ -711,6 +711,7 @@ var BPETokenizer = class {
    }
    /**
     * Add an entry to the cache, evicting LRU entries if necessary.
+    * Freezes the array to prevent mutation by consumers (especially generator yields).
     */
    addToCache(key, value) {
      if (this.cacheCapacity <= 0) return;
@@ -723,6 +724,7 @@ var BPETokenizer = class {
          removed++;
        }
      }
+     Object.freeze(value);
      this.tokenCache.set(key, value);
    }
    /**
@@ -763,6 +765,158 @@ var BPETokenizer = class {
    clearCache() {
      this.tokenCache.clear();
    }
+   // ===========================================================================
+   // Generator Methods
+   // ===========================================================================
+   /**
+    * Generator version of encodeText. Yields token arrays per regex-matched piece.
+    * Returns total token count.
+    *
+    * @param text - The text to encode
+    * @param allowedSpecial - Controls special token handling (same as encodeText)
+    * @returns Generator that yields token arrays and returns total count
+    */
+   *encodeTextGenerator(text, allowedSpecial) {
+     if (!text) return 0;
+     let totalTokens = 0;
+     if (allowedSpecial === "skip") {
+       const gen = this.encodeOrdinaryGenerator(text);
+       let result = gen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = gen.next();
+       }
+       return totalTokens;
+     }
+     if (this.specialTokenMap.size > 0) {
+       const parts = this.splitOnSpecialTokens(text, allowedSpecial);
+       for (const part of parts) {
+         if (part.isSpecial) {
+           const tokenId = this.specialTokenMap.get(part.text);
+           yield [tokenId];
+           totalTokens += 1;
+         } else {
+           const gen = this.encodeOrdinaryGenerator(part.text);
+           let result = gen.next();
+           while (!result.done) {
+             yield result.value;
+             totalTokens += result.value.length;
+             result = gen.next();
+           }
+         }
+       }
+     } else {
+       const gen = this.encodeOrdinaryGenerator(text);
+       let result = gen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = gen.next();
+       }
+     }
+     return totalTokens;
+   }
+   /**
+    * Generator version of encodeOrdinary. Yields token arrays per regex piece.
+    * Uses same cache logic as encodeOrdinary.
+    */
+   *encodeOrdinaryGenerator(text) {
+     if (!text) return;
+     const regex = new RegExp(
+       this.tokenSplitRegex.source,
+       this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
+     );
+     let match;
+     while ((match = regex.exec(text)) !== null) {
+       const piece = match[0];
+       if (piece.length === 0) {
+         regex.lastIndex++;
+         continue;
+       }
+       const cached = this.getFromCache(piece);
+       if (cached) {
+         yield cached;
+         continue;
+       }
+       const pieceBytes = this.textEncoder.encode(piece);
+       const key = bytesToLatin1(pieceBytes);
+       const directRank = this.encoder.get(key);
+       if (directRank !== void 0) {
+         const tokens = [directRank];
+         this.addToCache(piece, tokens);
+         yield tokens;
+         continue;
+       }
+       const pieceTokens = this.mergeBytePairs(pieceBytes);
+       this.addToCache(piece, pieceTokens);
+       yield pieceTokens;
+     }
+   }
+   /**
+    * Generator version of decodeTokens. Yields text chunks.
+    * Uses TextDecoder streaming mode to handle partial UTF-8 correctly.
+    * May yield empty strings when buffering incomplete sequences.
+    *
+    * Streaming semantics:
+    * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+    * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+    */
+   *decodeTokensGenerator(tokens) {
+     const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+     for (const token of tokens) {
+       const specialToken = this.specialTokenDecoder.get(token);
+       if (specialToken !== void 0) {
+         const flushed = streamingDecoder.decode(new Uint8Array(0));
+         if (flushed) yield flushed;
+         yield specialToken;
+         continue;
+       }
+       const tokenBytes = this.decoder.get(token);
+       if (!tokenBytes) {
+         throw new Error(
+           `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+         );
+       }
+       const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+       yield decoded;
+     }
+     const final = streamingDecoder.decode();
+     if (final) yield final;
+   }
+   /**
+    * Async generator version of decodeTokens.
+    * Accepts AsyncIterable<number | number[]> for flexibility.
+    *
+    * Streaming semantics:
+    * - During iteration: decode(bytes, { stream: true }) - buffers incomplete UTF-8
+    * - At end/flush: decode() with no stream flag (defaults to false) - emits buffered bytes
+    */
+   async *decodeTokensAsyncGenerator(tokens) {
+     const streamingDecoder = new TextDecoder("utf-8", { fatal: false });
+     for await (const tokenOrChunk of tokens) {
+       const tokenArray = typeof tokenOrChunk === "number" ? [tokenOrChunk] : tokenOrChunk;
+       for (const token of tokenArray) {
+         const specialToken = this.specialTokenDecoder.get(token);
+         if (specialToken !== void 0) {
+           const flushed = streamingDecoder.decode(new Uint8Array(0));
+           if (flushed) yield flushed;
+           yield specialToken;
+           continue;
+         }
+         const tokenBytes = this.decoder.get(token);
+         if (!tokenBytes) {
+           throw new Error(
+             `Invalid token ID: ${token}. Token not found in vocabulary or special tokens.`
+           );
+         }
+         const decoded = streamingDecoder.decode(tokenBytes, { stream: true });
+         yield decoded;
+       }
+     }
+     const final = streamingDecoder.decode();
+     if (final) yield final;
+   }
  };

  // src/bpe/special-tokens.ts
@@ -401914,7 +402068,11 @@ function getTokenizer(encoding) {
    return {
      encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
      decode: (tokens) => tokenizer.decodeTokens(tokens),
-     encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
+     encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial),
+     // Generator methods - delegate to BPETokenizer
+     encodeGenerator: (text, allowedSpecial) => tokenizer.encodeTextGenerator(text, allowedSpecial),
+     decodeGenerator: (tokens) => tokenizer.decodeTokensGenerator(tokens),
+     decodeAsyncGenerator: (tokens) => tokenizer.decodeTokensAsyncGenerator(tokens)
    };
  }
  function resolveEncoding(options) {
@@ -401987,6 +402145,22 @@ function isWithinTokenLimit(text, tokenLimit, options) {
    const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
    return result.exceeded ? false : result.count;
  }
+ function encodeGenerator(text, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
+   return api.encodeGenerator(text, allowedSpecial);
+ }
+ function* decodeGenerator(tokens, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   yield* api.decodeGenerator(tokens);
+ }
+ async function* decodeAsyncGenerator(tokens, options) {
+   const encoding = resolveEncoding(options);
+   const api = getTokenizer(encoding);
+   yield* api.decodeAsyncGenerator(tokens);
+ }

  // src/token-counter.ts
  function isNonOpenAIModel(model) {
@@ -405507,6 +405681,76 @@ function formatFunctionCall(fc) {
    if (fc.arguments) parts.push(fc.arguments);
    return parts.join("\n");
  }
+ function getRoleString(message) {
+   if (message.role === "function" && message.name) {
+     return message.name;
+   } else if (message.name) {
+     return `${message.role}:${message.name}`;
+   }
+   return message.role;
+ }
+ function* encodeChatGenerator(messages, options) {
+   const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+   validateChatModel(model, encodingOverride);
+   const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+   if (encoding === "o200k_harmony") {
+     console.warn(
+       "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+     );
+   }
+   const chatTokens = getChatTokens(encoding);
+   if (!chatTokens) {
+     throw new Error(
+       `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+     );
+   }
+   const { imStart, imEnd, imSep } = chatTokens;
+   let totalTokens = 0;
+   for (const message of messages) {
+     validateMessage(message);
+     yield [imStart];
+     totalTokens += 1;
+     const roleStr = getRoleString(message);
+     const roleTokens = encode(roleStr, { encoding, allowSpecial: "none" });
+     yield roleTokens;
+     totalTokens += roleTokens.length;
+     yield [imSep];
+     totalTokens += 1;
+     if (message.content) {
+       const contentGen = encodeGenerator(message.content, {
+         encoding,
+         allowSpecial: "none"
+       });
+       let result = contentGen.next();
+       while (!result.done) {
+         yield result.value;
+         totalTokens += result.value.length;
+         result = contentGen.next();
+       }
+     }
+     if (message.function_call) {
+       const fcContent = formatFunctionCall(message.function_call);
+       const fcTokens = encode(fcContent, { encoding, allowSpecial: "none" });
+       yield fcTokens;
+       totalTokens += fcTokens.length;
+     }
+     yield [imEnd];
+     totalTokens += 1;
+   }
+   if (primeAssistant) {
+     yield [imStart];
+     totalTokens += 1;
+     const assistantTokens = encode("assistant", {
+       encoding,
+       allowSpecial: "none"
+     });
+     yield assistantTokens;
+     totalTokens += assistantTokens.length;
+     yield [imSep];
+     totalTokens += 1;
+   }
+   return totalTokens;
+ }
  export {
    DEFAULT_MODELS,
    LAST_UPDATED,
@@ -405519,10 +405763,14 @@ export {
    countSentencePieceTokensAsync,
    countTokens,
    decode,
+   decodeAsyncGenerator,
+   decodeGenerator,
    decodeSentencePiece,
    decodeSentencePieceAsync,
    encode,
    encodeChat,
+   encodeChatGenerator,
+   encodeGenerator,
    encodeSentencePiece,
    encodeSentencePieceAsync,
    ensureSentencePieceModel,