ai-token-estimator 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +139 -0
- package/dist/index.cjs +370 -1
- package/dist/index.d.cts +147 -1
- package/dist/index.d.ts +147 -1
- package/dist/index.js +367 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -11,6 +11,7 @@ The best way to estimate **tokens + input cost** for LLM calls — with **exact
|
|
|
11
11
|
## Features
|
|
12
12
|
|
|
13
13
|
- **Exact OpenAI tokenization** (tiktoken-compatible BPE): `encode()` / `decode()` / `openai_exact`
|
|
14
|
+
- **Chat-aware tokenization**: `encodeChat()` returns exact token IDs for chat messages using ChatML format
|
|
14
15
|
- **OpenAI chat completion token counting** (legacy `functions` API): `countChatCompletionTokens()` with optional per-message breakdown
|
|
15
16
|
- **Pure TypeScript SentencePiece tokenizer** (no native dependencies):
|
|
16
17
|
- Supports `.model` files (protobuf format)
|
|
@@ -64,6 +65,21 @@ console.log(countTokens({ text: 'Hello, world!', model: 'gpt-5.1' }));
|
|
|
64
65
|
|
|
65
66
|
## Quick Recipes
|
|
66
67
|
|
|
68
|
+
### Encode chat messages to tokens (ChatML format)
|
|
69
|
+
|
|
70
|
+
```ts
|
|
71
|
+
import { encodeChat, decode } from 'ai-token-estimator';
|
|
72
|
+
|
|
73
|
+
const tokens = encodeChat([
|
|
74
|
+
{ role: 'system', content: 'You are helpful.' },
|
|
75
|
+
{ role: 'user', content: 'Hello!' }
|
|
76
|
+
], { model: 'gpt-4o' });
|
|
77
|
+
|
|
78
|
+
console.log(tokens); // [200264, 9125, 200266, 2610, 525, 11190, 13, 200265, ...]
|
|
79
|
+
console.log(decode(tokens, { encoding: 'o200k_base' }));
|
|
80
|
+
// <|im_start|>system<|im_sep|>You are helpful.<|im_end|>...
|
|
81
|
+
```
|
|
82
|
+
|
|
67
83
|
### OpenAI chat completion tokens (legacy functions API)
|
|
68
84
|
|
|
69
85
|
```ts
|
|
@@ -561,6 +577,129 @@ Encodes text into **OpenAI token IDs** using tiktoken-compatible BPE tokenizatio
|
|
|
561
577
|
|
|
562
578
|
Decodes OpenAI token IDs back into text using the selected encoding/model.
|
|
563
579
|
|
|
580
|
+
### `encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[]`
|
|
581
|
+
|
|
582
|
+
Encodes chat messages into **exact token IDs** using ChatML format. Returns the ChatML message prompt tokens (messages + optional assistant priming), including special delimiter tokens (`<|im_start|>`, `<|im_sep|>`, `<|im_end|>`).
|
|
583
|
+
|
|
584
|
+
```ts
|
|
585
|
+
import { encodeChat, decode } from 'ai-token-estimator';
|
|
586
|
+
|
|
587
|
+
const tokens = encodeChat([
|
|
588
|
+
{ role: 'system', content: 'You are helpful.' },
|
|
589
|
+
{ role: 'user', content: 'Hello!' }
|
|
590
|
+
], { model: 'gpt-4o' });
|
|
591
|
+
|
|
592
|
+
// Tokens include ChatML structure:
|
|
593
|
+
// <|im_start|>system<|im_sep|>You are helpful.<|im_end|>
|
|
594
|
+
// <|im_start|>user<|im_sep|>Hello!<|im_end|>
|
|
595
|
+
// <|im_start|>assistant<|im_sep|> (priming)
|
|
596
|
+
```
|
|
597
|
+
|
|
598
|
+
**Parameters:**
|
|
599
|
+
|
|
600
|
+
```typescript
|
|
601
|
+
interface EncodeChatOptions {
|
|
602
|
+
model?: string; // OpenAI model (e.g., 'gpt-4o')
|
|
603
|
+
encoding?: OpenAIEncoding; // Explicit encoding override
|
|
604
|
+
primeAssistant?: boolean; // Append assistant priming (default: true)
|
|
605
|
+
}
|
|
606
|
+
```
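
For example, a minimal sketch of the `primeAssistant` and `encoding` options (both behave as documented above; output values omitted):

```typescript
import { encodeChat } from 'ai-token-estimator';

// Skip the trailing <|im_start|>assistant<|im_sep|> priming:
const withoutPriming = encodeChat(
  [{ role: 'user', content: 'Hello!' }],
  { model: 'gpt-4o', primeAssistant: false }
);

// No model id available? An explicit encoding works on its own:
const byEncoding = encodeChat(
  [{ role: 'user', content: 'Hello!' }],
  { encoding: 'o200k_base' }
);
```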
|
|
607
|
+
|
|
608
|
+
**Supported encodings:**
|
|
609
|
+
- `cl100k_base` (GPT-4, GPT-3.5-turbo)
|
|
610
|
+
- `o200k_base` (GPT-4o, GPT-4o-mini)
|
|
611
|
+
- `o200k_harmony` (experimental)
|
|
612
|
+
|
|
613
|
+
**Limitations:**
|
|
614
|
+
- **OpenAI models only** — throws for claude-*, gemini-*
|
|
615
|
+
- **Legacy functions API only** — throws for tool_calls, tool_call_id
|
|
616
|
+
- **Text content only** — throws for multimodal content (arrays)
|
|
617
|
+
|
|
618
|
+
**Note on function_call:** Messages with `function_call` are encoded with the function name and arguments as content. The token count differs from `countChatCompletionTokens()` because the latter includes `FUNCTION_CALL_METADATA_TOKEN_OVERHEAD` (3 tokens) for API accounting. The exact difference depends on whether both name and arguments are present (a 2-token difference, since the newline that joins them adds one token) or only one field is present (a 3-token difference).
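
A rough sketch of that behavior (the message shape follows the legacy functions API; treat this as an illustration rather than an exact count):

```typescript
import { encodeChat, countChatCompletionTokens } from 'ai-token-estimator';

const messages = [
  {
    role: 'assistant',
    content: '',
    function_call: { name: 'get_weather', arguments: '{"city":"Paris"}' },
  },
];

// encodeChat joins name and arguments with a newline inside the message body.
const tokens = encodeChat(messages, { model: 'gpt-4o' });

// countChatCompletionTokens instead adds FUNCTION_CALL_METADATA_TOKEN_OVERHEAD,
// so its total should come out 2-3 tokens higher, per the note above.
const accounted = countChatCompletionTokens({ messages, model: 'gpt-4o' });
```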
|
|
619
|
+
|
|
620
|
+
**Note on o200k_harmony:** Support for `o200k_harmony` encoding is experimental. The token structure may not match actual API behavior.
|
|
621
|
+
|
|
622
|
+
### `isWithinTokenLimit(text, tokenLimit, options?): false | number`
|
|
623
|
+
|
|
624
|
+
Checks if text is within a token limit with **early exit optimization**. Returns `false` if the limit is exceeded, or the actual token count if within limit.
|
|
625
|
+
|
|
626
|
+
This is significantly faster than full tokenization when the limit is exceeded early in the text (up to 1000x+ faster for large texts with small limits).
|
|
627
|
+
|
|
628
|
+
```typescript
|
|
629
|
+
import { isWithinTokenLimit } from 'ai-token-estimator';
|
|
630
|
+
|
|
631
|
+
// Returns token count if within limit
|
|
632
|
+
const count = isWithinTokenLimit('Hello, world!', 100, { model: 'gpt-4o' });
|
|
633
|
+
if (count !== false) {
|
|
634
|
+
console.log(`Text has ${count} tokens`);
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
// Returns false if exceeds limit (with early exit)
|
|
638
|
+
const result = isWithinTokenLimit(longText, 10, { model: 'gpt-4o' });
|
|
639
|
+
if (result === false) {
|
|
640
|
+
console.log('Text exceeds 10 tokens');
|
|
641
|
+
}
|
|
642
|
+
```
|
|
643
|
+
|
|
644
|
+
**Parameters:**
|
|
645
|
+
|
|
646
|
+
```typescript
|
|
647
|
+
interface IsWithinTokenLimitOptions {
|
|
648
|
+
model?: string; // OpenAI model (e.g., 'gpt-4o')
|
|
649
|
+
encoding?: OpenAIEncoding; // Explicit encoding override
|
|
650
|
+
allowSpecial?: SpecialTokenHandling; // How to handle special tokens
|
|
651
|
+
}
|
|
652
|
+
```
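
For instance, a small sketch of `allowSpecial` (value meanings as documented: `none_raise` throws, `none` treats markers as plain text, `all` encodes them as special token IDs):

```typescript
import { isWithinTokenLimit } from 'ai-token-estimator';

// With the default 'none_raise', text containing <|endoftext|> would throw;
// 'none' counts the marker as ordinary text instead.
const count = isWithinTokenLimit('before <|endoftext|> after', 50, {
  model: 'gpt-4o',
  allowSpecial: 'none',
});
```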
|
|
653
|
+
|
|
654
|
+
**Throws:**
|
|
655
|
+
- `Error` if `tokenLimit` is invalid (NaN, Infinity, negative, non-integer)
|
|
656
|
+
- `Error` if `model` is a known non-OpenAI model (claude-*, gemini-*)
|
|
657
|
+
|
|
658
|
+
### `isChatWithinTokenLimit(input): false | number`
|
|
659
|
+
|
|
660
|
+
Checks if chat messages are within a token limit with **early exit optimization**. Returns `false` if exceeded, or the actual token count if within limit.
|
|
661
|
+
|
|
662
|
+
Uses the same token counting logic as `countChatCompletionTokens()` but exits early when the limit is exceeded.
|
|
663
|
+
|
|
664
|
+
```typescript
|
|
665
|
+
import { isChatWithinTokenLimit } from 'ai-token-estimator';
|
|
666
|
+
|
|
667
|
+
const result = isChatWithinTokenLimit({
|
|
668
|
+
messages: [
|
|
669
|
+
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
670
|
+
{ role: 'user', content: 'Hello!' }
|
|
671
|
+
],
|
|
672
|
+
model: 'gpt-4o',
|
|
673
|
+
tokenLimit: 100,
|
|
674
|
+
functions: [{ name: 'get_weather', parameters: { type: 'object' } }],
|
|
675
|
+
});
|
|
676
|
+
|
|
677
|
+
if (result === false) {
|
|
678
|
+
console.log('Messages exceed token limit');
|
|
679
|
+
} else {
|
|
680
|
+
console.log(`Messages use ${result} tokens`);
|
|
681
|
+
}
|
|
682
|
+
```
|
|
683
|
+
|
|
684
|
+
**Parameters:**
|
|
685
|
+
|
|
686
|
+
```typescript
|
|
687
|
+
interface IsChatWithinTokenLimitInput {
|
|
688
|
+
messages: ChatMessage[];
|
|
689
|
+
model: string;
|
|
690
|
+
tokenLimit: number;
|
|
691
|
+
encoding?: OpenAIEncoding;
|
|
692
|
+
functions?: FunctionDefinition[];
|
|
693
|
+
function_call?: FunctionCallOption;
|
|
694
|
+
}
|
|
695
|
+
```
|
|
696
|
+
|
|
697
|
+
**Throws:**
|
|
698
|
+
- `Error` if `tokenLimit` is invalid (NaN, Infinity, negative, non-integer)
|
|
699
|
+
- `Error` if model is not an OpenAI model (unless an encoding override is provided; see the sketch after this list)
|
|
700
|
+
- `Error` if tools, tool_choice, tool_calls, or tool_call_id are present
|
|
701
|
+
- `Error` if any message has non-string content
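
As a sketch of the encoding-override escape hatch mentioned above (the model id here is hypothetical):

```typescript
import { isChatWithinTokenLimit } from 'ai-token-estimator';

// An unrecognized model id would normally be rejected, but an explicit
// encoding keeps the early-exit check usable (hypothetical model name):
const ok = isChatWithinTokenLimit({
  messages: [{ role: 'user', content: 'Hello!' }],
  model: 'gpt-4o-my-finetune',
  tokenLimit: 100,
  encoding: 'o200k_base',
});
```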
|
|
702
|
+
|
|
564
703
|
### `getModelConfig(model: string): ModelConfig`
|
|
565
704
|
|
|
566
705
|
Returns the configuration for a specific model. Throws if the model is not found.
|
package/dist/index.cjs
CHANGED
|
@@ -44,6 +44,7 @@ __export(index_exports, {
|
|
|
44
44
|
decodeSentencePiece: () => decodeSentencePiece,
|
|
45
45
|
decodeSentencePieceAsync: () => decodeSentencePieceAsync,
|
|
46
46
|
encode: () => encode,
|
|
47
|
+
encodeChat: () => encodeChat,
|
|
47
48
|
encodeSentencePiece: () => encodeSentencePiece,
|
|
48
49
|
encodeSentencePieceAsync: () => encodeSentencePieceAsync,
|
|
49
50
|
ensureSentencePieceModel: () => ensureSentencePieceModel,
|
|
@@ -57,6 +58,8 @@ __export(index_exports, {
|
|
|
57
58
|
getOpenAIEncoding: () => getOpenAIEncoding,
|
|
58
59
|
getSentencePieceTokenizer: () => getSentencePieceTokenizer,
|
|
59
60
|
getTotalCost: () => getTotalCost,
|
|
61
|
+
isChatWithinTokenLimit: () => isChatWithinTokenLimit,
|
|
62
|
+
isWithinTokenLimit: () => isWithinTokenLimit,
|
|
60
63
|
loadSentencePieceTokenizer: () => loadSentencePieceTokenizer,
|
|
61
64
|
parseModelProto: () => parseModelProto
|
|
62
65
|
});
|
|
@@ -616,6 +619,83 @@ var BPETokenizer = class {
|
|
|
616
619
|
}
|
|
617
620
|
return tokens;
|
|
618
621
|
}
|
|
622
|
+
/**
|
|
623
|
+
* Encode text with a token limit, returning early if the limit is exceeded.
|
|
624
|
+
* This is optimized for fast token-limit validation without full tokenization.
|
|
625
|
+
*
|
|
626
|
+
* @param text - The text to encode
|
|
627
|
+
* @param limit - Maximum number of tokens allowed
|
|
628
|
+
* @param allowedSpecial - Controls special token handling (same as encodeText)
|
|
629
|
+
* @returns Object with count and exceeded flag
|
|
630
|
+
*/
|
|
631
|
+
encodeTextWithLimit(text, limit, allowedSpecial) {
|
|
632
|
+
if (!text) return { count: 0, exceeded: false };
|
|
633
|
+
if (limit < 0) return { count: 0, exceeded: true };
|
|
634
|
+
if (allowedSpecial === "skip") {
|
|
635
|
+
return this.encodeOrdinaryWithLimit(text, limit);
|
|
636
|
+
}
|
|
637
|
+
let count = 0;
|
|
638
|
+
if (this.specialTokenMap.size > 0) {
|
|
639
|
+
const parts = this.splitOnSpecialTokens(text, allowedSpecial);
|
|
640
|
+
for (const part of parts) {
|
|
641
|
+
if (part.isSpecial) {
|
|
642
|
+
count += 1;
|
|
643
|
+
if (count > limit) return { count, exceeded: true };
|
|
644
|
+
} else {
|
|
645
|
+
const result = this.encodeOrdinaryWithLimit(part.text, limit - count);
|
|
646
|
+
count += result.count;
|
|
647
|
+
if (result.exceeded) {
|
|
648
|
+
return { count, exceeded: true };
|
|
649
|
+
}
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
} else {
|
|
653
|
+
return this.encodeOrdinaryWithLimit(text, limit);
|
|
654
|
+
}
|
|
655
|
+
return { count, exceeded: false };
|
|
656
|
+
}
|
|
657
|
+
/**
|
|
658
|
+
* Incremental encoding with early exit.
|
|
659
|
+
* CRITICAL: Uses RegExp.exec() loop instead of text.match() to avoid
|
|
660
|
+
* allocating all pieces upfront. This enables true early exit.
|
|
661
|
+
*/
|
|
662
|
+
encodeOrdinaryWithLimit(text, limit) {
|
|
663
|
+
if (!text) return { count: 0, exceeded: false };
|
|
664
|
+
if (limit < 0) return { count: 0, exceeded: true };
|
|
665
|
+
let count = 0;
|
|
666
|
+
const regex = new RegExp(
|
|
667
|
+
this.tokenSplitRegex.source,
|
|
668
|
+
this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
|
|
669
|
+
);
|
|
670
|
+
let match;
|
|
671
|
+
while ((match = regex.exec(text)) !== null) {
|
|
672
|
+
const piece = match[0];
|
|
673
|
+
if (piece.length === 0) {
|
|
674
|
+
regex.lastIndex++;
|
|
675
|
+
continue;
|
|
676
|
+
}
|
|
677
|
+
const cached = this.getFromCache(piece);
|
|
678
|
+
if (cached) {
|
|
679
|
+
count += cached.length;
|
|
680
|
+
if (count > limit) return { count, exceeded: true };
|
|
681
|
+
continue;
|
|
682
|
+
}
|
|
683
|
+
const pieceBytes = this.textEncoder.encode(piece);
|
|
684
|
+
const key = bytesToLatin1(pieceBytes);
|
|
685
|
+
const directRank = this.encoder.get(key);
|
|
686
|
+
if (directRank !== void 0) {
|
|
687
|
+
count += 1;
|
|
688
|
+
this.addToCache(piece, [directRank]);
|
|
689
|
+
if (count > limit) return { count, exceeded: true };
|
|
690
|
+
continue;
|
|
691
|
+
}
|
|
692
|
+
const pieceTokens = this.mergeBytePairs(pieceBytes);
|
|
693
|
+
count += pieceTokens.length;
|
|
694
|
+
this.addToCache(piece, pieceTokens);
|
|
695
|
+
if (count > limit) return { count, exceeded: true };
|
|
696
|
+
}
|
|
697
|
+
return { count, exceeded: false };
|
|
698
|
+
}
|
|
619
699
|
/**
|
|
620
700
|
* Core BPE merge algorithm.
|
|
621
701
|
*/
|
|
@@ -770,10 +850,18 @@ var CL100K_BASE_SPECIAL_TOKENS = [
|
|
|
770
850
|
["<|fim_prefix|>", 100258],
|
|
771
851
|
["<|fim_middle|>", 100259],
|
|
772
852
|
["<|fim_suffix|>", 100260],
|
|
853
|
+
// ChatML tokens for chat completion
|
|
854
|
+
["<|im_start|>", 100264],
|
|
855
|
+
["<|im_end|>", 100265],
|
|
856
|
+
["<|im_sep|>", 100266],
|
|
773
857
|
["<|endofprompt|>", 100276]
|
|
774
858
|
];
|
|
775
859
|
var O200K_BASE_SPECIAL_TOKENS = [
|
|
776
860
|
["<|endoftext|>", 199999],
|
|
861
|
+
// ChatML tokens for chat completion
|
|
862
|
+
["<|im_start|>", 200264],
|
|
863
|
+
["<|im_end|>", 200265],
|
|
864
|
+
["<|im_sep|>", 200266],
|
|
777
865
|
["<|endofprompt|>", 200018]
|
|
778
866
|
];
|
|
779
867
|
function buildO200kHarmonySpecialTokens() {
|
|
@@ -401892,7 +401980,8 @@ function getTokenizer(encoding) {
|
|
|
401892
401980
|
}
|
|
401893
401981
|
return {
|
|
401894
401982
|
encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
|
|
401895
|
-
decode: (tokens) => tokenizer.decodeTokens(tokens)
|
|
401983
|
+
decode: (tokens) => tokenizer.decodeTokens(tokens),
|
|
401984
|
+
encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
|
|
401896
401985
|
};
|
|
401897
401986
|
}
|
|
401898
401987
|
function resolveEncoding(options) {
|
|
@@ -401932,6 +402021,39 @@ function decode(tokens, options) {
|
|
|
401932
402021
|
const api = getTokenizer(encoding);
|
|
401933
402022
|
return api.decode(tokens);
|
|
401934
402023
|
}
|
|
402024
|
+
function validateTokenLimit(tokenLimit) {
|
|
402025
|
+
if (!Number.isFinite(tokenLimit)) {
|
|
402026
|
+
throw new Error("tokenLimit must be a finite number");
|
|
402027
|
+
}
|
|
402028
|
+
if (!Number.isInteger(tokenLimit)) {
|
|
402029
|
+
throw new Error("tokenLimit must be an integer");
|
|
402030
|
+
}
|
|
402031
|
+
if (tokenLimit < 0) {
|
|
402032
|
+
throw new Error("tokenLimit must be non-negative");
|
|
402033
|
+
}
|
|
402034
|
+
}
|
|
402035
|
+
function rejectNonOpenAIModel(model) {
|
|
402036
|
+
if (!model) return;
|
|
402037
|
+
if (model.startsWith("claude-")) {
|
|
402038
|
+
throw new Error(
|
|
402039
|
+
`Model "${model}" is an Anthropic model. isWithinTokenLimit only supports OpenAI models. Use the Anthropic API's count_tokens endpoint via estimateAsync() instead.`
|
|
402040
|
+
);
|
|
402041
|
+
}
|
|
402042
|
+
if (model.startsWith("gemini-")) {
|
|
402043
|
+
throw new Error(
|
|
402044
|
+
`Model "${model}" is a Google model. isWithinTokenLimit only supports OpenAI models. Use the Gemini API's countTokens endpoint via estimateAsync() instead.`
|
|
402045
|
+
);
|
|
402046
|
+
}
|
|
402047
|
+
}
|
|
402048
|
+
function isWithinTokenLimit(text, tokenLimit, options) {
|
|
402049
|
+
validateTokenLimit(tokenLimit);
|
|
402050
|
+
rejectNonOpenAIModel(options?.model);
|
|
402051
|
+
const encoding = resolveEncoding(options);
|
|
402052
|
+
const api = getTokenizer(encoding);
|
|
402053
|
+
const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
|
|
402054
|
+
const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
|
|
402055
|
+
return result.exceeded ? false : result.count;
|
|
402056
|
+
}
|
|
401935
402057
|
|
|
401936
402058
|
// src/token-counter.ts
|
|
401937
402059
|
function isNonOpenAIModel(model) {
|
|
@@ -405208,6 +405330,250 @@ function countChatCompletionTokens(input) {
|
|
|
405208
405330
|
}
|
|
405209
405331
|
return result;
|
|
405210
405332
|
}
|
|
405333
|
+
function validateTokenLimit2(tokenLimit) {
|
|
405334
|
+
if (!Number.isFinite(tokenLimit)) {
|
|
405335
|
+
throw new Error("tokenLimit must be a finite number");
|
|
405336
|
+
}
|
|
405337
|
+
if (!Number.isInteger(tokenLimit)) {
|
|
405338
|
+
throw new Error("tokenLimit must be an integer");
|
|
405339
|
+
}
|
|
405340
|
+
if (tokenLimit < 0) {
|
|
405341
|
+
throw new Error("tokenLimit must be non-negative");
|
|
405342
|
+
}
|
|
405343
|
+
}
|
|
405344
|
+
function isChatWithinTokenLimit(input) {
|
|
405345
|
+
const { messages, model, tokenLimit, encoding, functions, function_call } = input;
|
|
405346
|
+
validateTokenLimit2(tokenLimit);
|
|
405347
|
+
validateNoToolsApi(input);
|
|
405348
|
+
validateMessages(messages);
|
|
405349
|
+
validateOpenAIModel(model, encoding);
|
|
405350
|
+
const resolvedEncoding = encoding ?? getOpenAIEncoding({ model });
|
|
405351
|
+
const api = getTokenizer(resolvedEncoding);
|
|
405352
|
+
let count = COMPLETION_REQUEST_TOKEN_OVERHEAD;
|
|
405353
|
+
if (count > tokenLimit) return false;
|
|
405354
|
+
const hasFunctions = Boolean(functions?.length);
|
|
405355
|
+
const hasSystemMessage = messages.some((m) => m.role === "system");
|
|
405356
|
+
if (hasFunctions && functions) {
|
|
405357
|
+
const formatted = formatFunctionDefinitions(functions);
|
|
405358
|
+
const funcResult = api.encodeTextWithLimit(
|
|
405359
|
+
formatted,
|
|
405360
|
+
tokenLimit - count,
|
|
405361
|
+
"skip"
|
|
405362
|
+
);
|
|
405363
|
+
if (funcResult.exceeded) return false;
|
|
405364
|
+
let funcOverhead = funcResult.count + FUNCTION_DEFINITION_TOKEN_OVERHEAD;
|
|
405365
|
+
if (hasSystemMessage) {
|
|
405366
|
+
funcOverhead -= SYSTEM_FUNCTION_TOKEN_DEDUCTION;
|
|
405367
|
+
}
|
|
405368
|
+
count += funcOverhead;
|
|
405369
|
+
if (count > tokenLimit) return false;
|
|
405370
|
+
}
|
|
405371
|
+
if (function_call && function_call !== "auto") {
|
|
405372
|
+
if (function_call === "none") {
|
|
405373
|
+
count += FUNCTION_CALL_NONE_TOKEN_OVERHEAD;
|
|
405374
|
+
} else if (typeof function_call === "object" && function_call.name) {
|
|
405375
|
+
const fcNameResult = api.encodeTextWithLimit(
|
|
405376
|
+
function_call.name,
|
|
405377
|
+
tokenLimit - count,
|
|
405378
|
+
"skip"
|
|
405379
|
+
);
|
|
405380
|
+
if (fcNameResult.exceeded) return false;
|
|
405381
|
+
count += fcNameResult.count + FUNCTION_CALL_NAME_TOKEN_OVERHEAD;
|
|
405382
|
+
}
|
|
405383
|
+
if (count > tokenLimit) return false;
|
|
405384
|
+
}
|
|
405385
|
+
let systemPadded = false;
|
|
405386
|
+
for (const message of messages) {
|
|
405387
|
+
let overhead = MESSAGE_TOKEN_OVERHEAD;
|
|
405388
|
+
if (message.role) {
|
|
405389
|
+
const roleResult = api.encodeTextWithLimit(
|
|
405390
|
+
message.role,
|
|
405391
|
+
tokenLimit - count,
|
|
405392
|
+
"skip"
|
|
405393
|
+
);
|
|
405394
|
+
if (roleResult.exceeded) return false;
|
|
405395
|
+
count += roleResult.count;
|
|
405396
|
+
}
|
|
405397
|
+
let content = message.content ?? "";
|
|
405398
|
+
if (hasFunctions && message.role === "system" && !systemPadded) {
|
|
405399
|
+
if (content && !content.endsWith("\n")) {
|
|
405400
|
+
content = content + "\n";
|
|
405401
|
+
}
|
|
405402
|
+
systemPadded = true;
|
|
405403
|
+
}
|
|
405404
|
+
if (content) {
|
|
405405
|
+
const contentResult = api.encodeTextWithLimit(
|
|
405406
|
+
content,
|
|
405407
|
+
tokenLimit - count,
|
|
405408
|
+
"skip"
|
|
405409
|
+
);
|
|
405410
|
+
if (contentResult.exceeded) return false;
|
|
405411
|
+
count += contentResult.count;
|
|
405412
|
+
}
|
|
405413
|
+
if (message.name) {
|
|
405414
|
+
const nameResult = api.encodeTextWithLimit(
|
|
405415
|
+
message.name,
|
|
405416
|
+
tokenLimit - count,
|
|
405417
|
+
"skip"
|
|
405418
|
+
);
|
|
405419
|
+
if (nameResult.exceeded) return false;
|
|
405420
|
+
count += nameResult.count;
|
|
405421
|
+
overhead += MESSAGE_NAME_TOKEN_OVERHEAD;
|
|
405422
|
+
}
|
|
405423
|
+
if (message.function_call) {
|
|
405424
|
+
if (message.function_call.name) {
|
|
405425
|
+
const fcNameResult = api.encodeTextWithLimit(
|
|
405426
|
+
message.function_call.name,
|
|
405427
|
+
tokenLimit - count,
|
|
405428
|
+
"skip"
|
|
405429
|
+
);
|
|
405430
|
+
if (fcNameResult.exceeded) return false;
|
|
405431
|
+
count += fcNameResult.count;
|
|
405432
|
+
}
|
|
405433
|
+
if (message.function_call.arguments) {
|
|
405434
|
+
const fcArgsResult = api.encodeTextWithLimit(
|
|
405435
|
+
message.function_call.arguments,
|
|
405436
|
+
tokenLimit - count,
|
|
405437
|
+
"skip"
|
|
405438
|
+
);
|
|
405439
|
+
if (fcArgsResult.exceeded) return false;
|
|
405440
|
+
count += fcArgsResult.count;
|
|
405441
|
+
}
|
|
405442
|
+
overhead += FUNCTION_CALL_METADATA_TOKEN_OVERHEAD;
|
|
405443
|
+
}
|
|
405444
|
+
if (message.role === "function") {
|
|
405445
|
+
overhead -= FUNCTION_ROLE_TOKEN_DISCOUNT;
|
|
405446
|
+
}
|
|
405447
|
+
count += overhead;
|
|
405448
|
+
if (count > tokenLimit) return false;
|
|
405449
|
+
}
|
|
405450
|
+
return count;
|
|
405451
|
+
}
|
|
405452
|
+
|
|
405453
|
+
// src/encode-chat.ts
|
|
405454
|
+
var CHAT_TOKENS = {
|
|
405455
|
+
cl100k_base: { imStart: 100264, imEnd: 100265, imSep: 100266 },
|
|
405456
|
+
o200k_base: { imStart: 200264, imEnd: 200265, imSep: 200266 }
|
|
405457
|
+
};
|
|
405458
|
+
var HARMONY_TOKENS = {
|
|
405459
|
+
start: 200006,
|
|
405460
|
+
end: 200007,
|
|
405461
|
+
message: 200008
|
|
405462
|
+
};
|
|
405463
|
+
function encodeChat(messages, options) {
|
|
405464
|
+
const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
|
|
405465
|
+
validateChatModel(model, encodingOverride);
|
|
405466
|
+
const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
|
|
405467
|
+
if (encoding === "o200k_harmony") {
|
|
405468
|
+
console.warn(
|
|
405469
|
+
"[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
|
|
405470
|
+
);
|
|
405471
|
+
}
|
|
405472
|
+
const chatTokens = getChatTokens(encoding);
|
|
405473
|
+
if (!chatTokens) {
|
|
405474
|
+
throw new Error(
|
|
405475
|
+
`Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
|
|
405476
|
+
);
|
|
405477
|
+
}
|
|
405478
|
+
const { imStart, imEnd, imSep } = chatTokens;
|
|
405479
|
+
const tokens = [];
|
|
405480
|
+
for (const message of messages) {
|
|
405481
|
+
validateMessage(message);
|
|
405482
|
+
tokens.push(imStart);
|
|
405483
|
+
let roleStr;
|
|
405484
|
+
if (message.role === "function" && message.name) {
|
|
405485
|
+
roleStr = message.name;
|
|
405486
|
+
} else if (message.name) {
|
|
405487
|
+
roleStr = `${message.role}:${message.name}`;
|
|
405488
|
+
} else {
|
|
405489
|
+
roleStr = message.role;
|
|
405490
|
+
}
|
|
405491
|
+
tokens.push(...encode(roleStr, { encoding, allowSpecial: "none" }));
|
|
405492
|
+
tokens.push(imSep);
|
|
405493
|
+
if (message.content) {
|
|
405494
|
+
tokens.push(
|
|
405495
|
+
...encode(message.content, { encoding, allowSpecial: "none" })
|
|
405496
|
+
);
|
|
405497
|
+
}
|
|
405498
|
+
if (message.function_call) {
|
|
405499
|
+
const fcContent = formatFunctionCall(message.function_call);
|
|
405500
|
+
tokens.push(...encode(fcContent, { encoding, allowSpecial: "none" }));
|
|
405501
|
+
}
|
|
405502
|
+
tokens.push(imEnd);
|
|
405503
|
+
}
|
|
405504
|
+
if (primeAssistant) {
|
|
405505
|
+
tokens.push(imStart);
|
|
405506
|
+
tokens.push(...encode("assistant", { encoding, allowSpecial: "none" }));
|
|
405507
|
+
tokens.push(imSep);
|
|
405508
|
+
}
|
|
405509
|
+
return tokens;
|
|
405510
|
+
}
|
|
405511
|
+
function validateChatModel(model, encodingOverride) {
|
|
405512
|
+
if (model) {
|
|
405513
|
+
if (isAnthropicModel(model)) {
|
|
405514
|
+
throw new Error(
|
|
405515
|
+
`Model "${model}" is an Anthropic model. encodeChat only supports OpenAI models.`
|
|
405516
|
+
);
|
|
405517
|
+
}
|
|
405518
|
+
if (isGoogleModel(model)) {
|
|
405519
|
+
throw new Error(
|
|
405520
|
+
`Model "${model}" is a Google model. encodeChat only supports OpenAI models.`
|
|
405521
|
+
);
|
|
405522
|
+
}
|
|
405523
|
+
if (isKnownModel(model) && !isChatModel(model)) {
|
|
405524
|
+
throw new Error(
|
|
405525
|
+
`Model "${model}" is not a chat completion model. encodeChat only supports chat models (e.g., gpt-4o, gpt-3.5-turbo).`
|
|
405526
|
+
);
|
|
405527
|
+
}
|
|
405528
|
+
}
|
|
405529
|
+
if (encodingOverride) {
|
|
405530
|
+
return;
|
|
405531
|
+
}
|
|
405532
|
+
if (!model) {
|
|
405533
|
+
throw new Error(
|
|
405534
|
+
"Either model or encoding must be provided. Provide a known OpenAI chat model (e.g., gpt-4o) or an explicit encoding (e.g., o200k_base)."
|
|
405535
|
+
);
|
|
405536
|
+
}
|
|
405537
|
+
if (!isChatModel(model)) {
|
|
405538
|
+
throw new Error(
|
|
405539
|
+
`Model "${model}" is not recognized as an OpenAI chat model. If this is a new OpenAI model, provide the encoding option explicitly (e.g., encoding: "o200k_base").`
|
|
405540
|
+
);
|
|
405541
|
+
}
|
|
405542
|
+
}
|
|
405543
|
+
function validateMessage(message) {
|
|
405544
|
+
const msgAny = message;
|
|
405545
|
+
if ("tool_calls" in msgAny && msgAny.tool_calls !== void 0) {
|
|
405546
|
+
throw new Error(
|
|
405547
|
+
"tool_calls is not supported. Use function_call with the legacy functions API."
|
|
405548
|
+
);
|
|
405549
|
+
}
|
|
405550
|
+
if ("tool_call_id" in msgAny && msgAny.tool_call_id !== void 0) {
|
|
405551
|
+
throw new Error(
|
|
405552
|
+
"tool_call_id is not supported. Use the legacy functions API."
|
|
405553
|
+
);
|
|
405554
|
+
}
|
|
405555
|
+
if (message.content !== null && message.content !== void 0 && typeof message.content !== "string") {
|
|
405556
|
+
throw new Error(
|
|
405557
|
+
"Multimodal content (arrays) is not supported. Only text content is supported."
|
|
405558
|
+
);
|
|
405559
|
+
}
|
|
405560
|
+
}
|
|
405561
|
+
function getChatTokens(encoding) {
|
|
405562
|
+
if (encoding === "o200k_harmony") {
|
|
405563
|
+
return {
|
|
405564
|
+
imStart: HARMONY_TOKENS.start,
|
|
405565
|
+
imEnd: HARMONY_TOKENS.end,
|
|
405566
|
+
imSep: HARMONY_TOKENS.message
|
|
405567
|
+
};
|
|
405568
|
+
}
|
|
405569
|
+
return CHAT_TOKENS[encoding] ?? null;
|
|
405570
|
+
}
|
|
405571
|
+
function formatFunctionCall(fc) {
|
|
405572
|
+
const parts = [];
|
|
405573
|
+
if (fc.name) parts.push(fc.name);
|
|
405574
|
+
if (fc.arguments) parts.push(fc.arguments);
|
|
405575
|
+
return parts.join("\n");
|
|
405576
|
+
}
|
|
405211
405577
|
// Annotate the CommonJS export names for ESM import in node:
|
|
405212
405578
|
0 && (module.exports = {
|
|
405213
405579
|
DEFAULT_MODELS,
|
|
@@ -405224,6 +405590,7 @@ function countChatCompletionTokens(input) {
|
|
|
405224
405590
|
decodeSentencePiece,
|
|
405225
405591
|
decodeSentencePieceAsync,
|
|
405226
405592
|
encode,
|
|
405593
|
+
encodeChat,
|
|
405227
405594
|
encodeSentencePiece,
|
|
405228
405595
|
encodeSentencePieceAsync,
|
|
405229
405596
|
ensureSentencePieceModel,
|
|
@@ -405237,6 +405604,8 @@ function countChatCompletionTokens(input) {
|
|
|
405237
405604
|
getOpenAIEncoding,
|
|
405238
405605
|
getSentencePieceTokenizer,
|
|
405239
405606
|
getTotalCost,
|
|
405607
|
+
isChatWithinTokenLimit,
|
|
405608
|
+
isWithinTokenLimit,
|
|
405240
405609
|
loadSentencePieceTokenizer,
|
|
405241
405610
|
parseModelProto
|
|
405242
405611
|
});
|
package/dist/index.d.cts
CHANGED
|
@@ -55,6 +55,58 @@ declare function encode(text: string, options?: EncodeOptions): number[];
|
|
|
55
55
|
* Decode OpenAI token IDs into text using tiktoken-compatible BPE encoding.
|
|
56
56
|
*/
|
|
57
57
|
declare function decode(tokens: Iterable<number>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): string;
|
|
58
|
+
/**
|
|
59
|
+
* Options for isWithinTokenLimit.
|
|
60
|
+
*/
|
|
61
|
+
interface IsWithinTokenLimitOptions {
|
|
62
|
+
/**
|
|
63
|
+
* Explicit OpenAI encoding override.
|
|
64
|
+
* When provided, this takes precedence over `model`.
|
|
65
|
+
*/
|
|
66
|
+
encoding?: OpenAIEncoding;
|
|
67
|
+
/**
|
|
68
|
+
* OpenAI model ID used to select the appropriate encoding.
|
|
69
|
+
* Note: Non-OpenAI models (claude-*, gemini-*) are rejected.
|
|
70
|
+
*/
|
|
71
|
+
model?: string;
|
|
72
|
+
/**
|
|
73
|
+
* How special tokens are handled.
|
|
74
|
+
* - `none_raise` (default): throw if special tokens appear
|
|
75
|
+
* - `none`: treat special tokens as regular text
|
|
76
|
+
* - `all`: allow special tokens and encode them as special token IDs
|
|
77
|
+
*/
|
|
78
|
+
allowSpecial?: SpecialTokenHandling;
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Check if text is within a token limit, with early exit optimization.
|
|
82
|
+
*
|
|
83
|
+
* Returns `false` if the token count exceeds the limit, otherwise returns the
|
|
84
|
+
* actual token count. This is significantly faster than full tokenization when
|
|
85
|
+
* the limit is exceeded early in the text.
|
|
86
|
+
*
|
|
87
|
+
* @param text - The text to check
|
|
88
|
+
* @param tokenLimit - Maximum allowed tokens (must be non-negative finite integer)
|
|
89
|
+
* @param options - Encoding options
|
|
90
|
+
* @returns `false` if exceeded, or the actual token count if within limit
|
|
91
|
+
* @throws Error if tokenLimit is invalid (NaN, Infinity, negative, non-integer)
|
|
92
|
+
* @throws Error if model is a known non-OpenAI model (claude-*, gemini-*)
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* ```typescript
|
|
96
|
+
* // Returns token count if within limit
|
|
97
|
+
* const count = isWithinTokenLimit('Hello, world!', 100, { model: 'gpt-4o' });
|
|
98
|
+
* if (count !== false) {
|
|
99
|
+
* console.log(`Text has ${count} tokens`);
|
|
100
|
+
* }
|
|
101
|
+
*
|
|
102
|
+
* // Returns false if exceeds limit
|
|
103
|
+
* const result = isWithinTokenLimit(longText, 10, { model: 'gpt-4o' });
|
|
104
|
+
* if (result === false) {
|
|
105
|
+
* console.log('Text exceeds 10 tokens');
|
|
106
|
+
* }
|
|
107
|
+
* ```
|
|
108
|
+
*/
|
|
109
|
+
declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: IsWithinTokenLimitOptions): false | number;
|
|
58
110
|
|
|
59
111
|
/**
|
|
60
112
|
* Configuration for a specific LLM model.
|
|
@@ -542,6 +594,100 @@ declare function countTokens(input: TokenCountInput): TokenCountOutput;
|
|
|
542
594
|
* ```
|
|
543
595
|
*/
|
|
544
596
|
declare function countChatCompletionTokens(input: ChatCompletionTokenCountInput): ChatCompletionTokenCountOutput;
|
|
597
|
+
/**
|
|
598
|
+
* Input for isChatWithinTokenLimit.
|
|
599
|
+
* Object-style input to match countChatCompletionTokens API.
|
|
600
|
+
*/
|
|
601
|
+
interface IsChatWithinTokenLimitInput {
|
|
602
|
+
messages: ChatMessage[];
|
|
603
|
+
model: string;
|
|
604
|
+
tokenLimit: number;
|
|
605
|
+
encoding?: OpenAIEncoding;
|
|
606
|
+
functions?: FunctionDefinition[];
|
|
607
|
+
function_call?: FunctionCallOption;
|
|
608
|
+
}
|
|
609
|
+
/**
|
|
610
|
+
* Check if chat messages are within a token limit, with early exit optimization.
|
|
611
|
+
*
|
|
612
|
+
* Uses object-style input to match countChatCompletionTokens API.
|
|
613
|
+
* Returns `false` if the token count exceeds the limit, otherwise returns
|
|
614
|
+
* the actual token count.
|
|
615
|
+
*
|
|
616
|
+
* This is significantly faster than full tokenization when the limit is
|
|
617
|
+
* exceeded early in the input.
|
|
618
|
+
*
|
|
619
|
+
* @throws {Error} If tokenLimit is invalid (NaN, Infinity, negative, non-integer)
|
|
620
|
+
* @throws {Error} If model is not an OpenAI model (unless encoding override provided)
|
|
621
|
+
* @throws {Error} If tools, tool_choice, tool_calls, or tool_call_id are present
|
|
622
|
+
* @throws {Error} If any message has non-string content (arrays, numbers, objects)
|
|
623
|
+
*
|
|
624
|
+
* @example
|
|
625
|
+
* ```typescript
|
|
626
|
+
* const result = isChatWithinTokenLimit({
|
|
627
|
+
* messages: [
|
|
628
|
+
* { role: 'system', content: 'You are a helpful assistant.' },
|
|
629
|
+
* { role: 'user', content: 'Hello!' }
|
|
630
|
+
* ],
|
|
631
|
+
* model: 'gpt-4o',
|
|
632
|
+
* tokenLimit: 100,
|
|
633
|
+
* });
|
|
634
|
+
*
|
|
635
|
+
* if (result === false) {
|
|
636
|
+
* console.log('Messages exceed token limit');
|
|
637
|
+
* } else {
|
|
638
|
+
* console.log(`Messages use ${result} tokens`);
|
|
639
|
+
* }
|
|
640
|
+
* ```
|
|
641
|
+
*/
|
|
642
|
+
declare function isChatWithinTokenLimit(input: IsChatWithinTokenLimitInput): false | number;
|
|
643
|
+
|
|
644
|
+
/**
|
|
645
|
+
* Chat-aware tokenization using ChatML format.
|
|
646
|
+
*
|
|
647
|
+
* Encodes chat messages into ChatML message prompt tokens including special
|
|
648
|
+
* delimiter tokens (<|im_start|>, <|im_sep|>, <|im_end|>).
|
|
649
|
+
*/
|
|
650
|
+
|
|
651
|
+
/**
|
|
652
|
+
* Options for encodeChat.
|
|
653
|
+
*/
|
|
654
|
+
interface EncodeChatOptions {
|
|
655
|
+
/**
|
|
656
|
+
* OpenAI model ID used to select the appropriate encoding.
|
|
657
|
+
* Note: Non-OpenAI models (claude-*, gemini-*) are rejected.
|
|
658
|
+
*/
|
|
659
|
+
model?: string;
|
|
660
|
+
/**
|
|
661
|
+
* Explicit OpenAI encoding override.
|
|
662
|
+
* When provided, this takes precedence over `model`.
|
|
663
|
+
*/
|
|
664
|
+
encoding?: OpenAIEncoding;
|
|
665
|
+
/**
|
|
666
|
+
* Prime the output with the start of an assistant response.
|
|
667
|
+
* When true (default), appends <|im_start|>assistant<|im_sep|> at the end.
|
|
668
|
+
* Set to false to get just the messages without assistant priming.
|
|
669
|
+
*/
|
|
670
|
+
primeAssistant?: boolean;
|
|
671
|
+
}
|
|
672
|
+
/**
|
|
673
|
+
* Encode chat messages into token IDs using ChatML format.
|
|
674
|
+
*
|
|
675
|
+
* Returns the exact token sequence that OpenAI models expect for chat
|
|
676
|
+
* completions, including special delimiter tokens.
|
|
677
|
+
*
|
|
678
|
+
* @param messages - Array of chat messages
|
|
679
|
+
* @param options - Encoding options
|
|
680
|
+
* @returns Token IDs representing the chat prompt
|
|
681
|
+
*
|
|
682
|
+
* @example
|
|
683
|
+
* ```typescript
|
|
684
|
+
* const tokens = encodeChat([
|
|
685
|
+
* { role: 'system', content: 'You are helpful.' },
|
|
686
|
+
* { role: 'user', content: 'Hello!' }
|
|
687
|
+
* ], { model: 'gpt-4o' });
|
|
688
|
+
* ```
|
|
689
|
+
*/
|
|
690
|
+
declare function encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[];
|
|
545
691
|
|
|
546
692
|
interface AnthropicCountTokensParams {
|
|
547
693
|
/** Claude model id, e.g. `claude-sonnet-4-5` */
|
|
@@ -818,4 +964,4 @@ declare function clearModelCache(): void;
|
|
|
818
964
|
*/
|
|
819
965
|
declare function parseModelProto(buffer: Uint8Array): ModelProto;
|
|
820
966
|
|
|
821
|
-
export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, loadSentencePieceTokenizer, parseModelProto };
|
|
967
|
+
export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
|
package/dist/index.d.ts
CHANGED
|
@@ -55,6 +55,58 @@ declare function encode(text: string, options?: EncodeOptions): number[];
|
|
|
55
55
|
* Decode OpenAI token IDs into text using tiktoken-compatible BPE encoding.
|
|
56
56
|
*/
|
|
57
57
|
declare function decode(tokens: Iterable<number>, options?: Pick<EncodeOptions, 'encoding' | 'model'>): string;
|
|
58
|
+
/**
|
|
59
|
+
* Options for isWithinTokenLimit.
|
|
60
|
+
*/
|
|
61
|
+
interface IsWithinTokenLimitOptions {
|
|
62
|
+
/**
|
|
63
|
+
* Explicit OpenAI encoding override.
|
|
64
|
+
* When provided, this takes precedence over `model`.
|
|
65
|
+
*/
|
|
66
|
+
encoding?: OpenAIEncoding;
|
|
67
|
+
/**
|
|
68
|
+
* OpenAI model ID used to select the appropriate encoding.
|
|
69
|
+
* Note: Non-OpenAI models (claude-*, gemini-*) are rejected.
|
|
70
|
+
*/
|
|
71
|
+
model?: string;
|
|
72
|
+
/**
|
|
73
|
+
* How special tokens are handled.
|
|
74
|
+
* - `none_raise` (default): throw if special tokens appear
|
|
75
|
+
* - `none`: treat special tokens as regular text
|
|
76
|
+
* - `all`: allow special tokens and encode them as special token IDs
|
|
77
|
+
*/
|
|
78
|
+
allowSpecial?: SpecialTokenHandling;
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Check if text is within a token limit, with early exit optimization.
|
|
82
|
+
*
|
|
83
|
+
* Returns `false` if the token count exceeds the limit, otherwise returns the
|
|
84
|
+
* actual token count. This is significantly faster than full tokenization when
|
|
85
|
+
* the limit is exceeded early in the text.
|
|
86
|
+
*
|
|
87
|
+
* @param text - The text to check
|
|
88
|
+
* @param tokenLimit - Maximum allowed tokens (must be non-negative finite integer)
|
|
89
|
+
* @param options - Encoding options
|
|
90
|
+
* @returns `false` if exceeded, or the actual token count if within limit
|
|
91
|
+
* @throws Error if tokenLimit is invalid (NaN, Infinity, negative, non-integer)
|
|
92
|
+
* @throws Error if model is a known non-OpenAI model (claude-*, gemini-*)
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* ```typescript
|
|
96
|
+
* // Returns token count if within limit
|
|
97
|
+
* const count = isWithinTokenLimit('Hello, world!', 100, { model: 'gpt-4o' });
|
|
98
|
+
* if (count !== false) {
|
|
99
|
+
* console.log(`Text has ${count} tokens`);
|
|
100
|
+
* }
|
|
101
|
+
*
|
|
102
|
+
* // Returns false if exceeds limit
|
|
103
|
+
* const result = isWithinTokenLimit(longText, 10, { model: 'gpt-4o' });
|
|
104
|
+
* if (result === false) {
|
|
105
|
+
* console.log('Text exceeds 10 tokens');
|
|
106
|
+
* }
|
|
107
|
+
* ```
|
|
108
|
+
*/
|
|
109
|
+
declare function isWithinTokenLimit(text: string, tokenLimit: number, options?: IsWithinTokenLimitOptions): false | number;
|
|
58
110
|
|
|
59
111
|
/**
|
|
60
112
|
* Configuration for a specific LLM model.
|
|
@@ -542,6 +594,100 @@ declare function countTokens(input: TokenCountInput): TokenCountOutput;
|
|
|
542
594
|
* ```
|
|
543
595
|
*/
|
|
544
596
|
declare function countChatCompletionTokens(input: ChatCompletionTokenCountInput): ChatCompletionTokenCountOutput;
|
|
597
|
+
/**
|
|
598
|
+
* Input for isChatWithinTokenLimit.
|
|
599
|
+
* Object-style input to match countChatCompletionTokens API.
|
|
600
|
+
*/
|
|
601
|
+
interface IsChatWithinTokenLimitInput {
|
|
602
|
+
messages: ChatMessage[];
|
|
603
|
+
model: string;
|
|
604
|
+
tokenLimit: number;
|
|
605
|
+
encoding?: OpenAIEncoding;
|
|
606
|
+
functions?: FunctionDefinition[];
|
|
607
|
+
function_call?: FunctionCallOption;
|
|
608
|
+
}
|
|
609
|
+
/**
|
|
610
|
+
* Check if chat messages are within a token limit, with early exit optimization.
|
|
611
|
+
*
|
|
612
|
+
* Uses object-style input to match countChatCompletionTokens API.
|
|
613
|
+
* Returns `false` if the token count exceeds the limit, otherwise returns
|
|
614
|
+
* the actual token count.
|
|
615
|
+
*
|
|
616
|
+
* This is significantly faster than full tokenization when the limit is
|
|
617
|
+
* exceeded early in the input.
|
|
618
|
+
*
|
|
619
|
+
* @throws {Error} If tokenLimit is invalid (NaN, Infinity, negative, non-integer)
|
|
620
|
+
* @throws {Error} If model is not an OpenAI model (unless encoding override provided)
|
|
621
|
+
* @throws {Error} If tools, tool_choice, tool_calls, or tool_call_id are present
|
|
622
|
+
* @throws {Error} If any message has non-string content (arrays, numbers, objects)
|
|
623
|
+
*
|
|
624
|
+
* @example
|
|
625
|
+
* ```typescript
|
|
626
|
+
* const result = isChatWithinTokenLimit({
|
|
627
|
+
* messages: [
|
|
628
|
+
* { role: 'system', content: 'You are a helpful assistant.' },
|
|
629
|
+
* { role: 'user', content: 'Hello!' }
|
|
630
|
+
* ],
|
|
631
|
+
* model: 'gpt-4o',
|
|
632
|
+
* tokenLimit: 100,
|
|
633
|
+
* });
|
|
634
|
+
*
|
|
635
|
+
* if (result === false) {
|
|
636
|
+
* console.log('Messages exceed token limit');
|
|
637
|
+
* } else {
|
|
638
|
+
* console.log(`Messages use ${result} tokens`);
|
|
639
|
+
* }
|
|
640
|
+
* ```
|
|
641
|
+
*/
|
|
642
|
+
declare function isChatWithinTokenLimit(input: IsChatWithinTokenLimitInput): false | number;
|
|
643
|
+
|
|
644
|
+
/**
|
|
645
|
+
* Chat-aware tokenization using ChatML format.
|
|
646
|
+
*
|
|
647
|
+
* Encodes chat messages into ChatML message prompt tokens including special
|
|
648
|
+
* delimiter tokens (<|im_start|>, <|im_sep|>, <|im_end|>).
|
|
649
|
+
*/
|
|
650
|
+
|
|
651
|
+
/**
|
|
652
|
+
* Options for encodeChat.
|
|
653
|
+
*/
|
|
654
|
+
interface EncodeChatOptions {
|
|
655
|
+
/**
|
|
656
|
+
* OpenAI model ID used to select the appropriate encoding.
|
|
657
|
+
* Note: Non-OpenAI models (claude-*, gemini-*) are rejected.
|
|
658
|
+
*/
|
|
659
|
+
model?: string;
|
|
660
|
+
/**
|
|
661
|
+
* Explicit OpenAI encoding override.
|
|
662
|
+
* When provided, this takes precedence over `model`.
|
|
663
|
+
*/
|
|
664
|
+
encoding?: OpenAIEncoding;
|
|
665
|
+
/**
|
|
666
|
+
* Prime the output with the start of an assistant response.
|
|
667
|
+
* When true (default), appends <|im_start|>assistant<|im_sep|> at the end.
|
|
668
|
+
* Set to false to get just the messages without assistant priming.
|
|
669
|
+
*/
|
|
670
|
+
primeAssistant?: boolean;
|
|
671
|
+
}
|
|
672
|
+
/**
|
|
673
|
+
* Encode chat messages into token IDs using ChatML format.
|
|
674
|
+
*
|
|
675
|
+
* Returns the exact token sequence that OpenAI models expect for chat
|
|
676
|
+
* completions, including special delimiter tokens.
|
|
677
|
+
*
|
|
678
|
+
* @param messages - Array of chat messages
|
|
679
|
+
* @param options - Encoding options
|
|
680
|
+
* @returns Token IDs representing the chat prompt
|
|
681
|
+
*
|
|
682
|
+
* @example
|
|
683
|
+
* ```typescript
|
|
684
|
+
* const tokens = encodeChat([
|
|
685
|
+
* { role: 'system', content: 'You are helpful.' },
|
|
686
|
+
* { role: 'user', content: 'Hello!' }
|
|
687
|
+
* ], { model: 'gpt-4o' });
|
|
688
|
+
* ```
|
|
689
|
+
*/
|
|
690
|
+
declare function encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[];
|
|
545
691
|
|
|
546
692
|
interface AnthropicCountTokensParams {
|
|
547
693
|
/** Claude model id, e.g. `claude-sonnet-4-5` */
|
|
@@ -818,4 +964,4 @@ declare function clearModelCache(): void;
|
|
|
818
964
|
*/
|
|
819
965
|
declare function parseModelProto(buffer: Uint8Array): ModelProto;
|
|
820
966
|
|
|
821
|
-
export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, loadSentencePieceTokenizer, parseModelProto };
|
|
967
|
+
export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
|
package/dist/index.js
CHANGED
|
@@ -552,6 +552,83 @@ var BPETokenizer = class {
|
|
|
552
552
|
}
|
|
553
553
|
return tokens;
|
|
554
554
|
}
|
|
555
|
+
/**
|
|
556
|
+
* Encode text with a token limit, returning early if the limit is exceeded.
|
|
557
|
+
* This is optimized for fast token-limit validation without full tokenization.
|
|
558
|
+
*
|
|
559
|
+
* @param text - The text to encode
|
|
560
|
+
* @param limit - Maximum number of tokens allowed
|
|
561
|
+
* @param allowedSpecial - Controls special token handling (same as encodeText)
|
|
562
|
+
* @returns Object with count and exceeded flag
|
|
563
|
+
*/
|
|
564
|
+
encodeTextWithLimit(text, limit, allowedSpecial) {
|
|
565
|
+
if (!text) return { count: 0, exceeded: false };
|
|
566
|
+
if (limit < 0) return { count: 0, exceeded: true };
|
|
567
|
+
if (allowedSpecial === "skip") {
|
|
568
|
+
return this.encodeOrdinaryWithLimit(text, limit);
|
|
569
|
+
}
|
|
570
|
+
let count = 0;
|
|
571
|
+
if (this.specialTokenMap.size > 0) {
|
|
572
|
+
const parts = this.splitOnSpecialTokens(text, allowedSpecial);
|
|
573
|
+
for (const part of parts) {
|
|
574
|
+
if (part.isSpecial) {
|
|
575
|
+
count += 1;
|
|
576
|
+
if (count > limit) return { count, exceeded: true };
|
|
577
|
+
} else {
|
|
578
|
+
const result = this.encodeOrdinaryWithLimit(part.text, limit - count);
|
|
579
|
+
count += result.count;
|
|
580
|
+
if (result.exceeded) {
|
|
581
|
+
return { count, exceeded: true };
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
} else {
|
|
586
|
+
return this.encodeOrdinaryWithLimit(text, limit);
|
|
587
|
+
}
|
|
588
|
+
return { count, exceeded: false };
|
|
589
|
+
}
|
|
590
|
+
/**
|
|
591
|
+
* Incremental encoding with early exit.
|
|
592
|
+
* CRITICAL: Uses RegExp.exec() loop instead of text.match() to avoid
|
|
593
|
+
* allocating all pieces upfront. This enables true early exit.
|
|
594
|
+
*/
|
|
595
|
+
encodeOrdinaryWithLimit(text, limit) {
|
|
596
|
+
if (!text) return { count: 0, exceeded: false };
|
|
597
|
+
if (limit < 0) return { count: 0, exceeded: true };
|
|
598
|
+
let count = 0;
|
|
599
|
+
const regex = new RegExp(
|
|
600
|
+
this.tokenSplitRegex.source,
|
|
601
|
+
this.tokenSplitRegex.flags.includes("g") ? this.tokenSplitRegex.flags : this.tokenSplitRegex.flags + "g"
|
|
602
|
+
);
|
|
603
|
+
let match;
|
|
604
|
+
while ((match = regex.exec(text)) !== null) {
|
|
605
|
+
const piece = match[0];
|
|
606
|
+
if (piece.length === 0) {
|
|
607
|
+
regex.lastIndex++;
|
|
608
|
+
continue;
|
|
609
|
+
}
|
|
610
|
+
const cached = this.getFromCache(piece);
|
|
611
|
+
if (cached) {
|
|
612
|
+
count += cached.length;
|
|
613
|
+
if (count > limit) return { count, exceeded: true };
|
|
614
|
+
continue;
|
|
615
|
+
}
|
|
616
|
+
const pieceBytes = this.textEncoder.encode(piece);
|
|
617
|
+
const key = bytesToLatin1(pieceBytes);
|
|
618
|
+
const directRank = this.encoder.get(key);
|
|
619
|
+
if (directRank !== void 0) {
|
|
620
|
+
count += 1;
|
|
621
|
+
this.addToCache(piece, [directRank]);
|
|
622
|
+
if (count > limit) return { count, exceeded: true };
|
|
623
|
+
continue;
|
|
624
|
+
}
|
|
625
|
+
const pieceTokens = this.mergeBytePairs(pieceBytes);
|
|
626
|
+
count += pieceTokens.length;
|
|
627
|
+
this.addToCache(piece, pieceTokens);
|
|
628
|
+
if (count > limit) return { count, exceeded: true };
|
|
629
|
+
}
|
|
630
|
+
return { count, exceeded: false };
|
|
631
|
+
}
|
|
555
632
|
/**
|
|
556
633
|
* Core BPE merge algorithm.
|
|
557
634
|
*/
|
|
@@ -706,10 +783,18 @@ var CL100K_BASE_SPECIAL_TOKENS = [
|
|
|
706
783
|
["<|fim_prefix|>", 100258],
|
|
707
784
|
["<|fim_middle|>", 100259],
|
|
708
785
|
["<|fim_suffix|>", 100260],
|
|
786
|
+
// ChatML tokens for chat completion
|
|
787
|
+
["<|im_start|>", 100264],
|
|
788
|
+
["<|im_end|>", 100265],
|
|
789
|
+
["<|im_sep|>", 100266],
|
|
709
790
|
["<|endofprompt|>", 100276]
|
|
710
791
|
];
|
|
711
792
|
var O200K_BASE_SPECIAL_TOKENS = [
|
|
712
793
|
["<|endoftext|>", 199999],
|
|
794
|
+
// ChatML tokens for chat completion
|
|
795
|
+
["<|im_start|>", 200264],
|
|
796
|
+
["<|im_end|>", 200265],
|
|
797
|
+
["<|im_sep|>", 200266],
|
|
713
798
|
["<|endofprompt|>", 200018]
|
|
714
799
|
];
|
|
715
800
|
function buildO200kHarmonySpecialTokens() {
|
|
@@ -401828,7 +401913,8 @@ function getTokenizer(encoding) {
|
|
|
401828
401913
|
}
|
|
401829
401914
|
return {
|
|
401830
401915
|
encode: (text, allowedSpecial) => tokenizer.encodeText(text, allowedSpecial),
|
|
401831
|
-
decode: (tokens) => tokenizer.decodeTokens(tokens)
|
|
401916
|
+
decode: (tokens) => tokenizer.decodeTokens(tokens),
|
|
401917
|
+
encodeTextWithLimit: (text, limit, allowedSpecial) => tokenizer.encodeTextWithLimit(text, limit, allowedSpecial)
|
|
401832
401918
|
};
|
|
401833
401919
|
}
|
|
401834
401920
|
function resolveEncoding(options) {
|
|
@@ -401868,6 +401954,39 @@ function decode(tokens, options) {
|
|
|
401868
401954
|
const api = getTokenizer(encoding);
|
|
401869
401955
|
return api.decode(tokens);
|
|
401870
401956
|
}
|
|
401957
|
+
function validateTokenLimit(tokenLimit) {
|
|
401958
|
+
if (!Number.isFinite(tokenLimit)) {
|
|
401959
|
+
throw new Error("tokenLimit must be a finite number");
|
|
401960
|
+
}
|
|
401961
|
+
if (!Number.isInteger(tokenLimit)) {
|
|
401962
|
+
throw new Error("tokenLimit must be an integer");
|
|
401963
|
+
}
|
|
401964
|
+
if (tokenLimit < 0) {
|
|
401965
|
+
throw new Error("tokenLimit must be non-negative");
|
|
401966
|
+
}
|
|
401967
|
+
}
|
|
401968
|
+
function rejectNonOpenAIModel(model) {
|
|
401969
|
+
if (!model) return;
|
|
401970
|
+
if (model.startsWith("claude-")) {
|
|
401971
|
+
throw new Error(
|
|
401972
|
+
`Model "${model}" is an Anthropic model. isWithinTokenLimit only supports OpenAI models. Use the Anthropic API's count_tokens endpoint via estimateAsync() instead.`
|
|
401973
|
+
);
|
|
401974
|
+
}
|
|
401975
|
+
if (model.startsWith("gemini-")) {
|
|
401976
|
+
throw new Error(
|
|
401977
|
+
`Model "${model}" is a Google model. isWithinTokenLimit only supports OpenAI models. Use the Gemini API's countTokens endpoint via estimateAsync() instead.`
|
|
401978
|
+
);
|
|
401979
|
+
}
|
|
401980
|
+
}
|
|
401981
|
+
function isWithinTokenLimit(text, tokenLimit, options) {
|
|
401982
|
+
validateTokenLimit(tokenLimit);
|
|
401983
|
+
rejectNonOpenAIModel(options?.model);
|
|
401984
|
+
const encoding = resolveEncoding(options);
|
|
401985
|
+
const api = getTokenizer(encoding);
|
|
401986
|
+
const allowedSpecial = resolveAllowedSpecial(options?.allowSpecial);
|
|
401987
|
+
const result = api.encodeTextWithLimit(text, tokenLimit, allowedSpecial);
|
|
401988
|
+
return result.exceeded ? false : result.count;
|
|
401989
|
+
}
|
|
401871
401990
|
|
|
401872
401991
|
// src/token-counter.ts
|
|
401873
401992
|
function isNonOpenAIModel(model) {
|
|
@@ -405144,6 +405263,250 @@ function countChatCompletionTokens(input) {
  }
  return result;
}
+function validateTokenLimit2(tokenLimit) {
+  if (!Number.isFinite(tokenLimit)) {
+    throw new Error("tokenLimit must be a finite number");
+  }
+  if (!Number.isInteger(tokenLimit)) {
+    throw new Error("tokenLimit must be an integer");
+  }
+  if (tokenLimit < 0) {
+    throw new Error("tokenLimit must be non-negative");
+  }
+}
+function isChatWithinTokenLimit(input) {
+  const { messages, model, tokenLimit, encoding, functions, function_call } = input;
+  validateTokenLimit2(tokenLimit);
+  validateNoToolsApi(input);
+  validateMessages(messages);
+  validateOpenAIModel(model, encoding);
+  const resolvedEncoding = encoding ?? getOpenAIEncoding({ model });
+  const api = getTokenizer(resolvedEncoding);
+  let count = COMPLETION_REQUEST_TOKEN_OVERHEAD;
+  if (count > tokenLimit) return false;
+  const hasFunctions = Boolean(functions?.length);
+  const hasSystemMessage = messages.some((m) => m.role === "system");
+  if (hasFunctions && functions) {
+    const formatted = formatFunctionDefinitions(functions);
+    const funcResult = api.encodeTextWithLimit(
+      formatted,
+      tokenLimit - count,
+      "skip"
+    );
+    if (funcResult.exceeded) return false;
+    let funcOverhead = funcResult.count + FUNCTION_DEFINITION_TOKEN_OVERHEAD;
+    if (hasSystemMessage) {
+      funcOverhead -= SYSTEM_FUNCTION_TOKEN_DEDUCTION;
+    }
+    count += funcOverhead;
+    if (count > tokenLimit) return false;
+  }
+  if (function_call && function_call !== "auto") {
+    if (function_call === "none") {
+      count += FUNCTION_CALL_NONE_TOKEN_OVERHEAD;
+    } else if (typeof function_call === "object" && function_call.name) {
+      const fcNameResult = api.encodeTextWithLimit(
+        function_call.name,
+        tokenLimit - count,
+        "skip"
+      );
+      if (fcNameResult.exceeded) return false;
+      count += fcNameResult.count + FUNCTION_CALL_NAME_TOKEN_OVERHEAD;
+    }
+    if (count > tokenLimit) return false;
+  }
+  let systemPadded = false;
+  for (const message of messages) {
+    let overhead = MESSAGE_TOKEN_OVERHEAD;
+    if (message.role) {
+      const roleResult = api.encodeTextWithLimit(
+        message.role,
+        tokenLimit - count,
+        "skip"
+      );
+      if (roleResult.exceeded) return false;
+      count += roleResult.count;
+    }
+    let content = message.content ?? "";
+    if (hasFunctions && message.role === "system" && !systemPadded) {
+      if (content && !content.endsWith("\n")) {
+        content = content + "\n";
+      }
+      systemPadded = true;
+    }
+    if (content) {
+      const contentResult = api.encodeTextWithLimit(
+        content,
+        tokenLimit - count,
+        "skip"
+      );
+      if (contentResult.exceeded) return false;
+      count += contentResult.count;
+    }
+    if (message.name) {
+      const nameResult = api.encodeTextWithLimit(
+        message.name,
+        tokenLimit - count,
+        "skip"
+      );
+      if (nameResult.exceeded) return false;
+      count += nameResult.count;
+      overhead += MESSAGE_NAME_TOKEN_OVERHEAD;
+    }
+    if (message.function_call) {
+      if (message.function_call.name) {
+        const fcNameResult = api.encodeTextWithLimit(
+          message.function_call.name,
+          tokenLimit - count,
+          "skip"
+        );
+        if (fcNameResult.exceeded) return false;
+        count += fcNameResult.count;
+      }
+      if (message.function_call.arguments) {
+        const fcArgsResult = api.encodeTextWithLimit(
+          message.function_call.arguments,
+          tokenLimit - count,
+          "skip"
+        );
+        if (fcArgsResult.exceeded) return false;
+        count += fcArgsResult.count;
+      }
+      overhead += FUNCTION_CALL_METADATA_TOKEN_OVERHEAD;
+    }
+    if (message.role === "function") {
+      overhead -= FUNCTION_ROLE_TOKEN_DISCOUNT;
+    }
+    count += overhead;
+    if (count > tokenLimit) return false;
+  }
+  return count;
+}
+
+// src/encode-chat.ts
+var CHAT_TOKENS = {
+  cl100k_base: { imStart: 100264, imEnd: 100265, imSep: 100266 },
+  o200k_base: { imStart: 200264, imEnd: 200265, imSep: 200266 }
+};
+var HARMONY_TOKENS = {
+  start: 200006,
+  end: 200007,
+  message: 200008
+};
+function encodeChat(messages, options) {
+  const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+  validateChatModel(model, encodingOverride);
+  const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+  if (encoding === "o200k_harmony") {
+    console.warn(
+      "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+    );
+  }
+  const chatTokens = getChatTokens(encoding);
+  if (!chatTokens) {
+    throw new Error(
+      `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+    );
+  }
+  const { imStart, imEnd, imSep } = chatTokens;
+  const tokens = [];
+  for (const message of messages) {
+    validateMessage(message);
+    tokens.push(imStart);
+    let roleStr;
+    if (message.role === "function" && message.name) {
+      roleStr = message.name;
+    } else if (message.name) {
+      roleStr = `${message.role}:${message.name}`;
+    } else {
+      roleStr = message.role;
+    }
+    tokens.push(...encode(roleStr, { encoding, allowSpecial: "none" }));
+    tokens.push(imSep);
+    if (message.content) {
+      tokens.push(
+        ...encode(message.content, { encoding, allowSpecial: "none" })
+      );
+    }
+    if (message.function_call) {
+      const fcContent = formatFunctionCall(message.function_call);
+      tokens.push(...encode(fcContent, { encoding, allowSpecial: "none" }));
+    }
+    tokens.push(imEnd);
+  }
+  if (primeAssistant) {
+    tokens.push(imStart);
+    tokens.push(...encode("assistant", { encoding, allowSpecial: "none" }));
+    tokens.push(imSep);
+  }
+  return tokens;
+}
+function validateChatModel(model, encodingOverride) {
+  if (model) {
+    if (isAnthropicModel(model)) {
+      throw new Error(
+        `Model "${model}" is an Anthropic model. encodeChat only supports OpenAI models.`
+      );
+    }
+    if (isGoogleModel(model)) {
+      throw new Error(
+        `Model "${model}" is a Google model. encodeChat only supports OpenAI models.`
+      );
+    }
+    if (isKnownModel(model) && !isChatModel(model)) {
+      throw new Error(
+        `Model "${model}" is not a chat completion model. encodeChat only supports chat models (e.g., gpt-4o, gpt-3.5-turbo).`
+      );
+    }
+  }
+  if (encodingOverride) {
+    return;
+  }
+  if (!model) {
+    throw new Error(
+      "Either model or encoding must be provided. Provide a known OpenAI chat model (e.g., gpt-4o) or an explicit encoding (e.g., o200k_base)."
+    );
+  }
+  if (!isChatModel(model)) {
+    throw new Error(
+      `Model "${model}" is not recognized as an OpenAI chat model. If this is a new OpenAI model, provide the encoding option explicitly (e.g., encoding: "o200k_base").`
+    );
+  }
+}
+function validateMessage(message) {
+  const msgAny = message;
+  if ("tool_calls" in msgAny && msgAny.tool_calls !== void 0) {
+    throw new Error(
+      "tool_calls is not supported. Use function_call with the legacy functions API."
+    );
+  }
+  if ("tool_call_id" in msgAny && msgAny.tool_call_id !== void 0) {
+    throw new Error(
+      "tool_call_id is not supported. Use the legacy functions API."
+    );
+  }
+  if (message.content !== null && message.content !== void 0 && typeof message.content !== "string") {
+    throw new Error(
+      "Multimodal content (arrays) is not supported. Only text content is supported."
+    );
+  }
+}
+function getChatTokens(encoding) {
+  if (encoding === "o200k_harmony") {
+    return {
+      imStart: HARMONY_TOKENS.start,
+      imEnd: HARMONY_TOKENS.end,
+      imSep: HARMONY_TOKENS.message
+    };
+  }
+  return CHAT_TOKENS[encoding] ?? null;
+}
+function formatFunctionCall(fc) {
+  const parts = [];
+  if (fc.name) parts.push(fc.name);
+  if (fc.arguments) parts.push(fc.arguments);
+  return parts.join("\n");
+}
export {
  DEFAULT_MODELS,
  LAST_UPDATED,
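This hunk also adds `isChatWithinTokenLimit`, which mirrors the message, name, and function-call overhead constants used by `countChatCompletionTokens` but returns `false` as soon as the running count passes `tokenLimit`. A rough usage sketch based on the fields destructured above (`messages`, `model`, `tokenLimit`, plus optional `encoding`, `functions`, `function_call`); treat it as illustrative, not a spec:

```ts
import { isChatWithinTokenLimit } from 'ai-token-estimator';

const fits = isChatWithinTokenLimit({
  model: 'gpt-4o',
  tokenLimit: 8000,
  messages: [
    { role: 'system', content: 'You are helpful.' },
    { role: 'user', content: 'Summarize this document...' }
  ]
});

// Number of prompt tokens when under the limit, false otherwise.
if (fits === false) {
  console.log('Chat prompt exceeds the 8000-token budget');
} else {
  console.log(`Chat prompt uses ${fits} tokens`);
}
```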
@@ -405159,6 +405522,7 @@ export {
  decodeSentencePiece,
  decodeSentencePieceAsync,
  encode,
+  encodeChat,
  encodeSentencePiece,
  encodeSentencePieceAsync,
  ensureSentencePieceModel,
@@ -405172,6 +405536,8 @@ export {
  getOpenAIEncoding,
  getSentencePieceTokenizer,
  getTotalCost,
+  isChatWithinTokenLimit,
+  isWithinTokenLimit,
  loadSentencePieceTokenizer,
  parseModelProto
};
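Both new limit helpers enforce their guard rails at runtime: `tokenLimit` must be a finite, non-negative integer, and Anthropic/Google model ids are rejected with a pointer to `estimateAsync()`. A small sketch of that failure path (the model id is just an arbitrary `claude-` example; any id with that prefix triggers the check):

```ts
import { isWithinTokenLimit } from 'ai-token-estimator';

try {
  // Rejected rather than silently miscounted with an OpenAI tokenizer.
  isWithinTokenLimit('Hello', 1000, { model: 'claude-example' });
} catch (err) {
  console.error((err as Error).message);
  // Model "claude-example" is an Anthropic model. isWithinTokenLimit only
  // supports OpenAI models. Use the Anthropic API's count_tokens endpoint
  // via estimateAsync() instead.
}
```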