ai-token-estimator 1.5.0 → 1.6.0
This diff compares the contents of two publicly released versions of this package as they appear in their public registry, and is provided for informational purposes only.
- package/README.md +58 -0
- package/dist/index.cjs +135 -0
- package/dist/index.d.cts +49 -1
- package/dist/index.d.ts +49 -1
- package/dist/index.js +134 -0
- package/package.json +1 -1
package/README.md
CHANGED

@@ -11,6 +11,7 @@ The best way to estimate **tokens + input cost** for LLM calls — with **exact
 ## Features
 
 - **Exact OpenAI tokenization** (tiktoken-compatible BPE): `encode()` / `decode()` / `openai_exact`
+- **Chat-aware tokenization**: `encodeChat()` returns exact token IDs for chat messages using ChatML format
 - **OpenAI chat completion token counting** (legacy `functions` API): `countChatCompletionTokens()` with optional per-message breakdown
 - **Pure TypeScript SentencePiece tokenizer** (no native dependencies):
   - Supports `.model` files (protobuf format)
@@ -64,6 +65,21 @@ console.log(countTokens({ text: 'Hello, world!', model: 'gpt-5.1' }));
 
 ## Quick Recipes
 
+### Encode chat messages to tokens (ChatML format)
+
+```ts
+import { encodeChat, decode } from 'ai-token-estimator';
+
+const tokens = encodeChat([
+  { role: 'system', content: 'You are helpful.' },
+  { role: 'user', content: 'Hello!' }
+], { model: 'gpt-4o' });
+
+console.log(tokens); // [200264, 9125, 200266, 2610, 525, 11190, 13, 200265, ...]
+console.log(decode(tokens, { encoding: 'o200k_base' }));
+// <|im_start|>system<|im_sep|>You are helpful.<|im_end|>...
+```
+
 ### OpenAI chat completion tokens (legacy functions API)
 
 ```ts
@@ -561,6 +577,48 @@ Encodes text into **OpenAI token IDs** using tiktoken-compatible BPE tokenization.
 
 Decodes OpenAI token IDs back into text using the selected encoding/model.
 
+### `encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[]`
+
+Encodes chat messages into **exact token IDs** using ChatML format. Returns the ChatML message prompt tokens (messages + optional assistant priming), including special delimiter tokens (`<|im_start|>`, `<|im_sep|>`, `<|im_end|>`).
+
+```ts
+import { encodeChat, decode } from 'ai-token-estimator';
+
+const tokens = encodeChat([
+  { role: 'system', content: 'You are helpful.' },
+  { role: 'user', content: 'Hello!' }
+], { model: 'gpt-4o' });
+
+// Tokens include ChatML structure:
+// <|im_start|>system<|im_sep|>You are helpful.<|im_end|>
+// <|im_start|>user<|im_sep|>Hello!<|im_end|>
+// <|im_start|>assistant<|im_sep|> (priming)
+```
+
+**Parameters:**
+
+```typescript
+interface EncodeChatOptions {
+  model?: string;            // OpenAI model (e.g., 'gpt-4o')
+  encoding?: OpenAIEncoding; // Explicit encoding override
+  primeAssistant?: boolean;  // Append assistant priming (default: true)
+}
+```
+
+**Supported encodings:**
+- `cl100k_base` (GPT-4, GPT-3.5-turbo)
+- `o200k_base` (GPT-4o, GPT-4o-mini)
+- `o200k_harmony` (experimental)
+
+**Limitations:**
+- **OpenAI models only** — throws for claude-*, gemini-*
+- **Legacy functions API only** — throws for tool_calls, tool_call_id
+- **Text content only** — throws for multimodal content (arrays)
+
+**Note on function_call:** Messages with `function_call` are encoded with the function name and arguments as content. The token count differs from `countChatCompletionTokens()` because the latter includes `FUNCTION_CALL_METADATA_TOKEN_OVERHEAD` (3 tokens) for API accounting. The exact difference depends on whether both name and arguments are present (2 token difference due to newline separator) or only one field is present (3 token difference).
+
+**Note on o200k_harmony:** Support for `o200k_harmony` encoding is experimental. The token structure may not match actual API behavior.
+
 ### `isWithinTokenLimit(text, tokenLimit, options?): false | number`
 
 Checks if text is within a token limit with **early exit optimization**. Returns `false` if the limit is exceeded, or the actual token count if within limit.
package/dist/index.cjs
CHANGED

@@ -44,6 +44,7 @@ __export(index_exports, {
   decodeSentencePiece: () => decodeSentencePiece,
   decodeSentencePieceAsync: () => decodeSentencePieceAsync,
   encode: () => encode,
+  encodeChat: () => encodeChat,
   encodeSentencePiece: () => encodeSentencePiece,
   encodeSentencePieceAsync: () => encodeSentencePieceAsync,
   ensureSentencePieceModel: () => ensureSentencePieceModel,
@@ -849,10 +850,18 @@ var CL100K_BASE_SPECIAL_TOKENS = [
   ["<|fim_prefix|>", 100258],
   ["<|fim_middle|>", 100259],
   ["<|fim_suffix|>", 100260],
+  // ChatML tokens for chat completion
+  ["<|im_start|>", 100264],
+  ["<|im_end|>", 100265],
+  ["<|im_sep|>", 100266],
   ["<|endofprompt|>", 100276]
 ];
 var O200K_BASE_SPECIAL_TOKENS = [
   ["<|endoftext|>", 199999],
+  // ChatML tokens for chat completion
+  ["<|im_start|>", 200264],
+  ["<|im_end|>", 200265],
+  ["<|im_sep|>", 200266],
   ["<|endofprompt|>", 200018]
 ];
 function buildO200kHarmonySpecialTokens() {
@@ -405440,6 +405449,131 @@ function isChatWithinTokenLimit(input) {
   }
   return count;
 }
+
+// src/encode-chat.ts
+var CHAT_TOKENS = {
+  cl100k_base: { imStart: 100264, imEnd: 100265, imSep: 100266 },
+  o200k_base: { imStart: 200264, imEnd: 200265, imSep: 200266 }
+};
+var HARMONY_TOKENS = {
+  start: 200006,
+  end: 200007,
+  message: 200008
+};
+function encodeChat(messages, options) {
+  const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+  validateChatModel(model, encodingOverride);
+  const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+  if (encoding === "o200k_harmony") {
+    console.warn(
+      "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+    );
+  }
+  const chatTokens = getChatTokens(encoding);
+  if (!chatTokens) {
+    throw new Error(
+      `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+    );
+  }
+  const { imStart, imEnd, imSep } = chatTokens;
+  const tokens = [];
+  for (const message of messages) {
+    validateMessage(message);
+    tokens.push(imStart);
+    let roleStr;
+    if (message.role === "function" && message.name) {
+      roleStr = message.name;
+    } else if (message.name) {
+      roleStr = `${message.role}:${message.name}`;
+    } else {
+      roleStr = message.role;
+    }
+    tokens.push(...encode(roleStr, { encoding, allowSpecial: "none" }));
+    tokens.push(imSep);
+    if (message.content) {
+      tokens.push(
+        ...encode(message.content, { encoding, allowSpecial: "none" })
+      );
+    }
+    if (message.function_call) {
+      const fcContent = formatFunctionCall(message.function_call);
+      tokens.push(...encode(fcContent, { encoding, allowSpecial: "none" }));
+    }
+    tokens.push(imEnd);
+  }
+  if (primeAssistant) {
+    tokens.push(imStart);
+    tokens.push(...encode("assistant", { encoding, allowSpecial: "none" }));
+    tokens.push(imSep);
+  }
+  return tokens;
+}
+function validateChatModel(model, encodingOverride) {
+  if (model) {
+    if (isAnthropicModel(model)) {
+      throw new Error(
+        `Model "${model}" is an Anthropic model. encodeChat only supports OpenAI models.`
+      );
+    }
+    if (isGoogleModel(model)) {
+      throw new Error(
+        `Model "${model}" is a Google model. encodeChat only supports OpenAI models.`
+      );
+    }
+    if (isKnownModel(model) && !isChatModel(model)) {
+      throw new Error(
+        `Model "${model}" is not a chat completion model. encodeChat only supports chat models (e.g., gpt-4o, gpt-3.5-turbo).`
+      );
+    }
+  }
+  if (encodingOverride) {
+    return;
+  }
+  if (!model) {
+    throw new Error(
+      "Either model or encoding must be provided. Provide a known OpenAI chat model (e.g., gpt-4o) or an explicit encoding (e.g., o200k_base)."
+    );
+  }
+  if (!isChatModel(model)) {
+    throw new Error(
+      `Model "${model}" is not recognized as an OpenAI chat model. If this is a new OpenAI model, provide the encoding option explicitly (e.g., encoding: "o200k_base").`
+    );
+  }
+}
+function validateMessage(message) {
+  const msgAny = message;
+  if ("tool_calls" in msgAny && msgAny.tool_calls !== void 0) {
+    throw new Error(
+      "tool_calls is not supported. Use function_call with the legacy functions API."
+    );
+  }
+  if ("tool_call_id" in msgAny && msgAny.tool_call_id !== void 0) {
+    throw new Error(
+      "tool_call_id is not supported. Use the legacy functions API."
+    );
+  }
+  if (message.content !== null && message.content !== void 0 && typeof message.content !== "string") {
+    throw new Error(
+      "Multimodal content (arrays) is not supported. Only text content is supported."
+    );
+  }
+}
+function getChatTokens(encoding) {
+  if (encoding === "o200k_harmony") {
+    return {
+      imStart: HARMONY_TOKENS.start,
+      imEnd: HARMONY_TOKENS.end,
+      imSep: HARMONY_TOKENS.message
+    };
+  }
+  return CHAT_TOKENS[encoding] ?? null;
+}
+function formatFunctionCall(fc) {
+  const parts = [];
+  if (fc.name) parts.push(fc.name);
+  if (fc.arguments) parts.push(fc.arguments);
+  return parts.join("\n");
+}
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
   DEFAULT_MODELS,
@@ -405456,6 +405590,7 @@ function isChatWithinTokenLimit(input) {
   decodeSentencePiece,
   decodeSentencePieceAsync,
   encode,
+  encodeChat,
   encodeSentencePiece,
   encodeSentencePieceAsync,
   ensureSentencePieceModel,
package/dist/index.d.cts
CHANGED

@@ -641,6 +641,54 @@ interface IsChatWithinTokenLimitInput {
  */
 declare function isChatWithinTokenLimit(input: IsChatWithinTokenLimitInput): false | number;
 
+/**
+ * Chat-aware tokenization using ChatML format.
+ *
+ * Encodes chat messages into ChatML message prompt tokens including special
+ * delimiter tokens (<|im_start|>, <|im_sep|>, <|im_end|>).
+ */
+
+/**
+ * Options for encodeChat.
+ */
+interface EncodeChatOptions {
+    /**
+     * OpenAI model ID used to select the appropriate encoding.
+     * Note: Non-OpenAI models (claude-*, gemini-*) are rejected.
+     */
+    model?: string;
+    /**
+     * Explicit OpenAI encoding override.
+     * When provided, this takes precedence over `model`.
+     */
+    encoding?: OpenAIEncoding;
+    /**
+     * Prime the output with the start of an assistant response.
+     * When true (default), appends <|im_start|>assistant<|im_sep|> at the end.
+     * Set to false to get just the messages without assistant priming.
+     */
+    primeAssistant?: boolean;
+}
+/**
+ * Encode chat messages into token IDs using ChatML format.
+ *
+ * Returns the exact token sequence that OpenAI models expect for chat
+ * completions, including special delimiter tokens.
+ *
+ * @param messages - Array of chat messages
+ * @param options - Encoding options
+ * @returns Token IDs representing the chat prompt
+ *
+ * @example
+ * ```typescript
+ * const tokens = encodeChat([
+ *   { role: 'system', content: 'You are helpful.' },
+ *   { role: 'user', content: 'Hello!' }
+ * ], { model: 'gpt-4o' });
+ * ```
+ */
+declare function encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[];
+
 interface AnthropicCountTokensParams {
     /** Claude model id, e.g. `claude-sonnet-4-5` */
     model: string;
@@ -916,4 +964,4 @@ declare function clearModelCache(): void;
  */
 declare function parseModelProto(buffer: Uint8Array): ModelProto;
 
-export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
+export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
package/dist/index.d.ts
CHANGED

@@ -641,6 +641,54 @@ interface IsChatWithinTokenLimitInput {
  */
 declare function isChatWithinTokenLimit(input: IsChatWithinTokenLimitInput): false | number;
 
+/**
+ * Chat-aware tokenization using ChatML format.
+ *
+ * Encodes chat messages into ChatML message prompt tokens including special
+ * delimiter tokens (<|im_start|>, <|im_sep|>, <|im_end|>).
+ */
+
+/**
+ * Options for encodeChat.
+ */
+interface EncodeChatOptions {
+    /**
+     * OpenAI model ID used to select the appropriate encoding.
+     * Note: Non-OpenAI models (claude-*, gemini-*) are rejected.
+     */
+    model?: string;
+    /**
+     * Explicit OpenAI encoding override.
+     * When provided, this takes precedence over `model`.
+     */
+    encoding?: OpenAIEncoding;
+    /**
+     * Prime the output with the start of an assistant response.
+     * When true (default), appends <|im_start|>assistant<|im_sep|> at the end.
+     * Set to false to get just the messages without assistant priming.
+     */
+    primeAssistant?: boolean;
+}
+/**
+ * Encode chat messages into token IDs using ChatML format.
+ *
+ * Returns the exact token sequence that OpenAI models expect for chat
+ * completions, including special delimiter tokens.
+ *
+ * @param messages - Array of chat messages
+ * @param options - Encoding options
+ * @returns Token IDs representing the chat prompt
+ *
+ * @example
+ * ```typescript
+ * const tokens = encodeChat([
+ *   { role: 'system', content: 'You are helpful.' },
+ *   { role: 'user', content: 'Hello!' }
+ * ], { model: 'gpt-4o' });
+ * ```
+ */
+declare function encodeChat(messages: ChatMessage[], options?: EncodeChatOptions): number[];
+
 interface AnthropicCountTokensParams {
     /** Claude model id, e.g. `claude-sonnet-4-5` */
     model: string;
@@ -916,4 +964,4 @@ declare function clearModelCache(): void;
  */
 declare function parseModelProto(buffer: Uint8Array): ModelProto;
 
-export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
+export { type AnthropicCountTokensParams, type ChatCompletionTokenCountInput, type ChatCompletionTokenCountOutput, type ChatMessage, type CostEstimate, DEFAULT_MODELS, type DataOptions, type DownloadOptions, type EncodeChatOptions, type EncodeOptions, type EstimateAsyncInput, type EstimateCostFromTextAsyncOptions, type EstimateCostFromTextOptions, type EstimateCostInput, type EstimateInput, type EstimateOutput, type FileOptions, type FunctionCallOption, type FunctionDefinition, type FunctionParameterProperty, type FunctionParameters, type GeminiCountTokensParams, type GemmaSentencePieceCountTokensParams, type IsChatWithinTokenLimitInput, type IsWithinTokenLimitOptions, type KnownTokenizer, LAST_UPDATED, type ModelConfig, type ModelInfo, type ModelProto, type NormalizerSpec, type OpenAIEncoding, type SentencePiece, type SentencePieceTokenizer, type SpecialTokenHandling, type TokenCountInput, type TokenCountOutput, type TokenizerMode, type TokenizerModeAsync, type TrainerSpec, clearModelCache, countAnthropicInputTokens, countChatCompletionTokens, countGeminiTokens, countGemmaSentencePieceTokens, countSentencePieceTokens, countSentencePieceTokensAsync, countTokens, decode, decodeSentencePiece, decodeSentencePieceAsync, encode, encodeChat, encodeSentencePiece, encodeSentencePieceAsync, ensureSentencePieceModel, estimate, estimateAsync, estimateCost, estimateCostFromText, estimateCostFromTextAsync, getAvailableModels, getModelConfig, getOpenAIEncoding, getSentencePieceTokenizer, getTotalCost, isChatWithinTokenLimit, isWithinTokenLimit, loadSentencePieceTokenizer, parseModelProto };
package/dist/index.js
CHANGED

@@ -783,10 +783,18 @@ var CL100K_BASE_SPECIAL_TOKENS = [
   ["<|fim_prefix|>", 100258],
   ["<|fim_middle|>", 100259],
   ["<|fim_suffix|>", 100260],
+  // ChatML tokens for chat completion
+  ["<|im_start|>", 100264],
+  ["<|im_end|>", 100265],
+  ["<|im_sep|>", 100266],
   ["<|endofprompt|>", 100276]
 ];
 var O200K_BASE_SPECIAL_TOKENS = [
   ["<|endoftext|>", 199999],
+  // ChatML tokens for chat completion
+  ["<|im_start|>", 200264],
+  ["<|im_end|>", 200265],
+  ["<|im_sep|>", 200266],
   ["<|endofprompt|>", 200018]
 ];
 function buildO200kHarmonySpecialTokens() {
@@ -405374,6 +405382,131 @@ function isChatWithinTokenLimit(input) {
   }
   return count;
 }
+
+// src/encode-chat.ts
+var CHAT_TOKENS = {
+  cl100k_base: { imStart: 100264, imEnd: 100265, imSep: 100266 },
+  o200k_base: { imStart: 200264, imEnd: 200265, imSep: 200266 }
+};
+var HARMONY_TOKENS = {
+  start: 200006,
+  end: 200007,
+  message: 200008
+};
+function encodeChat(messages, options) {
+  const { model, encoding: encodingOverride, primeAssistant = true } = options ?? {};
+  validateChatModel(model, encodingOverride);
+  const encoding = encodingOverride ?? (model ? getOpenAIEncoding({ model }) : "o200k_base");
+  if (encoding === "o200k_harmony") {
+    console.warn(
+      "[ai-token-estimator] o200k_harmony support is experimental. Token structure may not match actual API behavior."
+    );
+  }
+  const chatTokens = getChatTokens(encoding);
+  if (!chatTokens) {
+    throw new Error(
+      `Encoding "${encoding}" does not support chat format. Use cl100k_base or o200k_base for chat models.`
+    );
+  }
+  const { imStart, imEnd, imSep } = chatTokens;
+  const tokens = [];
+  for (const message of messages) {
+    validateMessage(message);
+    tokens.push(imStart);
+    let roleStr;
+    if (message.role === "function" && message.name) {
+      roleStr = message.name;
+    } else if (message.name) {
+      roleStr = `${message.role}:${message.name}`;
+    } else {
+      roleStr = message.role;
+    }
+    tokens.push(...encode(roleStr, { encoding, allowSpecial: "none" }));
+    tokens.push(imSep);
+    if (message.content) {
+      tokens.push(
+        ...encode(message.content, { encoding, allowSpecial: "none" })
+      );
+    }
+    if (message.function_call) {
+      const fcContent = formatFunctionCall(message.function_call);
+      tokens.push(...encode(fcContent, { encoding, allowSpecial: "none" }));
+    }
+    tokens.push(imEnd);
+  }
+  if (primeAssistant) {
+    tokens.push(imStart);
+    tokens.push(...encode("assistant", { encoding, allowSpecial: "none" }));
+    tokens.push(imSep);
+  }
+  return tokens;
+}
+function validateChatModel(model, encodingOverride) {
+  if (model) {
+    if (isAnthropicModel(model)) {
+      throw new Error(
+        `Model "${model}" is an Anthropic model. encodeChat only supports OpenAI models.`
+      );
+    }
+    if (isGoogleModel(model)) {
+      throw new Error(
+        `Model "${model}" is a Google model. encodeChat only supports OpenAI models.`
+      );
+    }
+    if (isKnownModel(model) && !isChatModel(model)) {
+      throw new Error(
+        `Model "${model}" is not a chat completion model. encodeChat only supports chat models (e.g., gpt-4o, gpt-3.5-turbo).`
+      );
+    }
+  }
+  if (encodingOverride) {
+    return;
+  }
+  if (!model) {
+    throw new Error(
+      "Either model or encoding must be provided. Provide a known OpenAI chat model (e.g., gpt-4o) or an explicit encoding (e.g., o200k_base)."
+    );
+  }
+  if (!isChatModel(model)) {
+    throw new Error(
+      `Model "${model}" is not recognized as an OpenAI chat model. If this is a new OpenAI model, provide the encoding option explicitly (e.g., encoding: "o200k_base").`
+    );
+  }
+}
+function validateMessage(message) {
+  const msgAny = message;
+  if ("tool_calls" in msgAny && msgAny.tool_calls !== void 0) {
+    throw new Error(
+      "tool_calls is not supported. Use function_call with the legacy functions API."
+    );
+  }
+  if ("tool_call_id" in msgAny && msgAny.tool_call_id !== void 0) {
+    throw new Error(
+      "tool_call_id is not supported. Use the legacy functions API."
+    );
+  }
+  if (message.content !== null && message.content !== void 0 && typeof message.content !== "string") {
+    throw new Error(
+      "Multimodal content (arrays) is not supported. Only text content is supported."
+    );
+  }
+}
+function getChatTokens(encoding) {
+  if (encoding === "o200k_harmony") {
+    return {
+      imStart: HARMONY_TOKENS.start,
+      imEnd: HARMONY_TOKENS.end,
+      imSep: HARMONY_TOKENS.message
+    };
+  }
+  return CHAT_TOKENS[encoding] ?? null;
+}
+function formatFunctionCall(fc) {
+  const parts = [];
+  if (fc.name) parts.push(fc.name);
+  if (fc.arguments) parts.push(fc.arguments);
+  return parts.join("\n");
+}
 export {
   DEFAULT_MODELS,
   LAST_UPDATED,
@@ -405389,6 +405522,7 @@ export {
   decodeSentencePiece,
   decodeSentencePieceAsync,
   encode,
+  encodeChat,
   encodeSentencePiece,
   encodeSentencePieceAsync,
   ensureSentencePieceModel,