@hyvmind/tiktoken-ts 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +557 -0
- package/dist/bpe.d.ts +171 -0
- package/dist/bpe.d.ts.map +1 -0
- package/dist/bpe.js +478 -0
- package/dist/bpe.js.map +1 -0
- package/dist/core/byte-pair-encoding.d.ts +49 -0
- package/dist/core/byte-pair-encoding.d.ts.map +1 -0
- package/dist/core/byte-pair-encoding.js +154 -0
- package/dist/core/byte-pair-encoding.js.map +1 -0
- package/dist/core/encoding-definitions.d.ts +95 -0
- package/dist/core/encoding-definitions.d.ts.map +1 -0
- package/dist/core/encoding-definitions.js +202 -0
- package/dist/core/encoding-definitions.js.map +1 -0
- package/dist/core/index.d.ts +12 -0
- package/dist/core/index.d.ts.map +1 -0
- package/dist/core/index.js +17 -0
- package/dist/core/index.js.map +1 -0
- package/dist/core/model-to-encoding.d.ts +36 -0
- package/dist/core/model-to-encoding.d.ts.map +1 -0
- package/dist/core/model-to-encoding.js +299 -0
- package/dist/core/model-to-encoding.js.map +1 -0
- package/dist/core/tiktoken.d.ts +126 -0
- package/dist/core/tiktoken.d.ts.map +1 -0
- package/dist/core/tiktoken.js +295 -0
- package/dist/core/tiktoken.js.map +1 -0
- package/dist/core/vocab-loader.d.ts +77 -0
- package/dist/core/vocab-loader.d.ts.map +1 -0
- package/dist/core/vocab-loader.js +176 -0
- package/dist/core/vocab-loader.js.map +1 -0
- package/dist/encodings/cl100k-base.d.ts +43 -0
- package/dist/encodings/cl100k-base.d.ts.map +1 -0
- package/dist/encodings/cl100k-base.js +142 -0
- package/dist/encodings/cl100k-base.js.map +1 -0
- package/dist/encodings/claude-estimation.d.ts +136 -0
- package/dist/encodings/claude-estimation.d.ts.map +1 -0
- package/dist/encodings/claude-estimation.js +160 -0
- package/dist/encodings/claude-estimation.js.map +1 -0
- package/dist/encodings/index.d.ts +9 -0
- package/dist/encodings/index.d.ts.map +1 -0
- package/dist/encodings/index.js +13 -0
- package/dist/encodings/index.js.map +1 -0
- package/dist/encodings/o200k-base.d.ts +58 -0
- package/dist/encodings/o200k-base.d.ts.map +1 -0
- package/dist/encodings/o200k-base.js +191 -0
- package/dist/encodings/o200k-base.js.map +1 -0
- package/dist/encodings/p50k-base.d.ts +44 -0
- package/dist/encodings/p50k-base.d.ts.map +1 -0
- package/dist/encodings/p50k-base.js +64 -0
- package/dist/encodings/p50k-base.js.map +1 -0
- package/dist/index.d.ts +61 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +109 -0
- package/dist/index.js.map +1 -0
- package/dist/models.d.ts +92 -0
- package/dist/models.d.ts.map +1 -0
- package/dist/models.js +320 -0
- package/dist/models.js.map +1 -0
- package/dist/tiktoken.d.ts +198 -0
- package/dist/tiktoken.d.ts.map +1 -0
- package/dist/tiktoken.js +331 -0
- package/dist/tiktoken.js.map +1 -0
- package/dist/tokenizer.d.ts +181 -0
- package/dist/tokenizer.d.ts.map +1 -0
- package/dist/tokenizer.js +436 -0
- package/dist/tokenizer.js.map +1 -0
- package/dist/types.d.ts +127 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +152 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +244 -0
- package/dist/utils.js.map +1 -0
- package/package.json +78 -0
package/dist/models.js
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model Configuration
|
|
3
|
+
* Maps model names to encodings and context limits
|
|
4
|
+
*
|
|
5
|
+
* Context limits are synced with tiktoken-rs:
|
|
6
|
+
* https://github.com/zurawiki/tiktoken-rs/blob/main/tiktoken-rs/src/model.rs
|
|
7
|
+
*/
|
|
8
|
+
import { getExactContextSize } from "./core/model-to-encoding.js";
|
|
9
|
+
/**
 * Build a single model configuration record.
 *
 * @param name - Canonical model name
 * @param encoding - Name of the encoding the model is tokenized with
 * @param maxOutputTokens - Maximum number of tokens the model may emit
 * @param family - Model family label
 * @param contextLimitOverride - Explicit context limit; when it is null or
 *   undefined the limit is looked up with getExactContextSize(name)
 * @returns Object with name, encoding, contextLimit, maxOutputTokens and family
 */
function createModelConfig(name, encoding, maxOutputTokens, family, contextLimitOverride) {
    let contextLimit = contextLimitOverride;
    // Only consult the lookup table when the caller gave no explicit limit.
    if (contextLimit === undefined || contextLimit === null) {
        contextLimit = getExactContextSize(name);
    }
    return { name, encoding, contextLimit, maxOutputTokens, family };
}
|
|
21
|
+
/**
 * Known models and their configuration, keyed by canonical model name.
 *
 * Each row below is the argument list handed to createModelConfig:
 *   [name, encoding, maxOutputTokens, family, contextLimitOverride?]
 * Rows without a fifth element get their context limit from the
 * tiktoken-rs derived lookup inside createModelConfig; max output tokens
 * come from OpenAI docs.
 */
export const MODEL_CONFIGS = Object.fromEntries([
    // ----- GPT-5 series (400K context from tiktoken-rs) -----
    ["gpt-5.2", "o200k_base", 16384, "gpt-5", 400_000],
    ["gpt-5.2-pro", "o200k_base", 16384, "gpt-5", 400_000],
    ["gpt-5", "o200k_base", 16384, "gpt-5"],
    ["gpt-5-mini", "o200k_base", 16384, "gpt-5"],
    ["gpt-5-nano", "o200k_base", 4096, "gpt-5"],
    ["gpt-5-turbo", "o200k_base", 16384, "gpt-5"],

    // ----- Codex series -----
    ["gpt-5.2-codex", "o200k_base", 32768, "codex", 192_000],
    ["gpt-5.1-codex", "o200k_base", 32768, "codex", 192_000],

    // ----- o-series reasoning models (200K context from tiktoken-rs) -----
    ["o1", "o200k_base", 32768, "o-series"],
    ["o1-mini", "o200k_base", 16384, "o-series"],
    ["o1-preview", "o200k_base", 16384, "o-series"],
    ["o3", "o200k_base", 32768, "o-series"],
    ["o3-mini", "o200k_base", 16384, "o-series"],
    ["o3-pro", "o200k_base", 65536, "o-series", 200_000],
    ["o4-mini", "o200k_base", 16384, "o-series"],

    // ----- GPT-4.1 series (1M+ context from tiktoken-rs) -----
    ["gpt-4.1", "o200k_base", 16384, "gpt-4.1"],
    ["gpt-4.1-mini", "o200k_base", 16384, "gpt-4.1"],
    ["gpt-4.1-nano", "o200k_base", 4096, "gpt-4.1"],

    // ----- GPT-4.5 preview (filed under the gpt-4o family) -----
    ["gpt-4.5-preview", "o200k_base", 16384, "gpt-4o"],

    // ----- GPT-4o series (128K context) -----
    ["gpt-4o", "o200k_base", 16384, "gpt-4o"],
    ["gpt-4o-mini", "o200k_base", 16384, "gpt-4o"],
    ["chatgpt-4o-latest", "o200k_base", 16384, "gpt-4o"],

    // ----- GPT-4 legacy series -----
    ["gpt-4-turbo", "cl100k_base", 4096, "gpt-4"],
    ["gpt-4-turbo-preview", "cl100k_base", 4096, "gpt-4"],
    ["gpt-4", "cl100k_base", 4096, "gpt-4"],
    ["gpt-4-32k", "cl100k_base", 4096, "gpt-4"],

    // ----- GPT-3.5 series -----
    ["gpt-3.5-turbo", "cl100k_base", 4096, "gpt-3.5"],
    ["gpt-3.5-turbo-16k", "cl100k_base", 4096, "gpt-3.5"],

    // ----- Embedding models (no generated output, hence 0 max output tokens) -----
    ["text-embedding-ada-002", "cl100k_base", 0, "embedding", 8192],
    ["text-embedding-3-small", "cl100k_base", 0, "embedding", 8191],
    ["text-embedding-3-large", "cl100k_base", 0, "embedding", 8191],

    // ----- Anthropic Claude models -----
    //
    // IMPORTANT: Claude's tokenizer is proprietary and NOT publicly available.
    // The "claude_estimation" encoding yields SAFE estimates that deliberately
    // over-count tokens (25% safety margin) so requests are not truncated.
    //
    // Research findings (see claude-tokenizer-research.md):
    // - Claude 3+ uses a ~22,000 token vocabulary (smaller than OpenAI's 100K-200K)
    // - Claude produces 16-30% MORE tokens than GPT-4 for equivalent content
    // - Roughly 3.5 characters per token on average (vs ~4 for GPT-4)
    //
    // For exact Claude counts, call Anthropic's /v1/messages/count_tokens API.
    // @see https://docs.anthropic.com/en/docs/build-with-claude/token-counting

    // Claude 4.5 (dash and dot spellings are both registered)
    ["claude-4-5-opus", "claude_estimation", 32768, "claude", 200_000],
    ["claude-4.5-opus", "claude_estimation", 32768, "claude", 200_000],
    ["claude-4-5-sonnet", "claude_estimation", 16384, "claude", 200_000],
    ["claude-4.5-sonnet", "claude_estimation", 16384, "claude", 200_000],
    ["claude-4-5-haiku", "claude_estimation", 8192, "claude", 200_000],
    ["claude-4.5-haiku", "claude_estimation", 8192, "claude", 200_000],

    // Claude 4.1
    ["claude-4-1-opus", "claude_estimation", 32768, "claude", 200_000],
    ["claude-4.1-opus", "claude_estimation", 32768, "claude", 200_000],
    ["claude-4-1-sonnet", "claude_estimation", 16384, "claude", 200_000],
    ["claude-4.1-sonnet", "claude_estimation", 16384, "claude", 200_000],

    // Claude 4
    ["claude-4-opus", "claude_estimation", 32768, "claude", 200_000],
    ["claude-4-sonnet", "claude_estimation", 16384, "claude", 200_000],

    // Claude 3.5
    ["claude-3-5-sonnet-20241022", "claude_estimation", 8192, "claude", 200_000],
    ["claude-3-5-sonnet-latest", "claude_estimation", 8192, "claude", 200_000],
    ["claude-3-5-sonnet", "claude_estimation", 8192, "claude", 200_000],
    ["claude-3-5-haiku-20241022", "claude_estimation", 8192, "claude", 200_000],
    ["claude-3-5-haiku-latest", "claude_estimation", 8192, "claude", 200_000],
    ["claude-3-5-haiku", "claude_estimation", 8192, "claude", 200_000],

    // Claude 3
    ["claude-3-opus-20240229", "claude_estimation", 4096, "claude", 200_000],
    ["claude-3-opus-latest", "claude_estimation", 4096, "claude", 200_000],
    ["claude-3-opus", "claude_estimation", 4096, "claude", 200_000],
    ["claude-3-sonnet-20240229", "claude_estimation", 4096, "claude", 200_000],
    ["claude-3-sonnet", "claude_estimation", 4096, "claude", 200_000],
    ["claude-3-haiku-20240307", "claude_estimation", 4096, "claude", 200_000],
    ["claude-3-haiku", "claude_estimation", 4096, "claude", 200_000],

    // Claude 2 (legacy)
    ["claude-2.1", "claude_estimation", 4096, "claude", 200_000],
    ["claude-2.0", "claude_estimation", 4096, "claude", 100_000],

    // Claude Instant (legacy)
    ["claude-instant-1.2", "claude_estimation", 4096, "claude", 100_000],

    // ----- DeepSeek models (approximated with cl100k_base) -----
    ["deepseek-chat", "cl100k_base", 4096, "deepseek", 32_768],
    ["deepseek-coder", "cl100k_base", 4096, "deepseek", 16_384],
    ["deepseek-reasoner", "cl100k_base", 8192, "deepseek", 65_536],

    // ----- Gemini models (approximated with cl100k_base) -----
    ["gemini-1.5-pro", "cl100k_base", 8192, "gemini", 1_000_000],
    ["gemini-1.5-flash", "cl100k_base", 8192, "gemini", 1_000_000],
    ["gemini-2.0-flash", "cl100k_base", 8192, "gemini", 1_000_000],
    ["gemini-2.0-flash-thinking", "cl100k_base", 16384, "gemini", 1_000_000],
].map((row) => [row[0], createModelConfig(...row)]));
|
|
148
|
+
/**
 * Alternative spellings accepted for model names.
 *
 * Each pair is [alias, canonical model name]; the canonical name is a key
 * of MODEL_CONFIGS.
 */
export const MODEL_ALIASES = Object.fromEntries([
    // Common aliases
    ["gpt4", "gpt-4"],
    ["gpt4o", "gpt-4o"],
    ["gpt4o-mini", "gpt-4o-mini"],
    ["gpt-4-omni", "gpt-4o"],
    ["gpt-4-omni-mini", "gpt-4o-mini"],

    // GPT-5
    ["gpt5", "gpt-5"],
    ["gpt5-mini", "gpt-5-mini"],
    ["gpt5-nano", "gpt-5-nano"],

    // GPT-3.5
    ["gpt-35-turbo", "gpt-3.5-turbo"],
    ["gpt35", "gpt-3.5-turbo"],
    ["chatgpt", "gpt-3.5-turbo"],

    // DeepSeek
    ["deepseek", "deepseek-chat"],
    ["deepseek-v3", "deepseek-chat"],

    // Gemini
    ["gemini", "gemini-1.5-flash"],
    ["gemini-pro", "gemini-1.5-pro"],
    ["gemini-flash", "gemini-1.5-flash"],

    // Claude: unversioned names point at the latest versions
    ["claude", "claude-4-5-sonnet"],
    ["claude-latest", "claude-4-5-sonnet"],
    ["claude-sonnet", "claude-4-5-sonnet"],
    ["claude-haiku", "claude-4-5-haiku"],
    ["claude-opus", "claude-4-5-opus"],

    // Claude 4.5
    ["claude4.5", "claude-4-5-sonnet"],
    ["claude-45", "claude-4-5-sonnet"],
    ["claude-45-sonnet", "claude-4-5-sonnet"],
    ["claude-45-opus", "claude-4-5-opus"],
    ["claude-45-haiku", "claude-4-5-haiku"],

    // Claude 4.1
    ["claude4.1", "claude-4-1-sonnet"],
    ["claude-41", "claude-4-1-sonnet"],
    ["claude-41-sonnet", "claude-4-1-sonnet"],
    ["claude-41-opus", "claude-4-1-opus"],

    // Claude 4
    ["claude4", "claude-4-sonnet"],
    ["claude-4", "claude-4-sonnet"],

    // Claude 3.5
    ["claude3.5", "claude-3-5-sonnet"],
    ["claude-3.5", "claude-3-5-sonnet"],
    ["claude-35-sonnet", "claude-3-5-sonnet"],
    ["claude-35-haiku", "claude-3-5-haiku"],

    // Claude 3
    ["claude3", "claude-3-opus"],
    ["claude-3", "claude-3-opus"],

    // Legacy
    ["claude-instant", "claude-instant-1.2"],
    ["claude-2", "claude-2.1"],
]);
|
|
205
|
+
/**
 * Get model configuration by name.
 *
 * Resolution order:
 *   1. exact match on the lower-cased, trimmed name;
 *   2. alias lookup through MODEL_ALIASES;
 *   3. the LONGEST known model name that is a prefix of the requested name
 *      (versioned names such as "gpt-4-0613" or "o1-mini-2024-09-12");
 *   4. the first known model name that the requested name is a prefix of
 *      (abbreviated names).
 *
 * @param modelName - Model name or alias
 * @returns Model configuration, or undefined when the name is not a
 *   non-empty string or nothing matches
 */
export function getModelConfig(modelName) {
    // Anything that is not a string cannot name a model.
    if (typeof modelName !== "string") {
        return undefined;
    }
    const normalizedName = modelName.toLowerCase().trim();
    // An empty name is a prefix of every key; treat it as unknown instead of
    // letting it resolve to whichever model happens to be listed first.
    if (normalizedName === "") {
        return undefined;
    }
    // Own-property checks keep inherited names ("constructor", "__proto__")
    // from being mistaken for table entries.
    const hasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
    // Direct match
    if (hasOwn(MODEL_CONFIGS, normalizedName)) {
        return MODEL_CONFIGS[normalizedName];
    }
    // Check aliases
    if (hasOwn(MODEL_ALIASES, normalizedName)) {
        const aliasedName = MODEL_ALIASES[normalizedName];
        if (hasOwn(MODEL_CONFIGS, aliasedName)) {
            return MODEL_CONFIGS[aliasedName];
        }
    }
    // Partial match (for versioned models like gpt-4-0613)
    let longestPrefixKey;
    let firstCompletionKey;
    for (const key of Object.keys(MODEL_CONFIGS)) {
        if (normalizedName.startsWith(key)) {
            // Prefer the most specific model: "o1-mini-..." must resolve to
            // "o1-mini" even though "o1" is listed earlier.
            if (longestPrefixKey === undefined || key.length > longestPrefixKey.length) {
                longestPrefixKey = key;
            }
        }
        else if (firstCompletionKey === undefined && key.startsWith(normalizedName)) {
            firstCompletionKey = key;
        }
    }
    const matchedKey = longestPrefixKey ?? firstCompletionKey;
    return matchedKey === undefined ? undefined : MODEL_CONFIGS[matchedKey];
}
|
|
231
|
+
/**
 * Look up which encoding a model is tokenized with.
 *
 * @param modelName - Model name
 * @returns The encoding recorded for the model; "o200k_base" when the model
 *   is unknown or its configuration carries no encoding
 */
export function getEncodingForModel(modelName) {
    const FALLBACK_ENCODING = "o200k_base";
    const resolved = getModelConfig(modelName);
    if (resolved === undefined || resolved === null) {
        return FALLBACK_ENCODING;
    }
    return resolved.encoding ?? FALLBACK_ENCODING;
}
|
|
241
|
+
/**
 * Look up the context window size of a model.
 *
 * @param modelName - Model name
 * @returns Context limit in tokens; 128000 when the model is unknown or its
 *   configuration has no context limit
 */
export function getModelContextLimit(modelName) {
    // Default to GPT-4o-class context
    const FALLBACK_CONTEXT_LIMIT = 128000;
    const resolved = getModelConfig(modelName);
    if (resolved === undefined || resolved === null) {
        return FALLBACK_CONTEXT_LIMIT;
    }
    return resolved.contextLimit ?? FALLBACK_CONTEXT_LIMIT;
}
|
|
251
|
+
/**
 * Look up how many tokens a model may generate in one response.
 *
 * @param modelName - Model name
 * @returns Max output tokens; 4096 when the model is unknown or its
 *   configuration has no value (a configured 0 is returned as 0)
 */
export function getModelMaxOutputTokens(modelName) {
    // Default to standard output limit
    const FALLBACK_MAX_OUTPUT = 4096;
    const resolved = getModelConfig(modelName);
    if (resolved === undefined || resolved === null) {
        return FALLBACK_MAX_OUTPUT;
    }
    return resolved.maxOutputTokens ?? FALLBACK_MAX_OUTPUT;
}
|
|
261
|
+
/**
 * Look up the family a model belongs to.
 *
 * @param modelName - Model name
 * @returns Model family classification; "unknown" when the model cannot be
 *   resolved or its configuration has no family
 */
export function getModelFamily(modelName) {
    const resolved = getModelConfig(modelName);
    if (resolved === undefined || resolved === null) {
        return "unknown";
    }
    return resolved.family ?? "unknown";
}
|
|
271
|
+
/**
 * Tell whether a model resolves to the newer o200k_base encoding.
 *
 * @param modelName - Model name
 * @returns True if getEncodingForModel reports "o200k_base" for the model
 */
export function usesO200kEncoding(modelName) {
    const encodingName = getEncodingForModel(modelName);
    return encodingName === "o200k_base";
}
|
|
280
|
+
/**
 * Tell whether a model resolves to the Claude estimation encoding.
 *
 * Claude's tokenizer is proprietary, so this encoding produces "safe"
 * estimates that deliberately over-count tokens to avoid API truncation
 * issues.
 *
 * @param modelName - Model name
 * @returns True if getEncodingForModel reports "claude_estimation" for the model
 */
export function usesClaudeEstimation(modelName) {
    const encodingName = getEncodingForModel(modelName);
    return encodingName === "claude_estimation";
}
|
|
293
|
+
/**
 * Tell whether a model belongs to the "claude" family.
 *
 * @param modelName - Model name
 * @returns True if the resolved configuration's family is "claude"; false
 *   for any other family or when the model cannot be resolved
 */
export function isClaudeModel(modelName) {
    const resolved = getModelConfig(modelName);
    if (resolved === undefined || resolved === null) {
        return false;
    }
    return resolved.family === "claude";
}
|
|
303
|
+
/**
 * Enumerate every model name registered in MODEL_CONFIGS.
 *
 * @returns Array of model names, in the order they are registered
 */
export function listModels() {
    return Object.entries(MODEL_CONFIGS).map(([modelName]) => modelName);
}
|
|
311
|
+
/**
 * Collect the configurations of all models in one family.
 *
 * @param family - Model family to filter
 * @returns Array of model configurations whose family equals the argument,
 *   in registration order
 */
export function listModelsByFamily(family) {
    const matches = [];
    for (const candidate of Object.values(MODEL_CONFIGS)) {
        if (candidate.family === family) {
            matches.push(candidate);
        }
    }
    return matches;
}
|
|
320
|
+
//# sourceMappingURL=models.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"models.js","sourceRoot":"","sources":["../src/models.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,OAAO,EAAE,mBAAmB,EAAE,MAAM,6BAA6B,CAAC;AAElE;;GAEG;AACH,SAAS,iBAAiB,CACxB,IAAY,EACZ,QAAsB,EACtB,eAAuB,EACvB,MAAmB,EACnB,oBAA6B;IAE7B,OAAO;QACL,IAAI;QACJ,QAAQ;QACR,YAAY,EAAE,oBAAoB,IAAI,mBAAmB,CAAC,IAAI,CAAC;QAC/D,eAAe;QACf,MAAM;KACP,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,MAAM,aAAa,GAAgC;IACxD,6EAA6E;IAC7E,+CAA+C;IAC/C,6EAA6E;IAC7E,SAAS,EAAE,iBAAiB,CAC1B,SAAS,EACT,YAAY,EACZ,KAAK,EACL,OAAO,EACP,OAAO,CACR;IACD,aAAa,EAAE,iBAAiB,CAC9B,aAAa,EACb,YAAY,EACZ,KAAK,EACL,OAAO,EACP,OAAO,CACR;IACD,OAAO,EAAE,iBAAiB,CAAC,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,OAAO,CAAC;IACjE,YAAY,EAAE,iBAAiB,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,OAAO,CAAC;IAC3E,YAAY,EAAE,iBAAiB,CAAC,YAAY,EAAE,YAAY,EAAE,IAAI,EAAE,OAAO,CAAC;IAC1E,aAAa,EAAE,iBAAiB,CAAC,aAAa,EAAE,YAAY,EAAE,KAAK,EAAE,OAAO,CAAC;IAE7E,6EAA6E;IAC7E,eAAe;IACf,6EAA6E;IAC7E,eAAe,EAAE,iBAAiB,CAChC,eAAe,EACf,YAAY,EACZ,KAAK,EACL,OAAO,EACP,OAAO,CACR;IACD,eAAe,EAAE,iBAAiB,CAChC,eAAe,EACf,YAAY,EACZ,KAAK,EACL,OAAO,EACP,OAAO,CACR;IAED,6EAA6E;IAC7E,4DAA4D;IAC5D,6EAA6E;IAC7E,EAAE,EAAE,iBAAiB,CAAC,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,UAAU,CAAC;IAC5D,SAAS,EAAE,iBAAiB,CAAC,SAAS,EAAE,YAAY,EAAE,KAAK,EAAE,UAAU,CAAC;IACxE,YAAY,EAAE,iBAAiB,CAC7B,YAAY,EACZ,YAAY,EACZ,KAAK,EACL,UAAU,CACX;IACD,EAAE,EAAE,iBAAiB,CAAC,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,UAAU,CAAC;IAC5D,SAAS,EAAE,iBAAiB,CAAC,SAAS,EAAE,YAAY,EAAE,KAAK,EAAE,UAAU,CAAC;IACxE,QAAQ,EAAE,iBAAiB,CACzB,QAAQ,EACR,YAAY,EACZ,KAAK,EACL,UAAU,EACV,OAAO,CACR;IACD,SAAS,EAAE,iBAAiB,CAAC,SAAS,EAAE,YAAY,EAAE,KAAK,EAAE,UAAU,CAAC;IAExE,6EAA6E;IAC7E,iDAAiD;IACjD,6EAA6E;IAC7E,SAAS,EAAE,iBAAiB,CAAC,SAAS,EAAE,YAAY,EAAE,KAAK,EAAE,SAAS,CAAC;IACvE,cAAc,EAAE,iBAAiB,CAC/B,cAAc,EACd,YAAY,EACZ,KAAK,EACL,SAAS,CACV;IACD,cAAc,EAAE,iBAAiB,CAC/B,cAAc,EACd,YAAY,EACZ,IAAI,EACJ,SAAS,CACV;IAED,6EAA6E;IAC7E,kBAAkB;IAClB,6EAA6E;IAC7E,iBAAiB,EAAE,iBAAiB,CAClC,iBAAiB,EACjB,YAAY,EACZ,KAAK,EACL,QAAQ,CACT;IAED,6EAA6E;IAC7E,+BAA+B;IAC/B,6EAA6E;IAC7E,QAA
Q,EAAE,iBAAiB,CAAC,QAAQ,EAAE,YAAY,EAAE,KAAK,EAAE,QAAQ,CAAC;IACpE,aAAa,EAAE,iBAAiB,CAC9B,aAAa,EACb,YAAY,EACZ,KAAK,EACL,QAAQ,CACT;IACD,mBAAmB,EAAE,iBAAiB,CACpC,mBAAmB,EACnB,YAAY,EACZ,KAAK,EACL,QAAQ,CACT;IAED,6EAA6E;IAC7E,sBAAsB;IACtB,6EAA6E;IAC7E,aAAa,EAAE,iBAAiB,CAAC,aAAa,EAAE,aAAa,EAAE,IAAI,EAAE,OAAO,CAAC;IAC7E,qBAAqB,EAAE,iBAAiB,CACtC,qBAAqB,EACrB,aAAa,EACb,IAAI,EACJ,OAAO,CACR;IACD,OAAO,EAAE,iBAAiB,CAAC,OAAO,EAAE,aAAa,EAAE,IAAI,EAAE,OAAO,CAAC;IACjE,WAAW,EAAE,iBAAiB,CAAC,WAAW,EAAE,aAAa,EAAE,IAAI,EAAE,OAAO,CAAC;IAEzE,6EAA6E;IAC7E,iBAAiB;IACjB,6EAA6E;IAC7E,eAAe,EAAE,iBAAiB,CAChC,eAAe,EACf,aAAa,EACb,IAAI,EACJ,SAAS,CACV;IACD,mBAAmB,EAAE,iBAAiB,CACpC,mBAAmB,EACnB,aAAa,EACb,IAAI,EACJ,SAAS,CACV;IAED,6EAA6E;IAC7E,mBAAmB;IACnB,6EAA6E;IAC7E,wBAAwB,EAAE,iBAAiB,CACzC,wBAAwB,EACxB,aAAa,EACb,CAAC,EACD,WAAW,EACX,IAAI,CACL;IACD,wBAAwB,EAAE,iBAAiB,CACzC,wBAAwB,EACxB,aAAa,EACb,CAAC,EACD,WAAW,EACX,IAAI,CACL;IACD,wBAAwB,EAAE,iBAAiB,CACzC,wBAAwB,EACxB,aAAa,EACb,CAAC,EACD,WAAW,EACX,IAAI,CACL;IAED,6EAA6E;IAC7E,0BAA0B;IAC1B,EAAE;IACF,iFAAiF;IACjF,8EAA8E;IAC9E,0EAA0E;IAC1E,EAAE;IACF,wDAAwD;IACxD,8EAA8E;IAC9E,yEAAyE;IACzE,sDAAsD;IACtD,EAAE;IACF,gFAAgF;IAChF,2EAA2E;IAC3E,6EAA6E;IAE7E,oBAAoB;IACpB,iBAAiB,EAAE,iBAAiB,CAClC,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,EACL,QAAQ,EACR,OAAO,CACR;IACD,iBAAiB,EAAE,iBAAiB,CAClC,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,EACL,QAAQ,EACR,OAAO,CACR;IACD,mBAAmB,EAAE,iBAAiB,CACpC,mBAAmB,EACnB,mBAAmB,EACnB,KAAK,EACL,QAAQ,EACR,OAAO,CACR;IACD,mBAAmB,EAAE,iBAAiB,CACpC,mBAAmB,EACnB,mBAAmB,EACnB,KAAK,EACL,QAAQ,EACR,OAAO,CACR;IACD,kBAAkB,EAAE,iBAAiB,CACnC,kBAAkB,EAClB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,kBAAkB,EAAE,iBAAiB,CACnC,kBAAkB,EAClB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IAED,oBAAoB;IACpB,iBAAiB,EAAE,iBAAiB,CAClC,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,EACL,QAAQ,EACR,OAAO,CACR;IACD,iBAAiB,EAAE,iBAAiB,CAClC,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,EACL,QAAQ,EACR,OAAO,CACR;IACD,mBAAmB,EAAE,iBAAiB,CACpC,mBAAmB,EACnB,mBAAmB,EACnB,KAAK,EACL,QAAQ,EACR,OAAO,CACR;IACD,mBAAmB,EAAE,iBAAiB,CACpC
,mBAAmB,EACnB,mBAAmB,EACnB,KAAK,EACL,QAAQ,EACR,OAAO,CACR;IAED,kBAAkB;IAClB,eAAe,EAAE,iBAAiB,CAChC,eAAe,EACf,mBAAmB,EACnB,KAAK,EACL,QAAQ,EACR,OAAO,CACR;IACD,iBAAiB,EAAE,iBAAiB,CAClC,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,EACL,QAAQ,EACR,OAAO,CACR;IAED,oBAAoB;IACpB,4BAA4B,EAAE,iBAAiB,CAC7C,4BAA4B,EAC5B,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,0BAA0B,EAAE,iBAAiB,CAC3C,0BAA0B,EAC1B,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,mBAAmB,EAAE,iBAAiB,CACpC,mBAAmB,EACnB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,2BAA2B,EAAE,iBAAiB,CAC5C,2BAA2B,EAC3B,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,yBAAyB,EAAE,iBAAiB,CAC1C,yBAAyB,EACzB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,kBAAkB,EAAE,iBAAiB,CACnC,kBAAkB,EAClB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IAED,kBAAkB;IAClB,wBAAwB,EAAE,iBAAiB,CACzC,wBAAwB,EACxB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,sBAAsB,EAAE,iBAAiB,CACvC,sBAAsB,EACtB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,eAAe,EAAE,iBAAiB,CAChC,eAAe,EACf,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,0BAA0B,EAAE,iBAAiB,CAC3C,0BAA0B,EAC1B,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,iBAAiB,EAAE,iBAAiB,CAClC,iBAAiB,EACjB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,yBAAyB,EAAE,iBAAiB,CAC1C,yBAAyB,EACzB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,gBAAgB,EAAE,iBAAiB,CACjC,gBAAgB,EAChB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IAED,2BAA2B;IAC3B,YAAY,EAAE,iBAAiB,CAC7B,YAAY,EACZ,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IACD,YAAY,EAAE,iBAAiB,CAC7B,YAAY,EACZ,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IAED,0BAA0B;IAC1B,oBAAoB,EAAE,iBAAiB,CACrC,oBAAoB,EACpB,mBAAmB,EACnB,IAAI,EACJ,QAAQ,EACR,OAAO,CACR;IAED,6EAA6E;IAC7E,mDAAmD;IACnD,6EAA6E;IAC7E,eAAe,EAAE,iBAAiB,CAChC,eAAe,EACf,aAAa,EACb,IAAI,EACJ,UAAU,EACV,MAAM,CACP;IACD,gBAAgB,EAAE,iBAAiB,CACjC,gBAAgB,EAChB,aAAa,EACb,IAAI,EACJ,UAAU,EACV,MAAM,CACP;IACD,mBAAmB,EAAE,iBAAiB,CACpC,mBAAmB,EACnB,aAAa,EACb,IAAI,EACJ,UAAU,EACV,MAAM,CACP;IAED,6EAA6E;IAC7E,iDAAiD;IACjD,6EAA6E;IAC7E,gBAAgB,EAAE,iBAAiB,CACj
C,gBAAgB,EAChB,aAAa,EACb,IAAI,EACJ,QAAQ,EACR,SAAS,CACV;IACD,kBAAkB,EAAE,iBAAiB,CACnC,kBAAkB,EAClB,aAAa,EACb,IAAI,EACJ,QAAQ,EACR,SAAS,CACV;IACD,kBAAkB,EAAE,iBAAiB,CACnC,kBAAkB,EAClB,aAAa,EACb,IAAI,EACJ,QAAQ,EACR,SAAS,CACV;IACD,2BAA2B,EAAE,iBAAiB,CAC5C,2BAA2B,EAC3B,aAAa,EACb,KAAK,EACL,QAAQ,EACR,SAAS,CACV;CACF,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,aAAa,GAA2B;IACnD,iBAAiB;IACjB,IAAI,EAAE,OAAO;IACb,KAAK,EAAE,QAAQ;IACf,YAAY,EAAE,aAAa;IAC3B,YAAY,EAAE,QAAQ;IACtB,iBAAiB,EAAE,aAAa;IAEhC,gBAAgB;IAChB,IAAI,EAAE,OAAO;IACb,WAAW,EAAE,YAAY;IACzB,WAAW,EAAE,YAAY;IAEzB,kBAAkB;IAClB,cAAc,EAAE,eAAe;IAC/B,KAAK,EAAE,eAAe;IACtB,OAAO,EAAE,eAAe;IAExB,mBAAmB;IACnB,QAAQ,EAAE,eAAe;IACzB,aAAa,EAAE,eAAe;IAE9B,iBAAiB;IACjB,MAAM,EAAE,kBAAkB;IAC1B,YAAY,EAAE,gBAAgB;IAC9B,cAAc,EAAE,kBAAkB;IAElC,8CAA8C;IAC9C,MAAM,EAAE,mBAAmB;IAC3B,eAAe,EAAE,mBAAmB;IACpC,eAAe,EAAE,mBAAmB;IACpC,cAAc,EAAE,kBAAkB;IAClC,aAAa,EAAE,iBAAiB;IAEhC,qBAAqB;IACrB,WAAW,EAAE,mBAAmB;IAChC,WAAW,EAAE,mBAAmB;IAChC,kBAAkB,EAAE,mBAAmB;IACvC,gBAAgB,EAAE,iBAAiB;IACnC,iBAAiB,EAAE,kBAAkB;IAErC,qBAAqB;IACrB,WAAW,EAAE,mBAAmB;IAChC,WAAW,EAAE,mBAAmB;IAChC,kBAAkB,EAAE,mBAAmB;IACvC,gBAAgB,EAAE,iBAAiB;IAEnC,mBAAmB;IACnB,OAAO,EAAE,iBAAiB;IAC1B,UAAU,EAAE,iBAAiB;IAE7B,qBAAqB;IACrB,WAAW,EAAE,mBAAmB;IAChC,YAAY,EAAE,mBAAmB;IACjC,kBAAkB,EAAE,mBAAmB;IACvC,iBAAiB,EAAE,kBAAkB;IAErC,mBAAmB;IACnB,OAAO,EAAE,eAAe;IACxB,UAAU,EAAE,eAAe;IAE3B,iBAAiB;IACjB,gBAAgB,EAAE,oBAAoB;IACtC,UAAU,EAAE,YAAY;CACzB,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,SAAiB;IAC9C,MAAM,cAAc,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAEtD,eAAe;IACf,IAAI,aAAa,CAAC,cAAc,CAAC,EAAE,CAAC;QAClC,OAAO,aAAa,CAAC,cAAc,CAAC,CAAC;IACvC,CAAC;IAED,gBAAgB;IAChB,MAAM,WAAW,GAAG,aAAa,CAAC,cAAc,CAAC,CAAC;IAClD,IAAI,WAAW,IAAI,aAAa,CAAC,WAAW,CAAC,EAAE,CAAC;QAC9C,OAAO,aAAa,CAAC,WAAW,CAAC,CAAC;IACpC,CAAC;IAED,uDAAuD;IACvD,KAAK,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1D,IAAI,cAAc,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;YACrE,OAAO,MAAM,CAAC;QAChB,CA
AC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,mBAAmB,CAAC,SAAiB;IACnD,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;IACzC,OAAO,MAAM,EAAE,QAAQ,IAAI,YAAY,CAAC;AAC1C,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,oBAAoB,CAAC,SAAiB;IACpD,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;IACzC,OAAO,MAAM,EAAE,YAAY,IAAI,MAAM,CAAC,CAAC,kCAAkC;AAC3E,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,uBAAuB,CAAC,SAAiB;IACvD,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;IACzC,OAAO,MAAM,EAAE,eAAe,IAAI,IAAI,CAAC,CAAC,mCAAmC;AAC7E,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAAC,SAAiB;IAC9C,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;IACzC,OAAO,MAAM,EAAE,MAAM,IAAI,SAAS,CAAC;AACrC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,SAAiB;IACjD,OAAO,mBAAmB,CAAC,SAAS,CAAC,KAAK,YAAY,CAAC;AACzD,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,oBAAoB,CAAC,SAAiB;IACpD,OAAO,mBAAmB,CAAC,SAAS,CAAC,KAAK,mBAAmB,CAAC;AAChE,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,SAAiB;IAC7C,MAAM,MAAM,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;IACzC,OAAO,MAAM,EAAE,MAAM,KAAK,QAAQ,CAAC;AACrC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,UAAU;IACxB,OAAO,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;AACpC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,kBAAkB,CAAC,MAAmB;IACpD,OAAO,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,MAAM,CACxC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,KAAK,MAAM,CACrC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* High-Level Tiktoken API
|
|
3
|
+
*
|
|
4
|
+
* Provides easy-to-use tokenization APIs with automatic vocabulary loading.
|
|
5
|
+
*
|
|
6
|
+
* This module is the main entry point for most users. It provides:
|
|
7
|
+
* - Lazy loading of vocabularies
|
|
8
|
+
* - Caching of tokenizer instances
|
|
9
|
+
* - Support for both sync and async operations
|
|
10
|
+
* - Model name resolution
|
|
11
|
+
*/
|
|
12
|
+
import { type Rank, type EncodingDefinition } from "./core/index.js";
|
|
13
|
+
/**
|
|
14
|
+
* Tiktoken encoding instance
|
|
15
|
+
*
|
|
16
|
+
* This class wraps CoreBPE and provides:
|
|
17
|
+
* - Lazy initialization
|
|
18
|
+
* - Convenient API matching tiktoken-rs
|
|
19
|
+
* - Error handling for unloaded vocabularies
|
|
20
|
+
*/
|
|
21
|
+
export declare class Tiktoken {
|
|
22
|
+
/** The encoding name */
|
|
23
|
+
readonly name: string;
|
|
24
|
+
/** The encoding definition */
|
|
25
|
+
private readonly definition;
|
|
26
|
+
/** The underlying CoreBPE instance (lazy loaded) */
|
|
27
|
+
private coreBPE;
|
|
28
|
+
/** Promise for async initialization */
|
|
29
|
+
private initPromise;
|
|
30
|
+
/** Whether the vocabulary has been loaded */
|
|
31
|
+
private isLoaded;
|
|
32
|
+
/**
|
|
33
|
+
* Create a new Tiktoken instance
|
|
34
|
+
*
|
|
35
|
+
* Note: The vocabulary is NOT loaded until you call load() or one of the
|
|
36
|
+
* encoding methods with a loaded vocabulary.
|
|
37
|
+
*
|
|
38
|
+
* @param encodingName - The encoding name (e.g., "cl100k_base", "o200k_base")
|
|
39
|
+
*/
|
|
40
|
+
constructor(encodingName: string);
|
|
41
|
+
/**
|
|
42
|
+
* Initialize from cached vocabulary
|
|
43
|
+
*/
|
|
44
|
+
private initFromCache;
|
|
45
|
+
/**
|
|
46
|
+
* Initialize the CoreBPE instance
|
|
47
|
+
*/
|
|
48
|
+
private initCoreBPE;
|
|
49
|
+
/**
|
|
50
|
+
* Load the vocabulary from a URL
|
|
51
|
+
*
|
|
52
|
+
* @returns Promise that resolves when loaded
|
|
53
|
+
*/
|
|
54
|
+
load(): Promise<void>;
|
|
55
|
+
/**
|
|
56
|
+
* Load the vocabulary from a string (for embedded vocabularies)
|
|
57
|
+
*
|
|
58
|
+
* @param content - The vocabulary file content
|
|
59
|
+
*/
|
|
60
|
+
loadFromString(content: string): void;
|
|
61
|
+
/**
|
|
62
|
+
* Ensure the vocabulary is loaded
|
|
63
|
+
*/
|
|
64
|
+
private ensureLoaded;
|
|
65
|
+
/**
|
|
66
|
+
* Check if the vocabulary is loaded
|
|
67
|
+
*/
|
|
68
|
+
get loaded(): boolean;
|
|
69
|
+
/**
|
|
70
|
+
* Encode text into token IDs without handling special tokens
|
|
71
|
+
*
|
|
72
|
+
* @param text - Text to encode
|
|
73
|
+
* @returns Array of token IDs
|
|
74
|
+
*/
|
|
75
|
+
encodeOrdinary(text: string): Rank[];
|
|
76
|
+
/**
|
|
77
|
+
* Encode text into token IDs
|
|
78
|
+
*
|
|
79
|
+
* @param text - Text to encode
|
|
80
|
+
* @param allowedSpecial - Set of special tokens to allow, or "all" (default: none)
|
|
81
|
+
* @returns Array of token IDs
|
|
82
|
+
*/
|
|
83
|
+
encode(text: string, allowedSpecial?: Set<string> | "all"): Rank[];
|
|
84
|
+
/**
|
|
85
|
+
* Encode text with all special tokens allowed
|
|
86
|
+
*
|
|
87
|
+
* @param text - Text to encode
|
|
88
|
+
* @returns Array of token IDs
|
|
89
|
+
*/
|
|
90
|
+
encodeWithSpecialTokens(text: string): Rank[];
|
|
91
|
+
/**
|
|
92
|
+
* Decode token IDs back to text
|
|
93
|
+
*
|
|
94
|
+
* @param tokens - Array of token IDs
|
|
95
|
+
* @returns Decoded text
|
|
96
|
+
*/
|
|
97
|
+
decode(tokens: Rank[]): string;
|
|
98
|
+
/**
|
|
99
|
+
* Decode token IDs to raw bytes
|
|
100
|
+
*
|
|
101
|
+
* @param tokens - Array of token IDs
|
|
102
|
+
* @returns Decoded bytes
|
|
103
|
+
*/
|
|
104
|
+
decodeBytes(tokens: Rank[]): Uint8Array;
|
|
105
|
+
/**
|
|
106
|
+
* Count tokens in text (without returning the tokens)
|
|
107
|
+
*
|
|
108
|
+
* @param text - Text to count
|
|
109
|
+
* @returns Number of tokens
|
|
110
|
+
*/
|
|
111
|
+
countTokens(text: string): number;
|
|
112
|
+
/**
|
|
113
|
+
* Get the vocabulary size (excluding special tokens)
|
|
114
|
+
*/
|
|
115
|
+
get vocabSize(): number;
|
|
116
|
+
/**
|
|
117
|
+
* Get the total vocabulary size (including special tokens)
|
|
118
|
+
*/
|
|
119
|
+
get totalVocabSize(): number;
|
|
120
|
+
/**
|
|
121
|
+
* Get all special tokens
|
|
122
|
+
*/
|
|
123
|
+
getSpecialTokens(): Set<string>;
|
|
124
|
+
/**
|
|
125
|
+
* Check if a token ID is a special token
|
|
126
|
+
*/
|
|
127
|
+
isSpecialToken(token: Rank): boolean;
|
|
128
|
+
/**
|
|
129
|
+
* Get the definition for this encoding
|
|
130
|
+
*/
|
|
131
|
+
getDefinition(): EncodingDefinition;
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Get or create a Tiktoken instance for an encoding
|
|
135
|
+
*
|
|
136
|
+
* @param encodingName - Encoding name
|
|
137
|
+
* @returns Tiktoken instance (may not be loaded yet)
|
|
138
|
+
*/
|
|
139
|
+
export declare function getEncoding(encodingName: string): Tiktoken;
|
|
140
|
+
/**
|
|
141
|
+
* Get or create a Tiktoken instance for a model
|
|
142
|
+
*
|
|
143
|
+
* @param modelName - Model name
|
|
144
|
+
* @returns Tiktoken instance (may not be loaded yet)
|
|
145
|
+
* @throws Error if no encoding is found for the model
|
|
146
|
+
*/
|
|
147
|
+
export declare function getEncodingForModel(modelName: string): Tiktoken;
|
|
148
|
+
/**
|
|
149
|
+
* Get a loaded Tiktoken instance for an encoding (async)
|
|
150
|
+
*
|
|
151
|
+
* @param encodingName - Encoding name
|
|
152
|
+
* @returns Promise resolving to a loaded Tiktoken instance
|
|
153
|
+
*/
|
|
154
|
+
export declare function getEncodingAsync(encodingName: string): Promise<Tiktoken>;
|
|
155
|
+
/**
|
|
156
|
+
* Get a loaded Tiktoken instance for a model (async)
|
|
157
|
+
*
|
|
158
|
+
* @param modelName - Model name
|
|
159
|
+
* @returns Promise resolving to a loaded Tiktoken instance
|
|
160
|
+
*/
|
|
161
|
+
export declare function getEncodingForModelAsync(modelName: string): Promise<Tiktoken>;
|
|
162
|
+
/**
|
|
163
|
+
* Clear the Tiktoken instance cache
|
|
164
|
+
*/
|
|
165
|
+
export declare function clearTiktokenCache(): void;
|
|
166
|
+
/**
|
|
167
|
+
* Encode text using a specific encoding (async)
|
|
168
|
+
*
|
|
169
|
+
* @param text - Text to encode
|
|
170
|
+
* @param encodingName - Encoding name (optional)
|
|
171
|
+
* @returns Promise resolving to token IDs
|
|
172
|
+
*/
|
|
173
|
+
export declare function encodeAsync(text: string, encodingName?: string): Promise<Rank[]>;
|
|
174
|
+
/**
|
|
175
|
+
* Decode tokens using a specific encoding (async)
|
|
176
|
+
*
|
|
177
|
+
* @param tokens - Token IDs
|
|
178
|
+
* @param encodingName - Encoding name (optional)
|
|
179
|
+
* @returns Promise resolving to decoded text
|
|
180
|
+
*/
|
|
181
|
+
export declare function decodeAsync(tokens: Rank[], encodingName?: string): Promise<string>;
|
|
182
|
+
/**
|
|
183
|
+
* Count tokens in text (async)
|
|
184
|
+
*
|
|
185
|
+
* @param text - Text to count
|
|
186
|
+
* @param encodingName - Encoding name (optional)
|
|
187
|
+
* @returns Promise resolving to token count
|
|
188
|
+
*/
|
|
189
|
+
export declare function countTokensAsync(text: string, encodingName?: string): Promise<number>;
|
|
190
|
+
/**
|
|
191
|
+
* Count tokens for a model (async)
|
|
192
|
+
*
|
|
193
|
+
* @param text - Text to count
|
|
194
|
+
* @param modelName - Model name
|
|
195
|
+
* @returns Promise resolving to token count
|
|
196
|
+
*/
|
|
197
|
+
export declare function countTokensForModelAsync(text: string, modelName: string): Promise<number>;
|
|
198
|
+
//# sourceMappingURL=tiktoken.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tiktoken.d.ts","sourceRoot":"","sources":["../src/tiktoken.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAEL,KAAK,IAAI,EAIT,KAAK,kBAAkB,EAMxB,MAAM,iBAAiB,CAAC;AAEzB;;;;;;;GAOG;AACH,qBAAa,QAAQ;IACnB,wBAAwB;IACxB,SAAgB,IAAI,EAAE,MAAM,CAAC;IAE7B,8BAA8B;IAC9B,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAqB;IAEhD,oDAAoD;IACpD,OAAO,CAAC,OAAO,CAAwB;IAEvC,uCAAuC;IACvC,OAAO,CAAC,WAAW,CAA8B;IAEjD,6CAA6C;IAC7C,OAAO,CAAC,QAAQ,CAAS;IAEzB;;;;;;;OAOG;gBACS,YAAY,EAAE,MAAM;IAehC;;OAEG;IACH,OAAO,CAAC,aAAa;IAOrB;;OAEG;IACH,OAAO,CAAC,WAAW;IAkBnB;;;;OAIG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAiB3B;;;;OAIG;IACH,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI;IASrC;;OAEG;IACH,OAAO,CAAC,YAAY;IASpB;;OAEG;IACH,IAAI,MAAM,IAAI,OAAO,CAEpB;IAMD;;;;;OAKG;IACH,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,EAAE;IAIpC;;;;;;OAMG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,cAAc,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,KAAK,GAAG,IAAI,EAAE;IAWlE;;;;;OAKG;IACH,uBAAuB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,EAAE;IAQ7C;;;;;OAKG;IACH,MAAM,CAAC,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM;IAI9B;;;;;OAKG;IACH,WAAW,CAAC,MAAM,EAAE,IAAI,EAAE,GAAG,UAAU;IAQvC;;;;;OAKG;IACH,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAIjC;;OAEG;IACH,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED;;OAEG;IACH,IAAI,cAAc,IAAI,MAAM,CAE3B;IAED;;OAEG;IACH,gBAAgB,IAAI,GAAG,CAAC,MAAM,CAAC;IAI/B;;OAEG;IACH,cAAc,CAAC,KAAK,EAAE,IAAI,GAAG,OAAO;IAIpC;;OAEG;IACH,aAAa,IAAI,kBAAkB;CAGpC;AAWD;;;;;GAKG;AACH,wBAAgB,WAAW,CAAC,YAAY,EAAE,MAAM,GAAG,QAAQ,CAS1D;AAED;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAAC,SAAS,EAAE,MAAM,GAAG,QAAQ,CAO/D;AAED;;;;;GAKG;AACH,wBAAsB,gBAAgB,CACpC,YAAY,EAAE,MAAM,GACnB,OAAO,CAAC,QAAQ,CAAC,CAInB;AAED;;;;;GAKG;AACH,wBAAsB,wBAAwB,CAC5C,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,QAAQ,CAAC,CAInB;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,IAAI,CAEzC;AAMD;;;;;;GAMG;AACH,wBAAsB,WAAW,CAC/B,IAAI,EAAE,MAAM,EACZ,YAAY,GAAE,MAAqB,GAClC,OAAO,CAAC,IAAI,EAAE,CAAC,CAGjB;AAED;;;;;;GAMG;AACH,wBAAsB,WAAW,CAC/B,MAAM,EAAE,IAAI,EAAE,EACd,YAAY,GAAE,MAAqB,GAClC,OAAO,CAAC,MAAM,CAAC,CAGjB;AAED;;;;;;GAMG;AACH,wBAAsB,gBAAgB,CACpC,IAAI,EAAE,MAAM,EACZ,YAAY,GAAE,
MAAqB,GAClC,OAAO,CAAC,MAAM,CAAC,CAGjB;AAED;;;;;;GAMG;AACH,wBAAsB,wBAAwB,CAC5C,IAAI,EAAE,MAAM,EACZ,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC,CAGjB"}
|