@goliapkg/tiktoken-wasm 2.1.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,14 +1,14 @@
1
- # @goliapkg/tokenrs-wasm
1
+ # @goliapkg/tiktoken-wasm
2
2
 
3
- [![npm](https://img.shields.io/npm/v/@goliapkg/tokenrs-wasm?style=flat-square&logo=npm)](https://www.npmjs.com/package/@goliapkg/tokenrs-wasm)
4
- [![License](https://img.shields.io/npm/l/@goliapkg/tokenrs-wasm?style=flat-square)](../LICENSE)
3
+ [![npm](https://img.shields.io/npm/v/@goliapkg/tiktoken-wasm?style=flat-square&logo=npm)](https://www.npmjs.com/package/@goliapkg/tiktoken-wasm)
4
+ [![License](https://img.shields.io/npm/l/@goliapkg/tiktoken-wasm?style=flat-square)](../LICENSE)
5
5
 
6
- WebAssembly bindings for the [tiktoken](https://crates.io/crates/tiktoken) BPE tokenizer — run OpenAI-compatible tokenization directly in the browser or Node.js with near-native performance.
6
+ WebAssembly bindings for the [tiktoken](https://crates.io/crates/tiktoken) BPE tokenizer — run multi-provider tokenization directly in the browser or Node.js with near-native performance.
7
7
 
8
8
  ## Install
9
9
 
10
10
  ```bash
11
- npm install @goliapkg/tokenrs-wasm
11
+ npm install @goliapkg/tiktoken-wasm
12
12
  ```
13
13
 
14
14
  ## Build from source
@@ -35,7 +35,7 @@ import init, {
35
35
  estimateCost,
36
36
  getModelInfo,
37
37
  type Encoding,
38
- } from '@goliapkg/tokenrs-wasm'
38
+ } from '@goliapkg/tiktoken-wasm'
39
39
 
40
40
  // initialize WASM module (required once, before any other calls)
41
41
  await init()
@@ -46,8 +46,10 @@ const tokens: Uint32Array = enc.encode('hello world')
46
46
  const text: string = enc.decode(tokens) // "hello world"
47
47
  const count: number = enc.count('hello world') // 2
48
48
 
49
- // by model name
49
+ // by model name — supports OpenAI, Meta, DeepSeek, Qwen, Mistral
50
50
  const enc2 = encodingForModel('gpt-4o')
51
+ const enc3 = encodingForModel('llama-4-scout')
52
+ const enc4 = encodingForModel('deepseek-r1')
51
53
 
52
54
  // cost estimation (USD)
53
55
  const cost: number = estimateCost('gpt-4o', 1000, 500)
@@ -59,6 +61,8 @@ const info = getModelInfo('claude-opus-4')
59
61
  // free WASM memory when done
60
62
  enc.free()
61
63
  enc2.free()
64
+ enc3.free()
65
+ enc4.free()
62
66
  ```
63
67
 
64
68
  ### Bundler Configuration
@@ -99,11 +103,20 @@ module.exports = {
99
103
 
100
104
  ### `getEncoding(name: string): Encoding`
101
105
 
102
- Get a tokenizer by encoding name. Supported: `cl100k_base`, `o200k_base`, `p50k_base`, `p50k_edit`, `r50k_base`.
106
+ Get a tokenizer by encoding name. Supported:
107
+ - `cl100k_base` — GPT-4, GPT-3.5-turbo
108
+ - `o200k_base` — GPT-4o, GPT-4.1, o1, o3
109
+ - `p50k_base` — text-davinci-002/003
110
+ - `p50k_edit` — text-davinci-edit
111
+ - `r50k_base` — GPT-3 (davinci, curie, etc.)
112
+ - `llama3` — Meta Llama 3/4
113
+ - `deepseek_v3` — DeepSeek V3/R1
114
+ - `qwen2` — Qwen 2/2.5/3
115
+ - `mistral_v3` — Mistral/Codestral/Pixtral
103
116
 
104
117
  ### `encodingForModel(model: string): Encoding`
105
118
 
106
- Get a tokenizer by OpenAI model name (e.g. `gpt-4o`, `o3-mini`, `gpt-3.5-turbo`).
119
+ Get a tokenizer by model name (e.g. `gpt-4o`, `llama-4-scout`, `deepseek-r1`, `qwen3-235b`).
107
120
 
108
121
  ### `Encoding`
109
122
 
@@ -118,7 +131,7 @@ Get a tokenizer by OpenAI model name (e.g. `gpt-4o`, `o3-mini`, `gpt-3.5-turbo`)
118
131
 
119
132
  ### `estimateCost(modelId, inputTokens, outputTokens): number`
120
133
 
121
- Estimate API cost in USD. Supports OpenAI, Anthropic Claude, and Google Gemini models.
134
+ Estimate API cost in USD. Supports 57 models across 7 providers (OpenAI, Anthropic, Google, Meta, DeepSeek, Qwen, and Mistral).
122
135
 
123
136
  ### `getModelInfo(modelId): object`
124
137
 
@@ -128,9 +141,13 @@ Get model metadata: pricing, context window, max output tokens.
128
141
 
129
142
  | Provider | Models |
130
143
  |----------|--------|
131
- | OpenAI | gpt-4o, gpt-4o-mini, o1, o3, o4-mini, gpt-4-turbo, gpt-4, gpt-3.5-turbo, embeddings |
132
- | Anthropic | claude-opus-4, claude-sonnet-4, claude-3.5-haiku, claude-3.5-sonnet, claude-3-opus, claude-3-haiku |
144
+ | OpenAI | gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, gpt-4o, gpt-4o-mini, o3, o3-pro, o3-mini, o4-mini, o1, gpt-4-turbo, gpt-4, gpt-3.5-turbo, embeddings |
145
+ | Anthropic | claude-opus-4, claude-sonnet-4, claude-4.5-sonnet, claude-4.5-haiku, claude-4.6-sonnet, claude-4.6-opus, claude-4.6-haiku, claude-3.5-haiku, claude-3.5-sonnet, claude-3-opus, claude-3-haiku |
133
146
  | Google | gemini-2.5-pro, gemini-2.5-flash, gemini-2.0-flash, gemini-1.5-pro, gemini-1.5-flash |
147
+ | Meta | llama-4-scout, llama-4-maverick, llama-3.3-70b, llama-3.1-405b, llama-3.1-70b, llama-3.1-8b |
148
+ | DeepSeek | deepseek-r1, deepseek-v3, deepseek-chat |
149
+ | Qwen | qwen3-235b, qwen3-32b, qwen3-30b-a3b, qwen3-14b, qwen-2.5-72b, qwen-2.5-coder-32b, qwen-turbo |
150
+ | Mistral | mistral-large, mistral-medium, mistral-small, codestral, pixtral-large |
134
151
 
135
152
  ## Demo
136
153
 
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@goliapkg/tiktoken-wasm",
3
3
  "type": "module",
4
4
  "description": "WASM bindings for the tiktoken BPE tokenizer",
5
- "version": "2.1.1",
5
+ "version": "3.1.0",
6
6
  "license": "MIT",
7
7
  "repository": {
8
8
  "type": "git",
@@ -44,8 +44,9 @@ export class Encoding {
44
44
  }
45
45
 
46
46
  /**
47
- * Get an encoding for an OpenAI model name (e.g. `"gpt-4o"`, `"o3-mini"`).
47
+ * Get an encoding for a model name (e.g. `"gpt-4o"`, `"o3-mini"`, `"llama-4"`, `"deepseek-r1"`).
48
48
  *
49
+ * Supports models from OpenAI, Meta, DeepSeek, Qwen, and Mistral.
49
50
  * Automatically resolves the model name to the correct encoding.
50
51
  * Throws `Error` for unknown model names.
51
52
  */
@@ -54,7 +55,7 @@ export function encodingForModel(model: string): Encoding;
54
55
  /**
55
56
  * Estimate cost in USD for a given model, input token count, and output token count.
56
57
  *
57
- * Supports OpenAI, Anthropic Claude, and Google Gemini models.
58
+ * Supports OpenAI, Anthropic Claude, Google Gemini, Meta Llama, DeepSeek, Qwen, and Mistral models.
58
59
  * Throws `Error` for unknown model ids.
59
60
  */
60
61
  export function estimateCost(model_id: string, input_tokens: number, output_tokens: number): number;
@@ -62,7 +63,16 @@ export function estimateCost(model_id: string, input_tokens: number, output_toke
62
63
  /**
63
64
  * Get an encoding by name.
64
65
  *
65
- * Supported: `"cl100k_base"`, `"o200k_base"`, `"p50k_base"`, `"p50k_edit"`, `"r50k_base"`.
66
+ * Supported encodings:
67
+ * - `"cl100k_base"` — GPT-4, GPT-3.5-turbo
68
+ * - `"o200k_base"` — GPT-4o, GPT-4.1, o1, o3
69
+ * - `"p50k_base"` — text-davinci-002/003
70
+ * - `"p50k_edit"` — text-davinci-edit
71
+ * - `"r50k_base"` — GPT-3 (davinci, curie, etc.)
72
+ * - `"llama3"` — Meta Llama 3/4
73
+ * - `"deepseek_v3"` — DeepSeek V3/R1
74
+ * - `"qwen2"` — Qwen 2/2.5/3
75
+ * - `"mistral_v3"` — Mistral/Codestral/Pixtral
66
76
  *
67
77
  * Throws `Error` for unknown encoding names.
68
78
  */
package/tiktoken_wasm.js CHANGED
@@ -110,8 +110,9 @@ export class Encoding {
110
110
  if (Symbol.dispose) Encoding.prototype[Symbol.dispose] = Encoding.prototype.free;
111
111
 
112
112
  /**
113
- * Get an encoding for an OpenAI model name (e.g. `"gpt-4o"`, `"o3-mini"`).
113
+ * Get an encoding for a model name (e.g. `"gpt-4o"`, `"o3-mini"`, `"llama-4"`, `"deepseek-r1"`).
114
114
  *
115
+ * Supports models from OpenAI, Meta, DeepSeek, Qwen, and Mistral.
115
116
  * Automatically resolves the model name to the correct encoding.
116
117
  * Throws `Error` for unknown model names.
117
118
  * @param {string} model
@@ -130,7 +131,7 @@ export function encodingForModel(model) {
130
131
  /**
131
132
  * Estimate cost in USD for a given model, input token count, and output token count.
132
133
  *
133
- * Supports OpenAI, Anthropic Claude, and Google Gemini models.
134
+ * Supports OpenAI, Anthropic Claude, Google Gemini, Meta Llama, DeepSeek, Qwen, and Mistral models.
134
135
  * Throws `Error` for unknown model ids.
135
136
  * @param {string} model_id
136
137
  * @param {number} input_tokens
@@ -150,7 +151,16 @@ export function estimateCost(model_id, input_tokens, output_tokens) {
150
151
  /**
151
152
  * Get an encoding by name.
152
153
  *
153
- * Supported: `"cl100k_base"`, `"o200k_base"`, `"p50k_base"`, `"p50k_edit"`, `"r50k_base"`.
154
+ * Supported encodings:
155
+ * - `"cl100k_base"` — GPT-4, GPT-3.5-turbo
156
+ * - `"o200k_base"` — GPT-4o, GPT-4.1, o1, o3
157
+ * - `"p50k_base"` — text-davinci-002/003
158
+ * - `"p50k_edit"` — text-davinci-edit
159
+ * - `"r50k_base"` — GPT-3 (davinci, curie, etc.)
160
+ * - `"llama3"` — Meta Llama 3/4
161
+ * - `"deepseek_v3"` — DeepSeek V3/R1
162
+ * - `"qwen2"` — Qwen 2/2.5/3
163
+ * - `"mistral_v3"` — Mistral/Codestral/Pixtral
154
164
  *
155
165
  * Throws `Error` for unknown encoding names.
156
166
  * @param {string} name
Binary file