@goliapkg/tiktoken-wasm 2.1.1 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -12
- package/package.json +1 -1
- package/tiktoken_wasm.d.ts +13 -3
- package/tiktoken_wasm.js +13 -3
- package/tiktoken_wasm_bg.wasm +0 -0
package/README.md
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
# @goliapkg/
|
|
1
|
+
# @goliapkg/tiktoken-wasm
|
|
2
2
|
|
|
3
|
-
[npm package](https://www.npmjs.com/package/@goliapkg/tiktoken-wasm)
|
|
4
|
+
[License](../LICENSE)
|
|
5
5
|
|
|
6
|
-
WebAssembly bindings for the [tiktoken](https://crates.io/crates/tiktoken) BPE tokenizer — run
|
|
6
|
+
WebAssembly bindings for the [tiktoken](https://crates.io/crates/tiktoken) BPE tokenizer — run multi-provider tokenization directly in the browser or Node.js with near-native performance.
|
|
7
7
|
|
|
8
8
|
## Install
|
|
9
9
|
|
|
10
10
|
```bash
|
|
11
|
-
npm install @goliapkg/
|
|
11
|
+
npm install @goliapkg/tiktoken-wasm
|
|
12
12
|
```
|
|
13
13
|
|
|
14
14
|
## Build from source
|
|
@@ -35,7 +35,7 @@ import init, {
|
|
|
35
35
|
estimateCost,
|
|
36
36
|
getModelInfo,
|
|
37
37
|
type Encoding,
|
|
38
|
-
} from '@goliapkg/
|
|
38
|
+
} from '@goliapkg/tiktoken-wasm'
|
|
39
39
|
|
|
40
40
|
// initialize WASM module (required once, before any other calls)
|
|
41
41
|
await init()
|
|
@@ -46,8 +46,10 @@ const tokens: Uint32Array = enc.encode('hello world')
|
|
|
46
46
|
const text: string = enc.decode(tokens) // "hello world"
|
|
47
47
|
const count: number = enc.count('hello world') // 2
|
|
48
48
|
|
|
49
|
-
// by model name
|
|
49
|
+
// by model name — supports OpenAI, Meta, DeepSeek, Qwen, Mistral
|
|
50
50
|
const enc2 = encodingForModel('gpt-4o')
|
|
51
|
+
const enc3 = encodingForModel('llama-4-scout')
|
|
52
|
+
const enc4 = encodingForModel('deepseek-r1')
|
|
51
53
|
|
|
52
54
|
// cost estimation (USD)
|
|
53
55
|
const cost: number = estimateCost('gpt-4o', 1000, 500)
|
|
@@ -59,6 +61,8 @@ const info = getModelInfo('claude-opus-4')
|
|
|
59
61
|
// free WASM memory when done
|
|
60
62
|
enc.free()
|
|
61
63
|
enc2.free()
|
|
64
|
+
enc3.free()
|
|
65
|
+
enc4.free()
|
|
62
66
|
```
|
|
63
67
|
|
|
64
68
|
### Bundler Configuration
|
|
@@ -99,11 +103,20 @@ module.exports = {
|
|
|
99
103
|
|
|
100
104
|
### `getEncoding(name: string): Encoding`
|
|
101
105
|
|
|
102
|
-
Get a tokenizer by encoding name. Supported:
|
|
106
|
+
Get a tokenizer by encoding name. Supported:
|
|
107
|
+
- `cl100k_base` — GPT-4, GPT-3.5-turbo
|
|
108
|
+
- `o200k_base` — GPT-4o, GPT-4.1, o1, o3
|
|
109
|
+
- `p50k_base` — text-davinci-002/003
|
|
110
|
+
- `p50k_edit` — text-davinci-edit
|
|
111
|
+
- `r50k_base` — GPT-3 (davinci, curie, etc.)
|
|
112
|
+
- `llama3` — Meta Llama 3/4
|
|
113
|
+
- `deepseek_v3` — DeepSeek V3/R1
|
|
114
|
+
- `qwen2` — Qwen 2/2.5/3
|
|
115
|
+
- `mistral_v3` — Mistral/Codestral/Pixtral
|
|
103
116
|
|
|
104
117
|
### `encodingForModel(model: string): Encoding`
|
|
105
118
|
|
|
106
|
-
Get a tokenizer by OpenAI model name (e.g. `gpt-4o`, `o3-mini`, `gpt-3.5-turbo`).
|
|
119
|
+
Get a tokenizer by model name (e.g. `gpt-4o`, `llama-4-scout`, `deepseek-r1`, `qwen3-235b`).
|
|
107
120
|
|
|
108
121
|
### `Encoding`
|
|
109
122
|
|
|
@@ -118,7 +131,7 @@ Get a tokenizer by OpenAI model name (e.g. `gpt-4o`, `o3-mini`, `gpt-3.5-turbo`)
|
|
|
118
131
|
|
|
119
132
|
### `estimateCost(modelId, inputTokens, outputTokens): number`
|
|
120
133
|
|
|
121
|
-
Estimate API cost in USD. Supports
|
|
134
|
+
Estimate API cost in USD. Supports 57 models across 7 providers.
|
|
122
135
|
|
|
123
136
|
### `getModelInfo(modelId): object`
|
|
124
137
|
|
|
@@ -128,9 +141,13 @@ Get model metadata: pricing, context window, max output tokens.
|
|
|
128
141
|
|
|
129
142
|
| Provider | Models |
|
|
130
143
|
|----------|--------|
|
|
131
|
-
| OpenAI | gpt-4o, gpt-4o-mini,
|
|
132
|
-
| Anthropic | claude-opus-4, claude-sonnet-4, claude-3.5-haiku, claude-3.5-sonnet, claude-3-opus, claude-3-haiku |
|
|
144
|
+
| OpenAI | gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, gpt-4o, gpt-4o-mini, o3, o3-pro, o3-mini, o4-mini, o1, gpt-4-turbo, gpt-4, gpt-3.5-turbo, embeddings |
|
|
145
|
+
| Anthropic | claude-opus-4, claude-sonnet-4, claude-4.5-sonnet, claude-4.5-haiku, claude-4.6-sonnet, claude-4.6-opus, claude-4.6-haiku, claude-3.5-haiku, claude-3.5-sonnet, claude-3-opus, claude-3-haiku |
|
|
133
146
|
| Google | gemini-2.5-pro, gemini-2.5-flash, gemini-2.0-flash, gemini-1.5-pro, gemini-1.5-flash |
|
|
147
|
+
| Meta | llama-4-scout, llama-4-maverick, llama-3.3-70b, llama-3.1-405b, llama-3.1-70b, llama-3.1-8b |
|
|
148
|
+
| DeepSeek | deepseek-r1, deepseek-v3, deepseek-chat |
|
|
149
|
+
| Qwen | qwen3-235b, qwen3-32b, qwen3-30b-a3b, qwen3-14b, qwen-2.5-72b, qwen-2.5-coder-32b, qwen-turbo |
|
|
150
|
+
| Mistral | mistral-large, mistral-medium, mistral-small, codestral, pixtral-large |
|
|
134
151
|
|
|
135
152
|
## Demo
|
|
136
153
|
|
package/package.json
CHANGED
package/tiktoken_wasm.d.ts
CHANGED
|
@@ -44,8 +44,9 @@ export class Encoding {
|
|
|
44
44
|
}
|
|
45
45
|
|
|
46
46
|
/**
|
|
47
|
-
* Get an encoding for
|
|
47
|
+
* Get an encoding for a model name (e.g. `"gpt-4o"`, `"o3-mini"`, `"llama-4-scout"`, `"deepseek-r1"`).
|
|
48
48
|
*
|
|
49
|
+
* Supports models from OpenAI, Meta, DeepSeek, Qwen, and Mistral.
|
|
49
50
|
* Automatically resolves the model name to the correct encoding.
|
|
50
51
|
* Throws `Error` for unknown model names.
|
|
51
52
|
*/
|
|
@@ -54,7 +55,7 @@ export function encodingForModel(model: string): Encoding;
|
|
|
54
55
|
/**
|
|
55
56
|
* Estimate cost in USD for a given model, input token count, and output token count.
|
|
56
57
|
*
|
|
57
|
-
* Supports OpenAI, Anthropic Claude,
|
|
58
|
+
* Supports OpenAI, Anthropic Claude, Google Gemini, Meta Llama, DeepSeek, Qwen, and Mistral models.
|
|
58
59
|
* Throws `Error` for unknown model ids.
|
|
59
60
|
*/
|
|
60
61
|
export function estimateCost(model_id: string, input_tokens: number, output_tokens: number): number;
|
|
@@ -62,7 +63,16 @@ export function estimateCost(model_id: string, input_tokens: number, output_toke
|
|
|
62
63
|
/**
|
|
63
64
|
* Get an encoding by name.
|
|
64
65
|
*
|
|
65
|
-
* Supported:
|
|
66
|
+
* Supported encodings:
|
|
67
|
+
* - `"cl100k_base"` — GPT-4, GPT-3.5-turbo
|
|
68
|
+
* - `"o200k_base"` — GPT-4o, GPT-4.1, o1, o3
|
|
69
|
+
* - `"p50k_base"` — text-davinci-002/003
|
|
70
|
+
* - `"p50k_edit"` — text-davinci-edit
|
|
71
|
+
* - `"r50k_base"` — GPT-3 (davinci, curie, etc.)
|
|
72
|
+
* - `"llama3"` — Meta Llama 3/4
|
|
73
|
+
* - `"deepseek_v3"` — DeepSeek V3/R1
|
|
74
|
+
* - `"qwen2"` — Qwen 2/2.5/3
|
|
75
|
+
* - `"mistral_v3"` — Mistral/Codestral/Pixtral
|
|
66
76
|
*
|
|
67
77
|
* Throws `Error` for unknown encoding names.
|
|
68
78
|
*/
|
package/tiktoken_wasm.js
CHANGED
|
@@ -110,8 +110,9 @@ export class Encoding {
|
|
|
110
110
|
if (Symbol.dispose) Encoding.prototype[Symbol.dispose] = Encoding.prototype.free;
|
|
111
111
|
|
|
112
112
|
/**
|
|
113
|
-
* Get an encoding for
|
|
113
|
+
* Get an encoding for a model name (e.g. `"gpt-4o"`, `"o3-mini"`, `"llama-4-scout"`, `"deepseek-r1"`).
|
|
114
114
|
*
|
|
115
|
+
* Supports models from OpenAI, Meta, DeepSeek, Qwen, and Mistral.
|
|
115
116
|
* Automatically resolves the model name to the correct encoding.
|
|
116
117
|
* Throws `Error` for unknown model names.
|
|
117
118
|
* @param {string} model
|
|
@@ -130,7 +131,7 @@ export function encodingForModel(model) {
|
|
|
130
131
|
/**
|
|
131
132
|
* Estimate cost in USD for a given model, input token count, and output token count.
|
|
132
133
|
*
|
|
133
|
-
* Supports OpenAI, Anthropic Claude,
|
|
134
|
+
* Supports OpenAI, Anthropic Claude, Google Gemini, Meta Llama, DeepSeek, Qwen, and Mistral models.
|
|
134
135
|
* Throws `Error` for unknown model ids.
|
|
135
136
|
* @param {string} model_id
|
|
136
137
|
* @param {number} input_tokens
|
|
@@ -150,7 +151,16 @@ export function estimateCost(model_id, input_tokens, output_tokens) {
|
|
|
150
151
|
/**
|
|
151
152
|
* Get an encoding by name.
|
|
152
153
|
*
|
|
153
|
-
* Supported:
|
|
154
|
+
* Supported encodings:
|
|
155
|
+
* - `"cl100k_base"` — GPT-4, GPT-3.5-turbo
|
|
156
|
+
* - `"o200k_base"` — GPT-4o, GPT-4.1, o1, o3
|
|
157
|
+
* - `"p50k_base"` — text-davinci-002/003
|
|
158
|
+
* - `"p50k_edit"` — text-davinci-edit
|
|
159
|
+
* - `"r50k_base"` — GPT-3 (davinci, curie, etc.)
|
|
160
|
+
* - `"llama3"` — Meta Llama 3/4
|
|
161
|
+
* - `"deepseek_v3"` — DeepSeek V3/R1
|
|
162
|
+
* - `"qwen2"` — Qwen 2/2.5/3
|
|
163
|
+
* - `"mistral_v3"` — Mistral/Codestral/Pixtral
|
|
154
164
|
*
|
|
155
165
|
* Throws `Error` for unknown encoding names.
|
|
156
166
|
* @param {string} name
|
package/tiktoken_wasm_bg.wasm
CHANGED
|
Binary file
|