@goliapkg/tiktoken-wasm 3.1.0 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -29
- package/package.json +1 -1
- package/tiktoken_wasm_bg.wasm +0 -0
package/README.md
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
# @goliapkg/
|
|
1
|
+
# @goliapkg/tokenrs-wasm
|
|
2
2
|
|
|
3
|
-
[npm package](https://www.npmjs.com/package/@goliapkg/tokenrs-wasm)
|
|
4
|
+
[License](../LICENSE)
|
|
5
5
|
|
|
6
|
-
WebAssembly bindings for the [tiktoken](https://crates.io/crates/tiktoken) BPE tokenizer — run
|
|
6
|
+
WebAssembly bindings for the [tiktoken](https://crates.io/crates/tiktoken) BPE tokenizer — run OpenAI-compatible tokenization directly in the browser or Node.js with near-native performance.
|
|
7
7
|
|
|
8
8
|
## Install
|
|
9
9
|
|
|
10
10
|
```bash
|
|
11
|
-
npm install @goliapkg/
|
|
11
|
+
npm install @goliapkg/tokenrs-wasm
|
|
12
12
|
```
|
|
13
13
|
|
|
14
14
|
## Build from source
|
|
@@ -35,7 +35,7 @@ import init, {
|
|
|
35
35
|
estimateCost,
|
|
36
36
|
getModelInfo,
|
|
37
37
|
type Encoding,
|
|
38
|
-
} from '@goliapkg/
|
|
38
|
+
} from '@goliapkg/tokenrs-wasm'
|
|
39
39
|
|
|
40
40
|
// initialize WASM module (required once, before any other calls)
|
|
41
41
|
await init()
|
|
@@ -46,10 +46,8 @@ const tokens: Uint32Array = enc.encode('hello world')
|
|
|
46
46
|
const text: string = enc.decode(tokens) // "hello world"
|
|
47
47
|
const count: number = enc.count('hello world') // 2
|
|
48
48
|
|
|
49
|
-
// by model name
|
|
49
|
+
// by model name
|
|
50
50
|
const enc2 = encodingForModel('gpt-4o')
|
|
51
|
-
const enc3 = encodingForModel('llama-4-scout')
|
|
52
|
-
const enc4 = encodingForModel('deepseek-r1')
|
|
53
51
|
|
|
54
52
|
// cost estimation (USD)
|
|
55
53
|
const cost: number = estimateCost('gpt-4o', 1000, 500)
|
|
@@ -61,8 +59,6 @@ const info = getModelInfo('claude-opus-4')
|
|
|
61
59
|
// free WASM memory when done
|
|
62
60
|
enc.free()
|
|
63
61
|
enc2.free()
|
|
64
|
-
enc3.free()
|
|
65
|
-
enc4.free()
|
|
66
62
|
```
|
|
67
63
|
|
|
68
64
|
### Bundler Configuration
|
|
@@ -103,20 +99,11 @@ module.exports = {
|
|
|
103
99
|
|
|
104
100
|
### `getEncoding(name: string): Encoding`
|
|
105
101
|
|
|
106
|
-
Get a tokenizer by encoding name. Supported:
|
|
107
|
-
- `cl100k_base` — GPT-4, GPT-3.5-turbo
|
|
108
|
-
- `o200k_base` — GPT-4o, GPT-4.1, o1, o3
|
|
109
|
-
- `p50k_base` — text-davinci-002/003
|
|
110
|
-
- `p50k_edit` — text-davinci-edit
|
|
111
|
-
- `r50k_base` — GPT-3 (davinci, curie, etc.)
|
|
112
|
-
- `llama3` — Meta Llama 3/4
|
|
113
|
-
- `deepseek_v3` — DeepSeek V3/R1
|
|
114
|
-
- `qwen2` — Qwen 2/2.5/3
|
|
115
|
-
- `mistral_v3` — Mistral/Codestral/Pixtral
|
|
102
|
+
Get a tokenizer by encoding name. Supported: `cl100k_base`, `o200k_base`, `p50k_base`, `p50k_edit`, `r50k_base`.
|
|
116
103
|
|
|
117
104
|
### `encodingForModel(model: string): Encoding`
|
|
118
105
|
|
|
119
|
-
Get a tokenizer by model name (e.g. `gpt-4o`, `llama-4-scout`, `deepseek-r1`, …).
|
|
106
|
+
Get a tokenizer by OpenAI model name (e.g. `gpt-4o`, `o3-mini`, `gpt-3.5-turbo`).
|
|
120
107
|
|
|
121
108
|
### `Encoding`
|
|
122
109
|
|
|
@@ -131,7 +118,7 @@ Get a tokenizer by model name (e.g. `gpt-4o`, `llama-4-scout`, `deepseek-r1`, `q
|
|
|
131
118
|
|
|
132
119
|
### `estimateCost(modelId, inputTokens, outputTokens): number`
|
|
133
120
|
|
|
134
|
-
Estimate API cost in USD. Supports
|
|
121
|
+
Estimate API cost in USD. Supports OpenAI, Anthropic Claude, and Google Gemini models.
|
|
135
122
|
|
|
136
123
|
### `getModelInfo(modelId): object`
|
|
137
124
|
|
|
@@ -141,13 +128,9 @@ Get model metadata: pricing, context window, max output tokens.
|
|
|
141
128
|
|
|
142
129
|
| Provider | Models |
|
|
143
130
|
|----------|--------|
|
|
144
|
-
| OpenAI | gpt-
|
|
145
|
-
| Anthropic | claude-opus-4, claude-sonnet-4, claude-
|
|
131
|
+
| OpenAI | gpt-4o, gpt-4o-mini, o1, o3, o4-mini, gpt-4-turbo, gpt-4, gpt-3.5-turbo, embeddings |
|
|
132
|
+
| Anthropic | claude-opus-4, claude-sonnet-4, claude-3.5-haiku, claude-3.5-sonnet, claude-3-opus, claude-3-haiku |
|
|
146
133
|
| Google | gemini-2.5-pro, gemini-2.5-flash, gemini-2.0-flash, gemini-1.5-pro, gemini-1.5-flash |
|
|
147
|
-
| Meta | llama-4-scout, llama-4-maverick, llama-3.3-70b, llama-3.1-405b, llama-3.1-70b, llama-3.1-8b |
|
|
148
|
-
| DeepSeek | deepseek-r1, deepseek-v3, deepseek-chat |
|
|
149
|
-
| Qwen | qwen3-235b, qwen3-32b, qwen3-30b-a3b, qwen3-14b, qwen-2.5-72b, qwen-2.5-coder-32b, qwen-turbo |
|
|
150
|
-
| Mistral | mistral-large, mistral-medium, mistral-small, codestral, pixtral-large |
|
|
151
134
|
|
|
152
135
|
## Demo
|
|
153
136
|
|
package/package.json
CHANGED
package/tiktoken_wasm_bg.wasm
CHANGED
|
Binary file
|