@goliapkg/tiktoken-wasm 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +141 -0
- package/package.json +21 -0
- package/tiktoken_wasm.d.ts +123 -0
- package/tiktoken_wasm.js +448 -0
- package/tiktoken_wasm_bg.wasm +0 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 GOLIA株式会社
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# @goliapkg/tiktoken-wasm
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@goliapkg/tiktoken-wasm)
|
|
4
|
+
[MIT License](../LICENSE)
|
|
5
|
+
|
|
6
|
+
WebAssembly bindings for the [tiktoken](https://crates.io/crates/tiktoken) BPE tokenizer — run OpenAI-compatible tokenization directly in the browser or Node.js with near-native performance.
|
|
7
|
+
|
|
8
|
+
## Install
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
npm install @goliapkg/tiktoken-wasm
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Build from source
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# requires wasm-pack: cargo install wasm-pack
|
|
18
|
+
cd tiktoken-wasm
|
|
19
|
+
wasm-pack build --target web --release
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Output is in `pkg/` — a complete npm-ready package containing:
|
|
23
|
+
- `tiktoken_wasm.js` — ES module with WASM loader
|
|
24
|
+
- `tiktoken_wasm_bg.wasm` — compiled WASM binary (~3 MB)
|
|
25
|
+
- `tiktoken_wasm.d.ts` — TypeScript type definitions
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
### ES Module (Browser / Vite / webpack)
|
|
30
|
+
|
|
31
|
+
```typescript
|
|
32
|
+
import init, {
|
|
33
|
+
getEncoding,
|
|
34
|
+
encodingForModel,
|
|
35
|
+
estimateCost,
|
|
36
|
+
getModelInfo,
|
|
37
|
+
type Encoding,
|
|
38
|
+
} from '@goliapkg/tiktoken-wasm'
|
|
39
|
+
|
|
40
|
+
// initialize WASM module (required once, before any other calls)
|
|
41
|
+
await init()
|
|
42
|
+
|
|
43
|
+
// encode / decode
|
|
44
|
+
const enc: Encoding = getEncoding('cl100k_base')
|
|
45
|
+
const tokens: Uint32Array = enc.encode('hello world')
|
|
46
|
+
const text: string = enc.decode(tokens) // "hello world"
|
|
47
|
+
const count: number = enc.count('hello world') // 2
|
|
48
|
+
|
|
49
|
+
// by model name
|
|
50
|
+
const enc2 = encodingForModel('gpt-4o')
|
|
51
|
+
|
|
52
|
+
// cost estimation (USD)
|
|
53
|
+
const cost: number = estimateCost('gpt-4o', 1000, 500)
|
|
54
|
+
|
|
55
|
+
// model metadata
|
|
56
|
+
const info = getModelInfo('claude-opus-4')
|
|
57
|
+
// { id, provider, input_per_1m, output_per_1m, cached_input_per_1m, context_window, max_output }
|
|
58
|
+
|
|
59
|
+
// free WASM memory when done
|
|
60
|
+
enc.free()
|
|
61
|
+
enc2.free()
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Bundler Configuration
|
|
65
|
+
|
|
66
|
+
**Vite** — add plugins to `vite.config.ts`:
|
|
67
|
+
|
|
68
|
+
```typescript
|
|
69
|
+
import { defineConfig } from 'vite'
import wasm from 'vite-plugin-wasm'
|
|
70
|
+
import topLevelAwait from 'vite-plugin-top-level-await'
|
|
71
|
+
|
|
72
|
+
export default defineConfig({
|
|
73
|
+
plugins: [wasm(), topLevelAwait()],
|
|
74
|
+
})
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
**webpack 5** — enable WASM experiments in `webpack.config.js`:
|
|
78
|
+
|
|
79
|
+
```javascript
|
|
80
|
+
module.exports = {
|
|
81
|
+
experiments: {
|
|
82
|
+
asyncWebAssembly: true,
|
|
83
|
+
},
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**Next.js** — add to `next.config.js`:
|
|
88
|
+
|
|
89
|
+
```javascript
|
|
90
|
+
module.exports = {
|
|
91
|
+
webpack: (config) => {
|
|
92
|
+
config.experiments = { ...config.experiments, asyncWebAssembly: true }
|
|
93
|
+
return config
|
|
94
|
+
},
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## API Reference
|
|
99
|
+
|
|
100
|
+
### `getEncoding(name: string): Encoding`
|
|
101
|
+
|
|
102
|
+
Get a tokenizer by encoding name. Supported: `cl100k_base`, `o200k_base`, `p50k_base`, `p50k_edit`, `r50k_base`.
|
|
103
|
+
|
|
104
|
+
### `encodingForModel(model: string): Encoding`
|
|
105
|
+
|
|
106
|
+
Get a tokenizer by OpenAI model name (e.g. `gpt-4o`, `o3-mini`, `gpt-3.5-turbo`).
|
|
107
|
+
|
|
108
|
+
### `Encoding`
|
|
109
|
+
|
|
110
|
+
| Method | Returns | Description |
|
|
111
|
+
|--------|---------|-------------|
|
|
112
|
+
| `encode(text)` | `Uint32Array` | Encode text to token ids |
|
|
113
|
+
| `encodeWithSpecialTokens(text)` | `Uint32Array` | Encode with special token recognition |
|
|
114
|
+
| `decode(tokens)` | `string` | Decode token ids to text |
|
|
115
|
+
| `count(text)` | `number` | Count tokens (faster than `encode().length`) |
|
|
116
|
+
| `name` | `string` | Encoding name (getter) |
|
|
117
|
+
| `free()` | `void` | Release WASM memory |
|
|
118
|
+
|
|
119
|
+
### `estimateCost(modelId, inputTokens, outputTokens): number`
|
|
120
|
+
|
|
121
|
+
Estimate API cost in USD. Supports OpenAI, Anthropic Claude, and Google Gemini models.
|
|
122
|
+
|
|
123
|
+
### `getModelInfo(modelId): object`
|
|
124
|
+
|
|
125
|
+
Get model metadata: pricing, context window, max output tokens.
|
|
126
|
+
|
|
127
|
+
## Supported Models (pricing)
|
|
128
|
+
|
|
129
|
+
| Provider | Models |
|
|
130
|
+
|----------|--------|
|
|
131
|
+
| OpenAI | gpt-4o, gpt-4o-mini, o1, o3, o4-mini, gpt-4-turbo, gpt-4, gpt-3.5-turbo, embeddings |
|
|
132
|
+
| Anthropic | claude-opus-4, claude-sonnet-4, claude-3.5-haiku, claude-3.5-sonnet, claude-3-opus, claude-3-haiku |
|
|
133
|
+
| Google | gemini-2.5-pro, gemini-2.5-flash, gemini-2.0-flash, gemini-1.5-pro, gemini-1.5-flash |
|
|
134
|
+
|
|
135
|
+
## Demo
|
|
136
|
+
|
|
137
|
+
See [`examples/react-app`](../examples/react-app/) for a complete Vite + React demo application.
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
[MIT](../LICENSE)
|
package/package.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@goliapkg/tiktoken-wasm",
|
|
3
|
+
"type": "module",
|
|
4
|
+
"description": "WASM bindings for the tiktoken BPE tokenizer",
|
|
5
|
+
"version": "2.1.1",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/goliajp/airs"
|
|
10
|
+
},
|
|
11
|
+
"files": [
|
|
12
|
+
"tiktoken_wasm_bg.wasm",
|
|
13
|
+
"tiktoken_wasm.js",
|
|
14
|
+
"tiktoken_wasm.d.ts"
|
|
15
|
+
],
|
|
16
|
+
"main": "tiktoken_wasm.js",
|
|
17
|
+
"types": "tiktoken_wasm.d.ts",
|
|
18
|
+
"sideEffects": [
|
|
19
|
+
"./snippets/*"
|
|
20
|
+
]
|
|
21
|
+
}
|
package/tiktoken_wasm.d.ts
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
/**
 * Handle to a tiktoken encoding that lives inside the WASM module.
 *
 * Instances cannot be constructed directly; obtain one from `getEncoding()`
 * or `encodingForModel()`. Call `free()` when finished so the WASM-side
 * memory is released.
 */
export class Encoding {
    private constructor();
    /** Release the WASM memory held by this encoding. */
    free(): void;
    /** Disposal hook; the JS glue aliases it to `free()` when `Symbol.dispose` exists. */
    [Symbol.dispose](): void;
    /**
     * Count the tokens in `text` without materializing the token id array.
     *
     * Faster than `encode(text).length` when only the count is needed.
     */
    count(text: string): number;
    /**
     * Turn token ids back into a UTF-8 string.
     *
     * The conversion is lossy: invalid byte sequences are replaced with U+FFFD.
     */
    decode(tokens: Uint32Array): string;
    /**
     * Encode text into token ids.
     *
     * Special tokens such as `<|endoftext|>` are treated as ordinary text here;
     * use `encodeWithSpecialTokens()` to have them recognized.
     */
    encode(text: string): Uint32Array;
    /**
     * Encode text into token ids while recognizing special tokens.
     *
     * Special tokens (e.g. `<|endoftext|>`) map to their designated ids
     * instead of being split into sub-word pieces.
     */
    encodeWithSpecialTokens(text: string): Uint32Array;
    /** Name of this encoding (e.g. `"cl100k_base"`). */
    readonly name: string;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Look up the encoding used by an OpenAI model (e.g. `"gpt-4o"`, `"o3-mini"`).
 *
 * The model name is resolved to the correct encoding automatically.
 *
 * @param model - OpenAI model name.
 * @returns The encoding for that model.
 * @throws `Error` when the model name is unknown.
 */
export function encodingForModel(model: string): Encoding;
|
|
53
|
+
|
|
54
|
+
/**
 * Estimate the cost in USD of a request to the given model.
 *
 * Supports OpenAI, Anthropic Claude, and Google Gemini models.
 *
 * @param model_id - Model identifier.
 * @param input_tokens - Number of input tokens.
 * @param output_tokens - Number of output tokens.
 * @returns Estimated cost in USD.
 * @throws `Error` when the model id is unknown.
 */
export function estimateCost(model_id: string, input_tokens: number, output_tokens: number): number;
|
|
61
|
+
|
|
62
|
+
/**
 * Look up an encoding by its name.
 *
 * Supported names: `"cl100k_base"`, `"o200k_base"`, `"p50k_base"`,
 * `"p50k_edit"`, `"r50k_base"`.
 *
 * @param name - Encoding name.
 * @returns The named encoding.
 * @throws `Error` when the encoding name is unknown.
 */
export function getEncoding(name: string): Encoding;
|
|
70
|
+
|
|
71
|
+
/**
 * Fetch pricing and metadata for a model as a plain JS object.
 *
 * The returned object carries the fields `id`, `provider`, `input_per_1m`,
 * `output_per_1m`, `cached_input_per_1m`, `context_window` and `max_output`.
 *
 * @param model_id - Model identifier.
 * @returns The model's metadata object.
 * @throws `Error` when the model id is unknown.
 */
export function getModelInfo(model_id: string): any;
|
|
80
|
+
|
|
81
|
+
/** Inputs accepted by the asynchronous initializer (the module's default export). */
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
|
|
82
|
+
|
|
83
|
+
/**
 * Raw exports of the instantiated WASM module, as returned by `initSync()`
 * and the default initializer.
 *
 * These are the low-level entry points the JS glue calls with pointers and
 * lengths; application code should normally use the exported wrappers
 * (`Encoding`, `getEncoding`, ...) instead.
 */
export interface InitOutput {
    readonly memory: WebAssembly.Memory;
    readonly __wbg_encoding_free: (a: number, b: number) => void;
    readonly encodingForModel: (a: number, b: number) => [number, number, number];
    readonly encoding_count: (a: number, b: number, c: number) => number;
    readonly encoding_decode: (a: number, b: number, c: number) => [number, number];
    readonly encoding_encode: (a: number, b: number, c: number) => [number, number];
    readonly encoding_encodeWithSpecialTokens: (a: number, b: number, c: number) => [number, number];
    readonly encoding_name: (a: number) => [number, number];
    readonly estimateCost: (a: number, b: number, c: number, d: number) => [number, number, number];
    readonly getEncoding: (a: number, b: number) => [number, number, number];
    readonly getModelInfo: (a: number, b: number) => [number, number, number];
    readonly __wbindgen_malloc: (a: number, b: number) => number;
    readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
    readonly __wbindgen_externrefs: WebAssembly.Table;
    readonly __externref_table_dealloc: (a: number) => void;
    readonly __wbindgen_free: (a: number, b: number, c: number) => void;
    readonly __wbindgen_start: () => void;
}
|
|
102
|
+
|
|
103
|
+
/** Inputs accepted by `initSync()`: raw WASM bytes or an already compiled module. */
export type SyncInitInput = BufferSource | WebAssembly.Module;
|
|
104
|
+
|
|
105
|
+
/**
 * Synchronously instantiate the WASM module from bytes or from a
 * precompiled `WebAssembly.Module`.
 *
 * @param module - An options object `{ module }`. Passing a `SyncInitInput`
 *   directly is still accepted but deprecated.
 * @returns The raw exports of the instantiated module.
 */
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;
|
|
114
|
+
|
|
115
|
+
/**
 * Asynchronously instantiate the WASM module.
 *
 * When `module_or_path` is a `RequestInfo` or `URL` the module is fetched
 * first; any other input is handed to `WebAssembly.instantiate` directly.
 *
 * @param module_or_path - An options object `{ module_or_path }`. Passing an
 *   `InitInput` (or a promise of one) directly is still accepted but deprecated.
 * @returns A promise for the raw exports of the instantiated module.
 */
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;
|
package/tiktoken_wasm.js
ADDED
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
/* @ts-self-types="./tiktoken_wasm.d.ts" */
|
|
2
|
+
|
|
3
|
+
/**
 * Handle to a tiktoken encoding that lives inside the WASM module.
 *
 * Obtain an instance from `getEncoding()` or `encodingForModel()`.
 * Call `.free()` when finished so the WASM-side memory is released.
 */
export class Encoding {
    /**
     * Build an `Encoding` around a raw WASM pointer without running a constructor.
     * @param {number} ptr
     * @returns {Encoding}
     */
    static __wrap(ptr) {
        const handle = Object.create(Encoding.prototype);
        // Normalize the pointer to an unsigned 32-bit integer.
        handle.__wbg_ptr = ptr >>> 0;
        // The handle doubles as the unregister token used by __destroy_into_raw().
        EncodingFinalization.register(handle, handle.__wbg_ptr, handle);
        return handle;
    }
    /**
     * Detach the raw pointer from this wrapper: the stored pointer is zeroed,
     * the finalizer registration is dropped, and the old pointer is returned.
     * @returns {number}
     */
    __destroy_into_raw() {
        const rawPtr = this.__wbg_ptr;
        this.__wbg_ptr = 0;
        EncodingFinalization.unregister(this);
        return rawPtr;
    }
    /**
     * Release the WASM memory held by this encoding.
     */
    free() {
        const rawPtr = this.__destroy_into_raw();
        wasm.__wbg_encoding_free(rawPtr, 0);
    }
    /**
     * Count the tokens in `text` without materializing the token id array.
     *
     * Faster than `encode(text).length` when only the count is needed.
     * @param {string} text
     * @returns {number}
     */
    count(text) {
        const textPtr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const textLen = WASM_VECTOR_LEN;
        const total = wasm.encoding_count(this.__wbg_ptr, textPtr, textLen);
        return total >>> 0;
    }
    /**
     * Turn token ids back into a UTF-8 string.
     *
     * The conversion is lossy: invalid byte sequences are replaced with U+FFFD.
     * @param {Uint32Array} tokens
     * @returns {string}
     */
    decode(tokens) {
        let strPtr;
        let strLen;
        try {
            const tokensPtr = passArray32ToWasm0(tokens, wasm.__wbindgen_malloc);
            const tokensLen = WASM_VECTOR_LEN;
            [strPtr, strLen] = wasm.encoding_decode(this.__wbg_ptr, tokensPtr, tokensLen);
            return getStringFromWasm0(strPtr, strLen);
        } finally {
            // Free the WASM-side string buffer after it has been copied into a JS string.
            wasm.__wbindgen_free(strPtr, strLen, 1);
        }
    }
    /**
     * Encode text into token ids (a `Uint32Array` in JS).
     *
     * Special tokens such as `<|endoftext|>` are treated as ordinary text here;
     * use `encodeWithSpecialTokens()` to have them recognized.
     * @param {string} text
     * @returns {Uint32Array}
     */
    encode(text) {
        const textPtr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const textLen = WASM_VECTOR_LEN;
        const [idsPtr, idsLen] = wasm.encoding_encode(this.__wbg_ptr, textPtr, textLen);
        // Copy the ids out of WASM memory before the WASM-side buffer is freed.
        const ids = getArrayU32FromWasm0(idsPtr, idsLen).slice();
        wasm.__wbindgen_free(idsPtr, idsLen * 4, 4);
        return ids;
    }
    /**
     * Encode text into token ids while recognizing special tokens.
     *
     * Special tokens (e.g. `<|endoftext|>`) map to their designated ids
     * instead of being split into sub-word pieces.
     * @param {string} text
     * @returns {Uint32Array}
     */
    encodeWithSpecialTokens(text) {
        const textPtr = passStringToWasm0(text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
        const textLen = WASM_VECTOR_LEN;
        const [idsPtr, idsLen] = wasm.encoding_encodeWithSpecialTokens(this.__wbg_ptr, textPtr, textLen);
        // Copy the ids out of WASM memory before the WASM-side buffer is freed.
        const ids = getArrayU32FromWasm0(idsPtr, idsLen).slice();
        wasm.__wbindgen_free(idsPtr, idsLen * 4, 4);
        return ids;
    }
    /**
     * Name of this encoding (e.g. `"cl100k_base"`).
     * @returns {string}
     */
    get name() {
        let strPtr;
        let strLen;
        try {
            [strPtr, strLen] = wasm.encoding_name(this.__wbg_ptr);
            return getStringFromWasm0(strPtr, strLen);
        } finally {
            // Free the WASM-side string buffer after it has been copied into a JS string.
            wasm.__wbindgen_free(strPtr, strLen, 1);
        }
    }
}
|
|
110
|
+
// Where the runtime provides Symbol.dispose, expose free() under it as well.
if (Symbol.dispose) Encoding.prototype[Symbol.dispose] = Encoding.prototype.free;
|
|
111
|
+
|
|
112
|
+
/**
 * Look up the encoding used by an OpenAI model (e.g. `"gpt-4o"`, `"o3-mini"`).
 *
 * The model name is resolved to the correct encoding automatically.
 * Throws `Error` when the model name is unknown.
 * @param {string} model
 * @returns {Encoding}
 */
export function encodingForModel(model) {
    const modelPtr = passStringToWasm0(model, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const modelLen = WASM_VECTOR_LEN;
    // The WASM call yields [encoding pointer, error slot, error flag].
    const [encodingPtr, errorIdx, failed] = wasm.encodingForModel(modelPtr, modelLen);
    if (!failed) {
        return Encoding.__wrap(encodingPtr);
    }
    throw takeFromExternrefTable0(errorIdx);
}
|
|
129
|
+
|
|
130
|
+
/**
 * Estimate the cost in USD of a request to the given model.
 *
 * Supports OpenAI, Anthropic Claude, and Google Gemini models.
 * Throws `Error` when the model id is unknown.
 * @param {string} model_id
 * @param {number} input_tokens
 * @param {number} output_tokens
 * @returns {number}
 */
export function estimateCost(model_id, input_tokens, output_tokens) {
    const idPtr = passStringToWasm0(model_id, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const idLen = WASM_VECTOR_LEN;
    // The WASM call yields [cost, error slot, error flag].
    const [cost, errorIdx, failed] = wasm.estimateCost(idPtr, idLen, input_tokens, output_tokens);
    if (!failed) {
        return cost;
    }
    throw takeFromExternrefTable0(errorIdx);
}
|
|
149
|
+
|
|
150
|
+
/**
 * Look up an encoding by its name.
 *
 * Supported names: `"cl100k_base"`, `"o200k_base"`, `"p50k_base"`,
 * `"p50k_edit"`, `"r50k_base"`.
 *
 * Throws `Error` when the encoding name is unknown.
 * @param {string} name
 * @returns {Encoding}
 */
export function getEncoding(name) {
    const namePtr = passStringToWasm0(name, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const nameLen = WASM_VECTOR_LEN;
    // The WASM call yields [encoding pointer, error slot, error flag].
    const [encodingPtr, errorIdx, failed] = wasm.getEncoding(namePtr, nameLen);
    if (!failed) {
        return Encoding.__wrap(encodingPtr);
    }
    throw takeFromExternrefTable0(errorIdx);
}
|
|
168
|
+
|
|
169
|
+
/**
 * Fetch pricing and metadata for a model as a plain JS object.
 *
 * The returned object carries the fields `id`, `provider`, `input_per_1m`,
 * `output_per_1m`, `cached_input_per_1m`, `context_window` and `max_output`.
 *
 * Throws `Error` when the model id is unknown.
 * @param {string} model_id
 * @returns {any}
 */
export function getModelInfo(model_id) {
    const idPtr = passStringToWasm0(model_id, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const idLen = WASM_VECTOR_LEN;
    // The WASM call yields [result slot, error slot, error flag]; both slots
    // index into the externref table.
    const [infoIdx, errorIdx, failed] = wasm.getModelInfo(idPtr, idLen);
    if (!failed) {
        return takeFromExternrefTable0(infoIdx);
    }
    throw takeFromExternrefTable0(errorIdx);
}
|
|
188
|
+
|
|
189
|
+
/**
 * Build the import object handed to WebAssembly instantiation.
 *
 * Every entry is a JS function the WASM module calls back into; all of them
 * are grouped under the "./tiktoken_wasm_bg.js" import namespace.
 * @returns {object}
 */
function __wbg_get_imports() {
    const glue = {
        __proto__: null,
        // Create (but do not throw) an Error from a WASM string.
        __wbg_Error_83742b46f01ce22d: (msgPtr, msgLen) => Error(getStringFromWasm0(msgPtr, msgLen)),
        // Stringify a JS value and write its (ptr, len) pair at `outPtr`.
        __wbg_String_8564e559799eccda: (outPtr, value) => {
            const strPtr = passStringToWasm0(String(value), wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
            const strLen = WASM_VECTOR_LEN;
            const view = getDataViewMemory0();
            view.setInt32(outPtr + 4, strLen, true);
            view.setInt32(outPtr, strPtr, true);
        },
        // Throw an Error whose message is a WASM string.
        __wbg___wbindgen_throw_6ddd609b62940d55: (msgPtr, msgLen) => {
            throw new Error(getStringFromWasm0(msgPtr, msgLen));
        },
        // Create an empty object.
        __wbg_new_ab79df5bd7c26067: () => new Object(),
        // Property assignment: target[key] = value.
        __wbg_set_6be42768c690e380: (target, key, value) => {
            target[key] = value;
        },
        // Cast intrinsic for `F64 -> Externref`.
        __wbindgen_cast_0000000000000001: (value) => value,
        // Cast intrinsic for `Ref(String) -> Externref`.
        __wbindgen_cast_0000000000000002: (strPtr, strLen) => getStringFromWasm0(strPtr, strLen),
        // Grow the externref table by four slots and seed it with the
        // constants undefined, null, true and false.
        __wbindgen_init_externref_table: () => {
            const refs = wasm.__wbindgen_externrefs;
            const base = refs.grow(4);
            refs.set(0, undefined);
            refs.set(base + 0, undefined);
            refs.set(base + 1, null);
            refs.set(base + 2, true);
            refs.set(base + 3, false);
        },
    };
    return {
        __proto__: null,
        "./tiktoken_wasm_bg.js": glue,
    };
}
|
|
238
|
+
|
|
239
|
+
// Frees the WASM-side encoding when its JS wrapper is garbage-collected.
// Where FinalizationRegistry is unavailable a no-op stand-in is used, so
// register()/unregister() can be called unconditionally.
const EncodingFinalization = (typeof FinalizationRegistry !== 'undefined')
    ? new FinalizationRegistry(ptr => wasm.__wbg_encoding_free(ptr >>> 0, 1))
    : { register: () => {}, unregister: () => {} };
|
|
242
|
+
|
|
243
|
+
/**
 * View `len` 32-bit unsigned integers of WASM memory starting at byte offset `ptr`.
 * The result is a subarray of the shared memory view, not a copy.
 * @param {number} ptr
 * @param {number} len
 * @returns {Uint32Array}
 */
function getArrayU32FromWasm0(ptr, len) {
    // Convert the byte offset to an element index.
    const first = (ptr >>> 0) / 4;
    return getUint32ArrayMemory0().subarray(first, first + len);
}
|
|
247
|
+
|
|
248
|
+
let cachedDataViewMemory0 = null;
/**
 * Return a DataView over the current WASM memory, rebuilding the cached view
 * when it is missing, its buffer reports `detached`, or (where `detached` is
 * not available) its buffer is no longer the module's current memory buffer.
 * @returns {DataView}
 */
function getDataViewMemory0() {
    const view = cachedDataViewMemory0;
    const stale = view === null
        || view.buffer.detached === true
        || (view.buffer.detached === undefined && view.buffer !== wasm.memory.buffer);
    if (stale) {
        cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
    }
    return cachedDataViewMemory0;
}
|
|
255
|
+
|
|
256
|
+
/**
 * Decode `len` bytes of WASM memory at `ptr` into a JS string.
 * @param {number} ptr
 * @param {number} len
 * @returns {string}
 */
function getStringFromWasm0(ptr, len) {
    // Normalize the pointer to an unsigned 32-bit integer before decoding.
    return decodeText(ptr >>> 0, len);
}
|
|
260
|
+
|
|
261
|
+
let cachedUint32ArrayMemory0 = null;
/**
 * Return a cached Uint32Array view over WASM memory, recreating it when the
 * cache is empty or the cached view has zero byte length (presumably because
 * its buffer was detached when WASM memory grew).
 * @returns {Uint32Array}
 */
function getUint32ArrayMemory0() {
    if (cachedUint32ArrayMemory0 !== null && cachedUint32ArrayMemory0.byteLength !== 0) {
        return cachedUint32ArrayMemory0;
    }
    cachedUint32ArrayMemory0 = new Uint32Array(wasm.memory.buffer);
    return cachedUint32ArrayMemory0;
}
|
|
268
|
+
|
|
269
|
+
let cachedUint8ArrayMemory0 = null;
/**
 * Return a cached Uint8Array view over WASM memory, recreating it when the
 * cache is empty or the cached view has zero byte length (presumably because
 * its buffer was detached when WASM memory grew).
 * @returns {Uint8Array}
 */
function getUint8ArrayMemory0() {
    if (cachedUint8ArrayMemory0 !== null && cachedUint8ArrayMemory0.byteLength !== 0) {
        return cachedUint8ArrayMemory0;
    }
    cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
    return cachedUint8ArrayMemory0;
}
|
|
276
|
+
|
|
277
|
+
/**
 * Copy an array of 32-bit values into freshly allocated WASM memory.
 *
 * The element count is published through `WASM_VECTOR_LEN`.
 * @param {ArrayLike<number>} arg
 * @param {Function} malloc - WASM allocator taking (size, align)
 * @returns {number} byte offset of the copy inside WASM memory
 */
function passArray32ToWasm0(arg, malloc) {
    const count = arg.length;
    // 4 bytes per element, 4-byte aligned.
    const ptr = malloc(count * 4, 4) >>> 0;
    getUint32ArrayMemory0().set(arg, ptr / 4);
    WASM_VECTOR_LEN = count;
    return ptr;
}
|
|
283
|
+
|
|
284
|
+
/**
 * Copy a JS string into WASM memory as UTF-8.
 *
 * The number of bytes written is published through `WASM_VECTOR_LEN`.
 * Without `realloc` the string is encoded up front and copied in one go.
 * With `realloc`, an ASCII fast path writes bytes directly and only the
 * non-ASCII remainder goes through the text encoder.
 * @param {string} arg
 * @param {Function} malloc - WASM allocator taking (size, align)
 * @param {Function} [realloc] - WASM reallocator taking (ptr, oldSize, newSize, align)
 * @returns {number} byte offset of the encoded string inside WASM memory
 */
function passStringToWasm0(arg, malloc, realloc) {
    if (realloc === undefined) {
        const encoded = cachedTextEncoder.encode(arg);
        const dest = malloc(encoded.length, 1) >>> 0;
        getUint8ArrayMemory0().subarray(dest, dest + encoded.length).set(encoded);
        WASM_VECTOR_LEN = encoded.length;
        return dest;
    }

    // Start by assuming one byte per UTF-16 code unit (pure ASCII).
    let capacity = arg.length;
    let ptr = malloc(capacity, 1) >>> 0;

    const mem = getUint8ArrayMemory0();

    // ASCII fast path: copy code units until the first non-ASCII one.
    let written = 0;
    while (written < capacity) {
        const code = arg.charCodeAt(written);
        if (code > 0x7F) break;
        mem[ptr + written] = code;
        written++;
    }

    if (written !== capacity) {
        const rest = written !== 0 ? arg.slice(written) : arg;
        // Reserve 3 bytes for every remaining code unit.
        const grown = written + rest.length * 3;
        ptr = realloc(ptr, capacity, grown, 1) >>> 0;
        capacity = grown;
        const target = getUint8ArrayMemory0().subarray(ptr + written, ptr + capacity);
        const outcome = cachedTextEncoder.encodeInto(rest, target);

        written += outcome.written;
        // Shrink the allocation to the bytes actually used.
        ptr = realloc(ptr, capacity, written, 1) >>> 0;
    }

    WASM_VECTOR_LEN = written;
    return ptr;
}
|
|
320
|
+
|
|
321
|
+
/**
 * Read the value stored at `idx` in the externref table, then release that slot.
 * @param {number} idx
 * @returns {any}
 */
function takeFromExternrefTable0(idx) {
    const refs = wasm.__wbindgen_externrefs;
    const taken = refs.get(idx);
    wasm.__externref_table_dealloc(idx);
    return taken;
}
|
|
326
|
+
|
|
327
|
+
// Strict UTF-8 decoder (fatal on invalid input, BOM left in place), primed
// with one empty decode() call.
let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
cachedTextDecoder.decode();
// Byte budget after which the decoder is replaced; going by the name, this
// presumably works around a Safari limit on bytes decoded per TextDecoder.
const MAX_SAFARI_DECODE_BYTES = 2146435072;
let numBytesDecoded = 0;
/**
 * Decode `len` bytes of WASM memory starting at `ptr` as UTF-8.
 *
 * Keeps a running total of decoded bytes and swaps in a fresh decoder once
 * the total reaches `MAX_SAFARI_DECODE_BYTES`.
 * @param {number} ptr
 * @param {number} len
 * @returns {string}
 */
function decodeText(ptr, len) {
    numBytesDecoded += len;
    const budgetExhausted = numBytesDecoded >= MAX_SAFARI_DECODE_BYTES;
    if (budgetExhausted) {
        cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
        cachedTextDecoder.decode();
        // Restart the count with the bytes about to be decoded.
        numBytesDecoded = len;
    }
    const bytes = getUint8ArrayMemory0().subarray(ptr, ptr + len);
    return cachedTextDecoder.decode(bytes);
}
|
|
340
|
+
|
|
341
|
+
const cachedTextEncoder = new TextEncoder();
|
|
342
|
+
|
|
343
|
+
if (!('encodeInto' in cachedTextEncoder)) {
|
|
344
|
+
cachedTextEncoder.encodeInto = function (arg, view) {
|
|
345
|
+
const buf = cachedTextEncoder.encode(arg);
|
|
346
|
+
view.set(buf);
|
|
347
|
+
return {
|
|
348
|
+
read: arg.length,
|
|
349
|
+
written: buf.length
|
|
350
|
+
};
|
|
351
|
+
};
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
// Length of the buffer most recently passed into WASM; written by
// passStringToWasm0()/passArray32ToWasm0() and read by callers right after.
let WASM_VECTOR_LEN = 0;
|
|
355
|
+
|
|
356
|
+
// Assigned by __wbg_finalize_init(): `wasm` holds the instance exports and
// `wasmModule` the module; `wasm` stays undefined until initialization.
let wasmModule, wasm;
|
|
357
|
+
/**
 * Record the freshly instantiated module as the active one.
 *
 * Stores the exports and module, drops every cached memory view so the next
 * access rebuilds it, and runs the module's start function.
 * @param {WebAssembly.Instance} instance
 * @param {WebAssembly.Module} module
 * @returns {object} the instance's exports
 */
function __wbg_finalize_init(instance, module) {
    wasm = instance.exports;
    wasmModule = module;
    // Any cached view belongs to a previous memory; discard them all.
    cachedUint8ArrayMemory0 = null;
    cachedUint32ArrayMemory0 = null;
    cachedDataViewMemory0 = null;
    wasm.__wbindgen_start();
    return wasm;
}
|
|
366
|
+
|
|
367
|
+
async function __wbg_load(module, imports) {
|
|
368
|
+
if (typeof Response === 'function' && module instanceof Response) {
|
|
369
|
+
if (typeof WebAssembly.instantiateStreaming === 'function') {
|
|
370
|
+
try {
|
|
371
|
+
return await WebAssembly.instantiateStreaming(module, imports);
|
|
372
|
+
} catch (e) {
|
|
373
|
+
const validResponse = module.ok && expectedResponseType(module.type);
|
|
374
|
+
|
|
375
|
+
if (validResponse && module.headers.get('Content-Type') !== 'application/wasm') {
|
|
376
|
+
console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
|
|
377
|
+
|
|
378
|
+
} else { throw e; }
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
const bytes = await module.arrayBuffer();
|
|
383
|
+
return await WebAssembly.instantiate(bytes, imports);
|
|
384
|
+
} else {
|
|
385
|
+
const instance = await WebAssembly.instantiate(module, imports);
|
|
386
|
+
|
|
387
|
+
if (instance instanceof WebAssembly.Instance) {
|
|
388
|
+
return { instance, module };
|
|
389
|
+
} else {
|
|
390
|
+
return instance;
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
function expectedResponseType(type) {
|
|
395
|
+
switch (type) {
|
|
396
|
+
case 'basic': case 'cors': case 'default': return true;
|
|
397
|
+
}
|
|
398
|
+
return false;
|
|
399
|
+
}
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
/**
 * Synchronously instantiate the WASM module.
 *
 * Returns the existing exports when the module is already initialized.
 * Accepts an options object `{ module }`; passing the bytes or compiled
 * module directly still works but logs a deprecation warning.
 * @param {{ module: BufferSource | WebAssembly.Module } | BufferSource | WebAssembly.Module} module
 * @returns {object} the instance's exports
 */
function initSync(module) {
    if (wasm !== undefined) return wasm;

    if (module !== undefined) {
        const isOptionsObject = Object.getPrototypeOf(module) === Object.prototype;
        if (isOptionsObject) {
            module = module.module;
        } else {
            console.warn('using deprecated parameters for `initSync()`; pass a single object instead')
        }
    }

    const imports = __wbg_get_imports();
    // Compile raw bytes first; an already compiled module is used as-is.
    const compiled = module instanceof WebAssembly.Module
        ? module
        : new WebAssembly.Module(module);
    const instance = new WebAssembly.Instance(compiled, imports);
    return __wbg_finalize_init(instance, compiled);
}
|
|
421
|
+
|
|
422
|
+
/**
 * Asynchronously instantiate the WASM module.
 *
 * Returns the existing exports when the module is already initialized.
 * Accepts an options object `{ module_or_path }`; passing the input directly
 * still works but logs a deprecation warning. With no input, the binary
 * `tiktoken_wasm_bg.wasm` next to this file is used. Strings, `Request`s and
 * `URL`s are fetched; anything else is passed to the loader as given.
 * @param {any} [module_or_path]
 * @returns {Promise<object>} the instance's exports
 */
async function __wbg_init(module_or_path) {
    if (wasm !== undefined) return wasm;

    if (module_or_path !== undefined) {
        const isOptionsObject = Object.getPrototypeOf(module_or_path) === Object.prototype;
        if (isOptionsObject) {
            module_or_path = module_or_path.module_or_path;
        } else {
            console.warn('using deprecated parameters for the initialization function; pass a single object instead')
        }
    }

    if (module_or_path === undefined) {
        // Default to the binary shipped alongside this module.
        module_or_path = new URL('tiktoken_wasm_bg.wasm', import.meta.url);
    }
    const imports = __wbg_get_imports();

    const isString = typeof module_or_path === 'string';
    const isRequest = typeof Request === 'function' && module_or_path instanceof Request;
    const isUrl = typeof URL === 'function' && module_or_path instanceof URL;
    if (isString || isRequest || isUrl) {
        module_or_path = fetch(module_or_path);
    }

    // module_or_path may be a promise (e.g. the fetch above); resolve it first.
    const source = await module_or_path;
    const { instance, module } = await __wbg_load(source, imports);

    return __wbg_finalize_init(instance, module);
}
|
|
447
|
+
|
|
448
|
+
export { initSync, __wbg_init as default };
|
|
Binary file
|