j_r_r_token 1.1.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/Cargo.lock +36 -38
- data/README.md +11 -8
- data/ext/j_r_r_token/Cargo.toml +2 -2
- data/ext/j_r_r_token/src/lib.rs +32 -45
- data/j_r_r_token.gemspec +1 -1
- data/lib/j_r_r_token/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2ec532950d7c7718d14e303c9c43d18df4eb6d191bb4dd56cfc1b9dbc62cd673
|
|
4
|
+
data.tar.gz: c9bff0c4a3cd9c815b45ee7bd1362d71c12f198241bbbee605050d5addfb9feb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ef83e1a569ff920d8cbd55d7f3109c883b2e1b7e5fe561db7f2ac3e67fe3de8d1ad4c552b9bd10c9bbfb9af783331dac0c65d7b45f49af8bc0efd7c8849f747f
|
|
7
|
+
data.tar.gz: c990330066e627ca909479fefb397480d05da12401aa23383c96e53c58bae351c2bc4e9ad535d476153507a67333e90cf6cd607e6dfd5590f716a02ce26d1acd
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
---
|
|
11
|
+
## [1.2.1] - 2026-05-13
|
|
12
|
+
### Breaking Change
|
|
13
|
+
- Minimum Ruby version raised from 2.7.0 to 3.0.0. Ruby 2.7 has been EOL since March 2023 and is no longer supported by upstream `rake-compiler-dock` (1.11.0+), which `rb_sys` uses for cross-compiling native gems. Production environments on Ruby 3.0+ are unaffected.
|
|
14
|
+
|
|
15
|
+
### New Functionality
|
|
16
|
+
- Added `o200k_harmony` tokenizer with support for `gpt-oss-20b` and `gpt-oss-120b`.
|
|
17
|
+
- Added support for additional model aliases now recognized by `tiktoken-rs` 0.11: `gpt-5-mini`, `gpt-5-nano`, `gpt-5.x` decimal variants (e.g. `gpt-5.4`, `gpt-5.4-pro`, `gpt-5.2-codex`), `gpt-4.5-*`, `o1`, `o3`, `o4-mini`, `codex-mini`, `codex-mini-latest`, and `ft:gpt-5` fine-tunes.
|
|
18
|
+
- Fine-tuned model resolution (`ft:` prefix) is now handled natively by the upstream `get_tokenizer` mapping, so any future `ft:` base models are picked up automatically.
|
|
19
|
+
|
|
20
|
+
### Change
|
|
21
|
+
- Bumped `tiktoken-rs` from 0.7 to 0.11.
|
|
22
|
+
- Bumped `magnus` from 0.6 to 0.8 (with the `old-api` feature enabled to preserve current bindings). Still supports Ruby >= 3.0.
|
|
23
|
+
- Refactored Rust model dispatch to delegate to `tiktoken_rs::tokenizer::get_tokenizer` instead of maintaining a parallel alias list. The gem now stays in sync with upstream OpenAI tiktoken automatically on each `tiktoken-rs` bump.
|
|
24
|
+
- CI matrix trimmed: dropped deprecated `macos-13`; consolidated to `ubuntu-latest`, `ubuntu-24.04-arm`, `macos-latest`, `macos-15-intel`. Added Ruby 4.0 to both test and cross-gem builds.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
## [1.2.0] - 2026-05-13
|
|
28
|
+
### Note
|
|
29
|
+
- Partial release. The `arm-linux` cross-compile leg failed because upstream `rake-compiler-dock` 1.11.0+ dropped Ruby 2.7, so the published artifact set is incomplete. Please use 1.2.1 instead, which drops Ruby 2.7 from the support matrix and ships a complete set of platform gems.
|
|
30
|
+
|
|
10
31
|
---
|
|
11
32
|
## [1.1.0] - 2025-08-26
|
|
12
33
|
### New Functionality
|
data/Cargo.lock
CHANGED
|
@@ -13,9 +13,9 @@ dependencies = [
|
|
|
13
13
|
|
|
14
14
|
[[package]]
|
|
15
15
|
name = "anyhow"
|
|
16
|
-
version = "1.0.
|
|
16
|
+
version = "1.0.102"
|
|
17
17
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
18
|
-
checksum = "
|
|
18
|
+
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
|
|
19
19
|
|
|
20
20
|
[[package]]
|
|
21
21
|
name = "base64"
|
|
@@ -25,38 +25,36 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
|
|
25
25
|
|
|
26
26
|
[[package]]
|
|
27
27
|
name = "bindgen"
|
|
28
|
-
version = "0.
|
|
28
|
+
version = "0.72.1"
|
|
29
29
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
30
|
-
checksum = "
|
|
30
|
+
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
|
31
31
|
dependencies = [
|
|
32
32
|
"bitflags",
|
|
33
33
|
"cexpr",
|
|
34
34
|
"clang-sys",
|
|
35
35
|
"itertools",
|
|
36
|
-
"lazy_static",
|
|
37
|
-
"lazycell",
|
|
38
36
|
"proc-macro2",
|
|
39
37
|
"quote",
|
|
40
38
|
"regex",
|
|
41
|
-
"rustc-hash",
|
|
39
|
+
"rustc-hash 2.1.2",
|
|
42
40
|
"shlex",
|
|
43
41
|
"syn",
|
|
44
42
|
]
|
|
45
43
|
|
|
46
44
|
[[package]]
|
|
47
45
|
name = "bit-set"
|
|
48
|
-
version = "0.
|
|
46
|
+
version = "0.8.0"
|
|
49
47
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
50
|
-
checksum = "
|
|
48
|
+
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
|
|
51
49
|
dependencies = [
|
|
52
50
|
"bit-vec",
|
|
53
51
|
]
|
|
54
52
|
|
|
55
53
|
[[package]]
|
|
56
54
|
name = "bit-vec"
|
|
57
|
-
version = "0.
|
|
55
|
+
version = "0.8.0"
|
|
58
56
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
59
|
-
checksum = "
|
|
57
|
+
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
|
|
60
58
|
|
|
61
59
|
[[package]]
|
|
62
60
|
name = "bitflags"
|
|
@@ -66,9 +64,9 @@ checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
|
|
|
66
64
|
|
|
67
65
|
[[package]]
|
|
68
66
|
name = "bstr"
|
|
69
|
-
version = "1.12.
|
|
67
|
+
version = "1.12.1"
|
|
70
68
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
71
|
-
checksum = "
|
|
69
|
+
checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
|
|
72
70
|
dependencies = [
|
|
73
71
|
"memchr",
|
|
74
72
|
"regex-automata",
|
|
@@ -109,9 +107,9 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
|
109
107
|
|
|
110
108
|
[[package]]
|
|
111
109
|
name = "fancy-regex"
|
|
112
|
-
version = "0.
|
|
110
|
+
version = "0.17.0"
|
|
113
111
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
114
|
-
checksum = "
|
|
112
|
+
checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8"
|
|
115
113
|
dependencies = [
|
|
116
114
|
"bit-set",
|
|
117
115
|
"regex-automata",
|
|
@@ -147,12 +145,6 @@ version = "1.5.0"
|
|
|
147
145
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
148
146
|
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
|
149
147
|
|
|
150
|
-
[[package]]
|
|
151
|
-
name = "lazycell"
|
|
152
|
-
version = "1.3.0"
|
|
153
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
154
|
-
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
155
|
-
|
|
156
148
|
[[package]]
|
|
157
149
|
name = "libc"
|
|
158
150
|
version = "0.2.174"
|
|
@@ -171,9 +163,9 @@ dependencies = [
|
|
|
171
163
|
|
|
172
164
|
[[package]]
|
|
173
165
|
name = "magnus"
|
|
174
|
-
version = "0.
|
|
166
|
+
version = "0.8.2"
|
|
175
167
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
176
|
-
checksum = "
|
|
168
|
+
checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
|
|
177
169
|
dependencies = [
|
|
178
170
|
"magnus-macros",
|
|
179
171
|
"rb-sys",
|
|
@@ -183,9 +175,9 @@ dependencies = [
|
|
|
183
175
|
|
|
184
176
|
[[package]]
|
|
185
177
|
name = "magnus-macros"
|
|
186
|
-
version = "0.
|
|
178
|
+
version = "0.8.0"
|
|
187
179
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
188
|
-
checksum = "
|
|
180
|
+
checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
|
|
189
181
|
dependencies = [
|
|
190
182
|
"proc-macro2",
|
|
191
183
|
"quote",
|
|
@@ -234,18 +226,18 @@ dependencies = [
|
|
|
234
226
|
|
|
235
227
|
[[package]]
|
|
236
228
|
name = "rb-sys"
|
|
237
|
-
version = "0.9.
|
|
229
|
+
version = "0.9.128"
|
|
238
230
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
239
|
-
checksum = "
|
|
231
|
+
checksum = "45ca28513560e56cfb79a62b1fce363c73af170a182024ce880c77ee9429920a"
|
|
240
232
|
dependencies = [
|
|
241
233
|
"rb-sys-build",
|
|
242
234
|
]
|
|
243
235
|
|
|
244
236
|
[[package]]
|
|
245
237
|
name = "rb-sys-build"
|
|
246
|
-
version = "0.9.
|
|
238
|
+
version = "0.9.128"
|
|
247
239
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
248
|
-
checksum = "
|
|
240
|
+
checksum = "ce04b2c55eff3a21aaa623fcc655d94373238e72cac6b3e1a3641ff31649f99a"
|
|
249
241
|
dependencies = [
|
|
250
242
|
"bindgen",
|
|
251
243
|
"lazy_static",
|
|
@@ -258,15 +250,15 @@ dependencies = [
|
|
|
258
250
|
|
|
259
251
|
[[package]]
|
|
260
252
|
name = "rb-sys-env"
|
|
261
|
-
version = "0.
|
|
253
|
+
version = "0.2.3"
|
|
262
254
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
263
|
-
checksum = "
|
|
255
|
+
checksum = "cca7ad6a7e21e72151d56fe2495a259b5670e204c3adac41ee7ef676ea08117a"
|
|
264
256
|
|
|
265
257
|
[[package]]
|
|
266
258
|
name = "regex"
|
|
267
|
-
version = "1.
|
|
259
|
+
version = "1.12.3"
|
|
268
260
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
269
|
-
checksum = "
|
|
261
|
+
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
|
|
270
262
|
dependencies = [
|
|
271
263
|
"aho-corasick",
|
|
272
264
|
"memchr",
|
|
@@ -276,9 +268,9 @@ dependencies = [
|
|
|
276
268
|
|
|
277
269
|
[[package]]
|
|
278
270
|
name = "regex-automata"
|
|
279
|
-
version = "0.4.
|
|
271
|
+
version = "0.4.14"
|
|
280
272
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
281
|
-
checksum = "
|
|
273
|
+
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
|
282
274
|
dependencies = [
|
|
283
275
|
"aho-corasick",
|
|
284
276
|
"memchr",
|
|
@@ -297,6 +289,12 @@ version = "1.1.0"
|
|
|
297
289
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
298
290
|
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
|
299
291
|
|
|
292
|
+
[[package]]
|
|
293
|
+
name = "rustc-hash"
|
|
294
|
+
version = "2.1.2"
|
|
295
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
296
|
+
checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
|
|
297
|
+
|
|
300
298
|
[[package]]
|
|
301
299
|
name = "seq-macro"
|
|
302
300
|
version = "0.3.6"
|
|
@@ -348,9 +346,9 @@ dependencies = [
|
|
|
348
346
|
|
|
349
347
|
[[package]]
|
|
350
348
|
name = "tiktoken-rs"
|
|
351
|
-
version = "0.
|
|
349
|
+
version = "0.11.0"
|
|
352
350
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
353
|
-
checksum = "
|
|
351
|
+
checksum = "fac4a168cfc1d8ed65bf17a6ee0843ad9a68f863c63c0fb2fa7eab67838782ee"
|
|
354
352
|
dependencies = [
|
|
355
353
|
"anyhow",
|
|
356
354
|
"base64",
|
|
@@ -358,7 +356,7 @@ dependencies = [
|
|
|
358
356
|
"fancy-regex",
|
|
359
357
|
"lazy_static",
|
|
360
358
|
"regex",
|
|
361
|
-
"rustc-hash",
|
|
359
|
+
"rustc-hash 1.1.0",
|
|
362
360
|
]
|
|
363
361
|
|
|
364
362
|
[[package]]
|
data/README.md
CHANGED
|
@@ -6,7 +6,7 @@ JRRToken provides a high-performance, native Ruby interface for counting tokens
|
|
|
6
6
|
|
|
7
7
|
The gem ships with pre-compiled native extensions for Linux and macOS, so your end-users don't need a Rust toolchain installed.
|
|
8
8
|
|
|
9
|
-
Supports Ruby >=
|
|
9
|
+
Supports Ruby >= 3.0.0
|
|
10
10
|
|
|
11
11
|
Extensive Model Support: Includes tokenizers for all modern and legacy OpenAI models, recognizing dozens of model aliases automatically.
|
|
12
12
|
|
|
@@ -15,16 +15,16 @@ Extensive Model Support: Includes tokenizers for all modern and legacy OpenAI mo
|
|
|
15
15
|
## Install
|
|
16
16
|
|
|
17
17
|
#### Gemfile
|
|
18
|
-
`gem 'j_r_r_token', '~> 1.
|
|
18
|
+
`gem 'j_r_r_token', '~> 1.2'`
|
|
19
19
|
|
|
20
20
|
#### Command Line
|
|
21
|
-
`gem install j_r_r_token -v '~> 1.
|
|
21
|
+
`gem install j_r_r_token -v '~> 1.2'`
|
|
22
22
|
|
|
23
23
|
## Use
|
|
24
24
|
|
|
25
25
|
### Count tokens for a specific model (the model keyword is required)
|
|
26
26
|
```Ruby
|
|
27
|
-
count = JRRToken::Tokenizer.count("hello world!", model: "gpt-5")
|
|
27
|
+
count = JRRToken::Tokenizer.count("hello world!", model: "gpt-5.4")
|
|
28
28
|
```
|
|
29
29
|
|
|
30
30
|
### The gem recognizes many aliases, including older models
|
|
@@ -40,18 +40,21 @@ count = JRRToken::Tokenizer.count(1234, model: "o200k_base")
|
|
|
40
40
|
If you provide an unsupported model name, the gem will raise an `ArgumentError`.
|
|
41
41
|
|
|
42
42
|
## Supported Models
|
|
43
|
-
The gem automatically maps dozens of model names and prefixes to the correct underlying tokenizer. You don't need to know the tokenizer's base name (e.g., cl100k_base); just use the model name you're working with.
|
|
43
|
+
The gem automatically maps dozens of model names and prefixes to the correct underlying tokenizer. You don't need to know the tokenizer's base name (e.g., cl100k_base); just use the model name you're working with. Model resolution is delegated to [`tiktoken-rs`](https://github.com/zurawiki/tiktoken-rs), so the alias list stays in sync with upstream.
|
|
44
44
|
|
|
45
|
-
-
|
|
46
|
-
-
|
|
45
|
+
- o200k_harmony Models (e.g., gpt-oss-20b, gpt-oss-120b)
|
|
46
|
+
- o200k_base Models (e.g., gpt-5.4, gpt-5-mini, gpt-4.5, gpt-4.1, gpt-4o, o1, o3, o4-mini, codex-mini)
|
|
47
|
+
- cl100k_base Models (e.g., gpt-4, gpt-3.5-turbo, text-embedding-3-small)
|
|
47
48
|
- p50k_base Models (e.g., text-davinci-003)
|
|
48
49
|
- r50k_base Models (e.g., gpt-2)
|
|
49
50
|
- p50k_edit Models
|
|
50
51
|
|
|
52
|
+
Fine-tuned models (`ft:<base>:org:name:id`) are automatically resolved to the same tokenizer as their base model.
|
|
53
|
+
|
|
51
54
|
## Developing JRRToken Locally
|
|
52
55
|
|
|
53
56
|
### Install Ruby and Rust dependencies
|
|
54
|
-
Ruby >=
|
|
57
|
+
Ruby >= 3.0.0
|
|
55
58
|
|
|
56
59
|
RubyGems >= 3.0.0
|
|
57
60
|
|
data/ext/j_r_r_token/Cargo.toml
CHANGED
data/ext/j_r_r_token/src/lib.rs
CHANGED
|
@@ -1,53 +1,40 @@
|
|
|
1
1
|
use magnus::{define_module, exception, function, Error};
|
|
2
|
-
use tiktoken_rs::{
|
|
2
|
+
use tiktoken_rs::{
|
|
3
|
+
cl100k_base, o200k_base, o200k_harmony, p50k_base, p50k_edit, r50k_base,
|
|
4
|
+
tokenizer::{get_tokenizer, Tokenizer},
|
|
5
|
+
CoreBPE,
|
|
6
|
+
};
|
|
3
7
|
|
|
4
|
-
//
|
|
5
|
-
//
|
|
8
|
+
// Resolves a model name (or bare encoding name) to a CoreBPE instance.
|
|
9
|
+
// Model -> tokenizer dispatch is delegated to `tiktoken_rs::tokenizer::get_tokenizer`,
|
|
10
|
+
// which is kept in sync with upstream OpenAI tiktoken. It handles `ft:` prefix
|
|
11
|
+
// stripping, GPT-5/4.5/4.1/4o families, o1/o3/o4 reasoning models, gpt-oss
|
|
12
|
+
// (harmony), codex-mini, and all legacy aliases.
|
|
6
13
|
fn get_bpe_from_model(model: &str) -> Result<CoreBPE, Error> {
|
|
7
|
-
let
|
|
8
|
-
//
|
|
9
|
-
"
|
|
10
|
-
|
|
11
|
-
"cl100k_base"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"p50k_edit" | "text-davinci-edit-001" | "code-davinci-edit-001" => p50k_edit(),
|
|
28
|
-
// --- Fallback for Prefixes ---
|
|
29
|
-
_ => {
|
|
30
|
-
let o200k_prefixes = [
|
|
31
|
-
"o1-", "o3-", "o4-", "gpt-5-", "gpt-4.1-", "chatgpt-4o-", "gpt-4o-", "ft:gpt-4o",
|
|
32
|
-
];
|
|
33
|
-
|
|
34
|
-
let cl100k_prefixes = [
|
|
35
|
-
"gpt-4-", "gpt-3.5-turbo-", "gpt-35-turbo-", "ft:gpt-4:",
|
|
36
|
-
"ft:gpt-4-", "ft:gpt-3.5-turbo", "ft:davinci-002", "ft:babbage-002",
|
|
37
|
-
];
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if o200k_prefixes.iter().any(|prefix| model.starts_with(prefix)) {
|
|
41
|
-
o200k_base()
|
|
42
|
-
} else if cl100k_prefixes.iter().any(|prefix| model.starts_with(prefix)) {
|
|
43
|
-
cl100k_base()
|
|
44
|
-
} else {
|
|
45
|
-
let err_msg = format!("Model '{}' not supported.", model);
|
|
46
|
-
return Err(Error::new(exception::arg_error(), err_msg));
|
|
14
|
+
let tokenizer_result = match model {
|
|
15
|
+
// Bare encoding names — callers may pass these directly.
|
|
16
|
+
"o200k_harmony" => o200k_harmony(),
|
|
17
|
+
"o200k_base" => o200k_base(),
|
|
18
|
+
"cl100k_base" => cl100k_base(),
|
|
19
|
+
"p50k_base" => p50k_base(),
|
|
20
|
+
"p50k_edit" => p50k_edit(),
|
|
21
|
+
"r50k_base" => r50k_base(),
|
|
22
|
+
_ => match get_tokenizer(model) {
|
|
23
|
+
Some(Tokenizer::O200kHarmony) => o200k_harmony(),
|
|
24
|
+
Some(Tokenizer::O200kBase) => o200k_base(),
|
|
25
|
+
Some(Tokenizer::Cl100kBase) => cl100k_base(),
|
|
26
|
+
Some(Tokenizer::P50kBase) => p50k_base(),
|
|
27
|
+
Some(Tokenizer::P50kEdit) => p50k_edit(),
|
|
28
|
+
Some(Tokenizer::R50kBase) | Some(Tokenizer::Gpt2) => r50k_base(),
|
|
29
|
+
None => {
|
|
30
|
+
return Err(Error::new(
|
|
31
|
+
exception::arg_error(),
|
|
32
|
+
format!("Model '{}' not supported.", model),
|
|
33
|
+
));
|
|
47
34
|
}
|
|
48
|
-
}
|
|
35
|
+
},
|
|
49
36
|
};
|
|
50
|
-
|
|
37
|
+
tokenizer_result.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))
|
|
51
38
|
}
|
|
52
39
|
|
|
53
40
|
// This function is exposed to Ruby.
|
data/j_r_r_token.gemspec
CHANGED
|
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
|
|
|
12
12
|
spec.description = "JRRToken is a Ruby gem that wraps the tiktoken Rust library, enabling fast and efficient tokenization for OpenAI models. It supports multiple models including o200k_base, cl100k_base, p50k_base, p50k_edit, and r50k_base."
|
|
13
13
|
spec.homepage = "https://github.com/LoganBresnahan/j_r_r_token"
|
|
14
14
|
spec.license = "MIT"
|
|
15
|
-
spec.required_ruby_version = ">=
|
|
15
|
+
spec.required_ruby_version = ">= 3.0.0"
|
|
16
16
|
spec.required_rubygems_version = ">= 3.0.0"
|
|
17
17
|
|
|
18
18
|
spec.metadata["allowed_push_host"] = "https://rubygems.org"
|
data/lib/j_r_r_token/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: j_r_r_token
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1
|
|
4
|
+
version: 1.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Logan Bresnahan
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-05-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -90,7 +90,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
90
90
|
requirements:
|
|
91
91
|
- - ">="
|
|
92
92
|
- !ruby/object:Gem::Version
|
|
93
|
-
version:
|
|
93
|
+
version: 3.0.0
|
|
94
94
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
95
95
|
requirements:
|
|
96
96
|
- - ">="
|