tiktoken_ruby 0.0.11.1 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vscode/settings.json +3 -0
- data/Cargo.lock +66 -176
- data/Gemfile.lock +42 -36
- data/README.md +0 -5
- data/ext/tiktoken_ruby/Cargo.toml +3 -3
- data/ext/tiktoken_ruby/src/core_bpe_wrapper.rs +1 -1
- data/ext/tiktoken_ruby/src/lib.rs +13 -4
- data/lib/tiktoken_ruby/version.rb +1 -1
- data/lib/tiktoken_ruby.rb +19 -2
- metadata +7 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6eb1bf0a15715fcd42136ff3378a0b2b5676a06dd817fbbe59e83287c9a7e996
|
|
4
|
+
data.tar.gz: 30172f2328b4c98c62963c5ddc5afda840eb0f938e632d40e5aafc1eb7521693
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2db1488ee96bcfc94cbe48617c1f37f54ef74e532dafd50d4c89fc56940dcd3ca83cc3bd4c49ebc8edae71998e37046af62c8e67a8c2a97b3d316ec534acec62
|
|
7
|
+
data.tar.gz: 9ce2ca75ba2ba65c375c562717cdd19c6a3e41c127964775ce49eb1e3664954c530348eba71888b037d7b3ae2568f6fe1b52bd887b27e72294e5530ab54f8f8f
|
data/.vscode/settings.json
CHANGED
data/Cargo.lock
CHANGED
|
@@ -4,30 +4,24 @@ version = 4
|
|
|
4
4
|
|
|
5
5
|
[[package]]
|
|
6
6
|
name = "aho-corasick"
|
|
7
|
-
version = "1.1.
|
|
7
|
+
version = "1.1.4"
|
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
-
checksum = "
|
|
9
|
+
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
|
10
10
|
dependencies = [
|
|
11
11
|
"memchr",
|
|
12
12
|
]
|
|
13
13
|
|
|
14
14
|
[[package]]
|
|
15
15
|
name = "anyhow"
|
|
16
|
-
version = "1.0.
|
|
16
|
+
version = "1.0.100"
|
|
17
17
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
18
|
-
checksum = "
|
|
19
|
-
|
|
20
|
-
[[package]]
|
|
21
|
-
name = "autocfg"
|
|
22
|
-
version = "1.4.0"
|
|
23
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
24
|
-
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
|
18
|
+
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
|
|
25
19
|
|
|
26
20
|
[[package]]
|
|
27
21
|
name = "base64"
|
|
28
|
-
version = "0.
|
|
22
|
+
version = "0.22.1"
|
|
29
23
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
30
|
-
checksum = "
|
|
24
|
+
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
|
31
25
|
|
|
32
26
|
[[package]]
|
|
33
27
|
name = "bindgen"
|
|
@@ -66,15 +60,15 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
|
|
66
60
|
|
|
67
61
|
[[package]]
|
|
68
62
|
name = "bitflags"
|
|
69
|
-
version = "2.
|
|
63
|
+
version = "2.10.0"
|
|
70
64
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
71
|
-
checksum = "
|
|
65
|
+
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
|
|
72
66
|
|
|
73
67
|
[[package]]
|
|
74
68
|
name = "bstr"
|
|
75
|
-
version = "1.
|
|
69
|
+
version = "1.12.1"
|
|
76
70
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
77
|
-
checksum = "
|
|
71
|
+
checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
|
|
78
72
|
dependencies = [
|
|
79
73
|
"memchr",
|
|
80
74
|
"regex-automata",
|
|
@@ -92,9 +86,9 @@ dependencies = [
|
|
|
92
86
|
|
|
93
87
|
[[package]]
|
|
94
88
|
name = "cfg-if"
|
|
95
|
-
version = "1.0.
|
|
89
|
+
version = "1.0.4"
|
|
96
90
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
97
|
-
checksum = "
|
|
91
|
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
98
92
|
|
|
99
93
|
[[package]]
|
|
100
94
|
name = "clang-sys"
|
|
@@ -109,9 +103,9 @@ dependencies = [
|
|
|
109
103
|
|
|
110
104
|
[[package]]
|
|
111
105
|
name = "either"
|
|
112
|
-
version = "1.
|
|
106
|
+
version = "1.15.0"
|
|
113
107
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
114
|
-
checksum = "
|
|
108
|
+
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
115
109
|
|
|
116
110
|
[[package]]
|
|
117
111
|
name = "fancy-regex"
|
|
@@ -126,9 +120,9 @@ dependencies = [
|
|
|
126
120
|
|
|
127
121
|
[[package]]
|
|
128
122
|
name = "glob"
|
|
129
|
-
version = "0.3.
|
|
123
|
+
version = "0.3.3"
|
|
130
124
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
131
|
-
checksum = "
|
|
125
|
+
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
|
132
126
|
|
|
133
127
|
[[package]]
|
|
134
128
|
name = "itertools"
|
|
@@ -153,35 +147,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
|
153
147
|
|
|
154
148
|
[[package]]
|
|
155
149
|
name = "libc"
|
|
156
|
-
version = "0.2.
|
|
150
|
+
version = "0.2.177"
|
|
157
151
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
158
|
-
checksum = "
|
|
152
|
+
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
|
|
159
153
|
|
|
160
154
|
[[package]]
|
|
161
155
|
name = "libloading"
|
|
162
|
-
version = "0.8.
|
|
156
|
+
version = "0.8.9"
|
|
163
157
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
164
|
-
checksum = "
|
|
158
|
+
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
|
165
159
|
dependencies = [
|
|
166
160
|
"cfg-if",
|
|
167
|
-
"windows-
|
|
168
|
-
]
|
|
169
|
-
|
|
170
|
-
[[package]]
|
|
171
|
-
name = "lock_api"
|
|
172
|
-
version = "0.4.12"
|
|
173
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
174
|
-
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
|
|
175
|
-
dependencies = [
|
|
176
|
-
"autocfg",
|
|
177
|
-
"scopeguard",
|
|
161
|
+
"windows-link",
|
|
178
162
|
]
|
|
179
163
|
|
|
180
164
|
[[package]]
|
|
181
165
|
name = "magnus"
|
|
182
|
-
version = "0.
|
|
166
|
+
version = "0.8.2"
|
|
183
167
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
184
|
-
checksum = "
|
|
168
|
+
checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
|
|
185
169
|
dependencies = [
|
|
186
170
|
"magnus-macros",
|
|
187
171
|
"rb-sys",
|
|
@@ -191,9 +175,9 @@ dependencies = [
|
|
|
191
175
|
|
|
192
176
|
[[package]]
|
|
193
177
|
name = "magnus-macros"
|
|
194
|
-
version = "0.
|
|
178
|
+
version = "0.8.0"
|
|
195
179
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
196
|
-
checksum = "
|
|
180
|
+
checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
|
|
197
181
|
dependencies = [
|
|
198
182
|
"proc-macro2",
|
|
199
183
|
"quote",
|
|
@@ -202,9 +186,9 @@ dependencies = [
|
|
|
202
186
|
|
|
203
187
|
[[package]]
|
|
204
188
|
name = "memchr"
|
|
205
|
-
version = "2.7.
|
|
189
|
+
version = "2.7.6"
|
|
206
190
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
207
|
-
checksum = "
|
|
191
|
+
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
|
|
208
192
|
|
|
209
193
|
[[package]]
|
|
210
194
|
name = "minimal-lexical"
|
|
@@ -222,61 +206,38 @@ dependencies = [
|
|
|
222
206
|
"minimal-lexical",
|
|
223
207
|
]
|
|
224
208
|
|
|
225
|
-
[[package]]
|
|
226
|
-
name = "parking_lot"
|
|
227
|
-
version = "0.12.3"
|
|
228
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
229
|
-
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
|
|
230
|
-
dependencies = [
|
|
231
|
-
"lock_api",
|
|
232
|
-
"parking_lot_core",
|
|
233
|
-
]
|
|
234
|
-
|
|
235
|
-
[[package]]
|
|
236
|
-
name = "parking_lot_core"
|
|
237
|
-
version = "0.9.10"
|
|
238
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
239
|
-
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
|
|
240
|
-
dependencies = [
|
|
241
|
-
"cfg-if",
|
|
242
|
-
"libc",
|
|
243
|
-
"redox_syscall",
|
|
244
|
-
"smallvec",
|
|
245
|
-
"windows-targets",
|
|
246
|
-
]
|
|
247
|
-
|
|
248
209
|
[[package]]
|
|
249
210
|
name = "proc-macro2"
|
|
250
|
-
version = "1.0.
|
|
211
|
+
version = "1.0.103"
|
|
251
212
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
252
|
-
checksum = "
|
|
213
|
+
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
|
|
253
214
|
dependencies = [
|
|
254
215
|
"unicode-ident",
|
|
255
216
|
]
|
|
256
217
|
|
|
257
218
|
[[package]]
|
|
258
219
|
name = "quote"
|
|
259
|
-
version = "1.0.
|
|
220
|
+
version = "1.0.42"
|
|
260
221
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
261
|
-
checksum = "
|
|
222
|
+
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
|
|
262
223
|
dependencies = [
|
|
263
224
|
"proc-macro2",
|
|
264
225
|
]
|
|
265
226
|
|
|
266
227
|
[[package]]
|
|
267
228
|
name = "rb-sys"
|
|
268
|
-
version = "0.9.
|
|
229
|
+
version = "0.9.117"
|
|
269
230
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
270
|
-
checksum = "
|
|
231
|
+
checksum = "f900d1ce4629a2ebffaf5de74bd8f9c1188d4c5ed406df02f97e22f77a006f44"
|
|
271
232
|
dependencies = [
|
|
272
233
|
"rb-sys-build",
|
|
273
234
|
]
|
|
274
235
|
|
|
275
236
|
[[package]]
|
|
276
237
|
name = "rb-sys-build"
|
|
277
|
-
version = "0.9.
|
|
238
|
+
version = "0.9.117"
|
|
278
239
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
279
|
-
checksum = "
|
|
240
|
+
checksum = "ef1e9c857028f631056bcd6d88cec390c751e343ce2223ddb26d23eb4a151d59"
|
|
280
241
|
dependencies = [
|
|
281
242
|
"bindgen",
|
|
282
243
|
"lazy_static",
|
|
@@ -289,24 +250,15 @@ dependencies = [
|
|
|
289
250
|
|
|
290
251
|
[[package]]
|
|
291
252
|
name = "rb-sys-env"
|
|
292
|
-
version = "0.
|
|
293
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
294
|
-
checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
295
|
-
|
|
296
|
-
[[package]]
|
|
297
|
-
name = "redox_syscall"
|
|
298
|
-
version = "0.5.8"
|
|
253
|
+
version = "0.2.2"
|
|
299
254
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
300
|
-
checksum = "
|
|
301
|
-
dependencies = [
|
|
302
|
-
"bitflags",
|
|
303
|
-
]
|
|
255
|
+
checksum = "08f8d2924cf136a1315e2b4c7460a39f62ef11ee5d522df9b2750fab55b868b6"
|
|
304
256
|
|
|
305
257
|
[[package]]
|
|
306
258
|
name = "regex"
|
|
307
|
-
version = "1.
|
|
259
|
+
version = "1.12.2"
|
|
308
260
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
309
|
-
checksum = "
|
|
261
|
+
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
|
|
310
262
|
dependencies = [
|
|
311
263
|
"aho-corasick",
|
|
312
264
|
"memchr",
|
|
@@ -316,9 +268,9 @@ dependencies = [
|
|
|
316
268
|
|
|
317
269
|
[[package]]
|
|
318
270
|
name = "regex-automata"
|
|
319
|
-
version = "0.4.
|
|
271
|
+
version = "0.4.13"
|
|
320
272
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
321
|
-
checksum = "
|
|
273
|
+
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
|
|
322
274
|
dependencies = [
|
|
323
275
|
"aho-corasick",
|
|
324
276
|
"memchr",
|
|
@@ -327,9 +279,9 @@ dependencies = [
|
|
|
327
279
|
|
|
328
280
|
[[package]]
|
|
329
281
|
name = "regex-syntax"
|
|
330
|
-
version = "0.8.
|
|
282
|
+
version = "0.8.8"
|
|
331
283
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
332
|
-
checksum = "
|
|
284
|
+
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
|
|
333
285
|
|
|
334
286
|
[[package]]
|
|
335
287
|
name = "rustc-hash"
|
|
@@ -338,31 +290,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
338
290
|
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
|
339
291
|
|
|
340
292
|
[[package]]
|
|
341
|
-
name = "
|
|
342
|
-
version = "
|
|
293
|
+
name = "seq-macro"
|
|
294
|
+
version = "0.3.6"
|
|
343
295
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
344
|
-
checksum = "
|
|
296
|
+
checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
|
|
345
297
|
|
|
346
298
|
[[package]]
|
|
347
|
-
name = "
|
|
348
|
-
version = "0.
|
|
299
|
+
name = "serde"
|
|
300
|
+
version = "1.0.228"
|
|
349
301
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
350
|
-
checksum = "
|
|
302
|
+
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
|
303
|
+
dependencies = [
|
|
304
|
+
"serde_core",
|
|
305
|
+
]
|
|
351
306
|
|
|
352
307
|
[[package]]
|
|
353
|
-
name = "
|
|
354
|
-
version = "1.0.
|
|
308
|
+
name = "serde_core"
|
|
309
|
+
version = "1.0.228"
|
|
355
310
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
356
|
-
checksum = "
|
|
311
|
+
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
|
357
312
|
dependencies = [
|
|
358
313
|
"serde_derive",
|
|
359
314
|
]
|
|
360
315
|
|
|
361
316
|
[[package]]
|
|
362
317
|
name = "serde_derive"
|
|
363
|
-
version = "1.0.
|
|
318
|
+
version = "1.0.228"
|
|
364
319
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
365
|
-
checksum = "
|
|
320
|
+
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
|
366
321
|
dependencies = [
|
|
367
322
|
"proc-macro2",
|
|
368
323
|
"quote",
|
|
@@ -381,17 +336,11 @@ version = "1.3.0"
|
|
|
381
336
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
382
337
|
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
|
383
338
|
|
|
384
|
-
[[package]]
|
|
385
|
-
name = "smallvec"
|
|
386
|
-
version = "1.13.2"
|
|
387
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
388
|
-
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
|
389
|
-
|
|
390
339
|
[[package]]
|
|
391
340
|
name = "syn"
|
|
392
|
-
version = "2.0.
|
|
341
|
+
version = "2.0.110"
|
|
393
342
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
394
|
-
checksum = "
|
|
343
|
+
checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea"
|
|
395
344
|
dependencies = [
|
|
396
345
|
"proc-macro2",
|
|
397
346
|
"quote",
|
|
@@ -400,16 +349,15 @@ dependencies = [
|
|
|
400
349
|
|
|
401
350
|
[[package]]
|
|
402
351
|
name = "tiktoken-rs"
|
|
403
|
-
version = "0.
|
|
352
|
+
version = "0.9.1"
|
|
404
353
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
405
|
-
checksum = "
|
|
354
|
+
checksum = "3a19830747d9034cd9da43a60eaa8e552dfda7712424aebf187b7a60126bae0d"
|
|
406
355
|
dependencies = [
|
|
407
356
|
"anyhow",
|
|
408
357
|
"base64",
|
|
409
358
|
"bstr",
|
|
410
359
|
"fancy-regex",
|
|
411
360
|
"lazy_static",
|
|
412
|
-
"parking_lot",
|
|
413
361
|
"regex",
|
|
414
362
|
"rustc-hash",
|
|
415
363
|
]
|
|
@@ -425,70 +373,12 @@ dependencies = [
|
|
|
425
373
|
|
|
426
374
|
[[package]]
|
|
427
375
|
name = "unicode-ident"
|
|
428
|
-
version = "1.0.
|
|
429
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
430
|
-
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
|
|
431
|
-
|
|
432
|
-
[[package]]
|
|
433
|
-
name = "windows-targets"
|
|
434
|
-
version = "0.52.6"
|
|
435
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
436
|
-
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
|
437
|
-
dependencies = [
|
|
438
|
-
"windows_aarch64_gnullvm",
|
|
439
|
-
"windows_aarch64_msvc",
|
|
440
|
-
"windows_i686_gnu",
|
|
441
|
-
"windows_i686_gnullvm",
|
|
442
|
-
"windows_i686_msvc",
|
|
443
|
-
"windows_x86_64_gnu",
|
|
444
|
-
"windows_x86_64_gnullvm",
|
|
445
|
-
"windows_x86_64_msvc",
|
|
446
|
-
]
|
|
447
|
-
|
|
448
|
-
[[package]]
|
|
449
|
-
name = "windows_aarch64_gnullvm"
|
|
450
|
-
version = "0.52.6"
|
|
451
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
452
|
-
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
|
453
|
-
|
|
454
|
-
[[package]]
|
|
455
|
-
name = "windows_aarch64_msvc"
|
|
456
|
-
version = "0.52.6"
|
|
376
|
+
version = "1.0.22"
|
|
457
377
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
458
|
-
checksum = "
|
|
378
|
+
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
|
|
459
379
|
|
|
460
380
|
[[package]]
|
|
461
|
-
name = "
|
|
462
|
-
version = "0.
|
|
463
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
464
|
-
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
|
465
|
-
|
|
466
|
-
[[package]]
|
|
467
|
-
name = "windows_i686_gnullvm"
|
|
468
|
-
version = "0.52.6"
|
|
469
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
470
|
-
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
|
471
|
-
|
|
472
|
-
[[package]]
|
|
473
|
-
name = "windows_i686_msvc"
|
|
474
|
-
version = "0.52.6"
|
|
475
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
476
|
-
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
|
477
|
-
|
|
478
|
-
[[package]]
|
|
479
|
-
name = "windows_x86_64_gnu"
|
|
480
|
-
version = "0.52.6"
|
|
481
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
482
|
-
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
|
483
|
-
|
|
484
|
-
[[package]]
|
|
485
|
-
name = "windows_x86_64_gnullvm"
|
|
486
|
-
version = "0.52.6"
|
|
487
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
488
|
-
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
|
489
|
-
|
|
490
|
-
[[package]]
|
|
491
|
-
name = "windows_x86_64_msvc"
|
|
492
|
-
version = "0.52.6"
|
|
381
|
+
name = "windows-link"
|
|
382
|
+
version = "0.2.1"
|
|
493
383
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
494
|
-
checksum = "
|
|
384
|
+
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
data/Gemfile.lock
CHANGED
|
@@ -1,74 +1,80 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
tiktoken_ruby (0.0.
|
|
5
|
-
rb_sys (
|
|
4
|
+
tiktoken_ruby (0.0.13)
|
|
5
|
+
rb_sys (~> 0.9)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
|
-
ast (2.4.
|
|
11
|
-
diff-lcs (1.
|
|
12
|
-
json (2.
|
|
13
|
-
language_server-protocol (3.17.0.
|
|
10
|
+
ast (2.4.3)
|
|
11
|
+
diff-lcs (1.6.2)
|
|
12
|
+
json (2.15.0)
|
|
13
|
+
language_server-protocol (3.17.0.5)
|
|
14
14
|
lint_roller (1.1.0)
|
|
15
|
-
minitest (5.
|
|
16
|
-
parallel (1.
|
|
17
|
-
parser (3.3.
|
|
15
|
+
minitest (5.25.5)
|
|
16
|
+
parallel (1.27.0)
|
|
17
|
+
parser (3.3.9.0)
|
|
18
18
|
ast (~> 2.4.1)
|
|
19
19
|
racc
|
|
20
|
+
prism (1.5.1)
|
|
20
21
|
racc (1.8.1)
|
|
21
22
|
rainbow (3.1.1)
|
|
22
|
-
rake (13.
|
|
23
|
-
rake-compiler (1.
|
|
23
|
+
rake (13.3.1)
|
|
24
|
+
rake-compiler (1.3.0)
|
|
24
25
|
rake
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
rake-compiler-dock (1.9.1)
|
|
27
|
+
rb_sys (0.9.117)
|
|
28
|
+
rake-compiler-dock (= 1.9.1)
|
|
29
|
+
regexp_parser (2.11.3)
|
|
30
|
+
rspec (3.13.2)
|
|
28
31
|
rspec-core (~> 3.13.0)
|
|
29
32
|
rspec-expectations (~> 3.13.0)
|
|
30
33
|
rspec-mocks (~> 3.13.0)
|
|
31
|
-
rspec-core (3.13.
|
|
34
|
+
rspec-core (3.13.6)
|
|
32
35
|
rspec-support (~> 3.13.0)
|
|
33
|
-
rspec-expectations (3.13.
|
|
36
|
+
rspec-expectations (3.13.5)
|
|
34
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
35
38
|
rspec-support (~> 3.13.0)
|
|
36
|
-
rspec-mocks (3.13.
|
|
39
|
+
rspec-mocks (3.13.7)
|
|
37
40
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
38
41
|
rspec-support (~> 3.13.0)
|
|
39
|
-
rspec-support (3.13.
|
|
40
|
-
rubocop (1.
|
|
42
|
+
rspec-support (3.13.6)
|
|
43
|
+
rubocop (1.80.2)
|
|
41
44
|
json (~> 2.3)
|
|
42
|
-
language_server-protocol (
|
|
45
|
+
language_server-protocol (~> 3.17.0.2)
|
|
46
|
+
lint_roller (~> 1.1.0)
|
|
43
47
|
parallel (~> 1.10)
|
|
44
48
|
parser (>= 3.3.0.2)
|
|
45
49
|
rainbow (>= 2.2.2, < 4.0)
|
|
46
50
|
regexp_parser (>= 2.9.3, < 3.0)
|
|
47
|
-
rubocop-ast (>= 1.
|
|
51
|
+
rubocop-ast (>= 1.46.0, < 2.0)
|
|
48
52
|
ruby-progressbar (~> 1.7)
|
|
49
53
|
unicode-display_width (>= 2.4.0, < 4.0)
|
|
50
|
-
rubocop-ast (1.
|
|
51
|
-
parser (>= 3.3.
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
54
|
+
rubocop-ast (1.47.1)
|
|
55
|
+
parser (>= 3.3.7.2)
|
|
56
|
+
prism (~> 1.4)
|
|
57
|
+
rubocop-performance (1.25.0)
|
|
58
|
+
lint_roller (~> 1.1)
|
|
59
|
+
rubocop (>= 1.75.0, < 2.0)
|
|
60
|
+
rubocop-ast (>= 1.38.0, < 2.0)
|
|
55
61
|
ruby-progressbar (1.13.0)
|
|
56
|
-
standard (1.
|
|
62
|
+
standard (1.51.1)
|
|
57
63
|
language_server-protocol (~> 3.17.0.2)
|
|
58
64
|
lint_roller (~> 1.0)
|
|
59
|
-
rubocop (~> 1.
|
|
65
|
+
rubocop (~> 1.80.2)
|
|
60
66
|
standard-custom (~> 1.0.0)
|
|
61
|
-
standard-performance (~> 1.
|
|
67
|
+
standard-performance (~> 1.8)
|
|
62
68
|
standard-custom (1.0.2)
|
|
63
69
|
lint_roller (~> 1.0)
|
|
64
70
|
rubocop (~> 1.50)
|
|
65
|
-
standard-performance (1.
|
|
71
|
+
standard-performance (1.8.0)
|
|
66
72
|
lint_roller (~> 1.1)
|
|
67
|
-
rubocop-performance (~> 1.
|
|
68
|
-
unicode-display_width (3.
|
|
69
|
-
unicode-emoji (~> 4.
|
|
70
|
-
unicode-emoji (4.0
|
|
71
|
-
yard (0.9.
|
|
73
|
+
rubocop-performance (~> 1.25.0)
|
|
74
|
+
unicode-display_width (3.2.0)
|
|
75
|
+
unicode-emoji (~> 4.1)
|
|
76
|
+
unicode-emoji (4.1.0)
|
|
77
|
+
yard (0.9.37)
|
|
72
78
|
yard-doctest (0.1.17)
|
|
73
79
|
minitest
|
|
74
80
|
yard
|
|
@@ -89,4 +95,4 @@ DEPENDENCIES
|
|
|
89
95
|
yard-doctest
|
|
90
96
|
|
|
91
97
|
BUNDLED WITH
|
|
92
|
-
2.
|
|
98
|
+
2.6.9
|
data/README.md
CHANGED
|
@@ -5,11 +5,6 @@
|
|
|
5
5
|
[Tiktoken](https://github.com/openai/tiktoken) is a BPE tokenizer from OpenAI used with their GPT models.
|
|
6
6
|
This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
|
|
7
7
|
|
|
8
|
-
## Request for maintainers
|
|
9
|
-
|
|
10
|
-
I can't really put substantial time into maintaining this. Probably nothing more than a couple hours every few months. If you have experience maintaining ruby gems and would like to
|
|
11
|
-
lend a hand please send me an email or reply to this [issue](https://github.com/IAPark/tiktoken_ruby/issues/26)
|
|
12
|
-
|
|
13
8
|
## Installation
|
|
14
9
|
|
|
15
10
|
Install the gem and add to the application's Gemfile by executing:
|
|
@@ -10,6 +10,6 @@ publish = false
|
|
|
10
10
|
crate-type = ["cdylib"]
|
|
11
11
|
|
|
12
12
|
[dependencies]
|
|
13
|
-
magnus = { version = "0.
|
|
14
|
-
rb-sys = { version = "0.9.
|
|
15
|
-
tiktoken-rs = { version = "0.
|
|
13
|
+
magnus = { version = "0.8.2" }
|
|
14
|
+
rb-sys = { version = "0.9.117", features = ["stable-api-compiled-fallback"] }
|
|
15
|
+
tiktoken-rs = { version = "0.9.0" }
|
|
@@ -27,7 +27,7 @@ impl CoreBPEWrapper {
|
|
|
27
27
|
let allowed_special: Vec<&str> = allowed_special.iter().map(|s| s.as_str()).collect();
|
|
28
28
|
let allowed_special: HashSet<&str> = HashSet::from_iter(allowed_special.iter().cloned());
|
|
29
29
|
|
|
30
|
-
Ok(self.core_bpe.encode(text.as_str(), allowed_special))
|
|
30
|
+
Ok(self.core_bpe.encode(text.as_str(), &allowed_special).0)
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
pub fn encode_with_special_tokens(&self, text: String) -> Vec<Rank> {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
mod core_bpe_wrapper;
|
|
2
2
|
|
|
3
3
|
use core_bpe_wrapper::CoreBPEWrapper;
|
|
4
|
-
use magnus::{
|
|
4
|
+
use magnus::{function, method, prelude::*, Error, ExceptionClass, RModule, Ruby};
|
|
5
5
|
|
|
6
6
|
fn r50k_base() -> CoreBPEWrapper {
|
|
7
7
|
let core_bpe = tiktoken_rs::r50k_base().unwrap();
|
|
@@ -25,12 +25,20 @@ fn o200k_base() -> CoreBPEWrapper {
|
|
|
25
25
|
CoreBPEWrapper::new(core_bpe)
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
+
fn o200k_harmony() -> CoreBPEWrapper {
|
|
29
|
+
let core_bpe = tiktoken_rs::o200k_harmony().unwrap();
|
|
30
|
+
CoreBPEWrapper::new(core_bpe)
|
|
31
|
+
}
|
|
32
|
+
|
|
28
33
|
fn module() -> Result<RModule, magnus::Error> {
|
|
29
|
-
define_module("Tiktoken")
|
|
34
|
+
Ruby::get().unwrap().define_module("Tiktoken")
|
|
30
35
|
}
|
|
31
36
|
|
|
32
37
|
fn uncicode_error() -> Result<ExceptionClass, magnus::Error> {
|
|
33
|
-
module()?.define_error(
|
|
38
|
+
module()?.define_error(
|
|
39
|
+
"UnicodeError",
|
|
40
|
+
Ruby::get().unwrap().exception_standard_error(),
|
|
41
|
+
)
|
|
34
42
|
}
|
|
35
43
|
|
|
36
44
|
#[magnus::init]
|
|
@@ -43,9 +51,10 @@ fn init() -> Result<(), Error> {
|
|
|
43
51
|
factory_module.define_singleton_method("p50k_edit", function!(p50k_edit, 0))?;
|
|
44
52
|
factory_module.define_singleton_method("cl100k_base", function!(cl100k_base, 0))?;
|
|
45
53
|
factory_module.define_singleton_method("o200k_base", function!(o200k_base, 0))?;
|
|
54
|
+
factory_module.define_singleton_method("o200k_harmony", function!(o200k_harmony, 0))?;
|
|
46
55
|
|
|
47
56
|
let ext_module = module.define_module("Ext")?;
|
|
48
|
-
let bpe_class = ext_module.define_class("CoreBPE",
|
|
57
|
+
let bpe_class = ext_module.define_class("CoreBPE", Ruby::get().unwrap().class_object())?;
|
|
49
58
|
|
|
50
59
|
bpe_class.define_method(
|
|
51
60
|
"encode_ordinary",
|
data/lib/tiktoken_ruby.rb
CHANGED
|
@@ -65,7 +65,8 @@ module Tiktoken
|
|
|
65
65
|
:p50k_base,
|
|
66
66
|
:p50k_edit,
|
|
67
67
|
:cl100k_base,
|
|
68
|
-
:o200k_base
|
|
68
|
+
:o200k_base,
|
|
69
|
+
:o200k_harmony
|
|
69
70
|
]
|
|
70
71
|
|
|
71
72
|
# taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
|
|
@@ -73,11 +74,17 @@ module Tiktoken
|
|
|
73
74
|
# https://github.com/Congyuwang/tiktoken-rs/blob/main/tiktoken-rs/src/tokenizer.rs#L50
|
|
74
75
|
# is the source of the mapping for the Rust library
|
|
75
76
|
MODEL_TO_ENCODING_NAME = {
|
|
77
|
+
# reasoning
|
|
78
|
+
o1: "o200k_base",
|
|
79
|
+
o3: "o200k_base",
|
|
80
|
+
"o4-mini": "o200k_base",
|
|
76
81
|
# chat
|
|
77
|
-
"
|
|
82
|
+
"gpt-4.1": "o200k_base",
|
|
83
|
+
"chatgpt-4o": "o200k_base",
|
|
78
84
|
"gpt-4o": "o200k_base",
|
|
79
85
|
"gpt-4": "cl100k_base",
|
|
80
86
|
"gpt-3.5-turbo": "cl100k_base",
|
|
87
|
+
"gpt-3.5": "cl100k_base", # Common shorthand
|
|
81
88
|
"gpt-35-turbo": "cl100k_base", # Azure deployment name
|
|
82
89
|
# base
|
|
83
90
|
"davinci-002": "cl100k_base",
|
|
@@ -124,12 +131,22 @@ module Tiktoken
|
|
|
124
131
|
}
|
|
125
132
|
|
|
126
133
|
MODEL_PREFIX_TO_ENCODING = {
|
|
134
|
+
# reasoning
|
|
135
|
+
"o1-": "o200k_base",
|
|
136
|
+
"o3-": "o200k_base",
|
|
137
|
+
"o4-": "o200k_base",
|
|
127
138
|
# chat
|
|
139
|
+
"gpt-5-": "o200k_base",
|
|
140
|
+
"gpt-4.5-": "o200k_base",
|
|
141
|
+
"gpt-4.1-": "o200k_base",
|
|
142
|
+
"chatgpt-4o-": "o200k_base",
|
|
128
143
|
"gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13, etc.
|
|
129
144
|
"gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
|
|
130
145
|
"gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
|
|
131
146
|
"gpt-35-turbo-": "cl100k_base", # Azure deployment name
|
|
147
|
+
"gpt-oss-": "o200k_harmony",
|
|
132
148
|
# fine-tuned
|
|
149
|
+
"ft:gpt-4o": "cl100k_base",
|
|
133
150
|
"ft:gpt-4": "cl100k_base",
|
|
134
151
|
"ft:gpt-3.5-turbo": "cl100k_base",
|
|
135
152
|
"ft:davinci-002": "cl100k_base",
|
metadata
CHANGED
|
@@ -1,29 +1,28 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tiktoken_ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.13
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- IAPark
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: exe
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: rb_sys
|
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
|
16
15
|
requirements:
|
|
17
|
-
- -
|
|
16
|
+
- - "~>"
|
|
18
17
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 0.9
|
|
18
|
+
version: '0.9'
|
|
20
19
|
type: :runtime
|
|
21
20
|
prerelease: false
|
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
22
|
requirements:
|
|
24
|
-
- -
|
|
23
|
+
- - "~>"
|
|
25
24
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 0.9
|
|
25
|
+
version: '0.9'
|
|
27
26
|
description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
|
|
28
27
|
used by OpenAI. It can be used to count the number of tokens in text before sending
|
|
29
28
|
it to OpenAI APIs.
|
|
@@ -61,7 +60,6 @@ metadata:
|
|
|
61
60
|
homepage_uri: https://github.com/IAPark/tiktoken_ruby
|
|
62
61
|
source_code_uri: https://github.com/IAPark/tiktoken_ruby
|
|
63
62
|
documentation_uri: https://rubydoc.info/github/IAPark/tiktoken_ruby/main
|
|
64
|
-
post_install_message:
|
|
65
63
|
rdoc_options: []
|
|
66
64
|
require_paths:
|
|
67
65
|
- lib
|
|
@@ -76,8 +74,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
76
74
|
- !ruby/object:Gem::Version
|
|
77
75
|
version: 3.4.0
|
|
78
76
|
requirements: []
|
|
79
|
-
rubygems_version: 3.
|
|
80
|
-
signing_key:
|
|
77
|
+
rubygems_version: 3.6.9
|
|
81
78
|
specification_version: 4
|
|
82
79
|
summary: Ruby wrapper for Tiktoken
|
|
83
80
|
test_files: []
|