tiktoken_ruby 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vscode/settings.json +3 -0
- data/Cargo.lock +77 -131
- data/Gemfile.lock +39 -33
- data/README.md +0 -5
- data/ext/tiktoken_ruby/Cargo.toml +3 -3
- data/ext/tiktoken_ruby/src/core_bpe_wrapper.rs +1 -1
- data/ext/tiktoken_ruby/src/lib.rs +4 -4
- data/lib/tiktoken_ruby/version.rb +1 -1
- data/lib/tiktoken_ruby.rb +16 -1
- data/tiktoken_ruby.gemspec +33 -0
- metadata +8 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 894d85b8e99040cab3c06241e9d2b3d538c0245efed323c4225f30b7e286b530
|
4
|
+
data.tar.gz: 382740d1eb3397908163411ccc2eb633f883d21c687a06073482be920986bd91
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5c9020b8d1872979e3d5df3d8bf472f8282ada8a99efb5a22f8e95e2345d4b7b01f24ab5c7530336b8b64c835593418cc8a8a637fd6c07aa4814944a543fa69a
|
7
|
+
data.tar.gz: 3310d1d4dbec01271d3cc84ad1b107d9da1206b4c1cbd7db27b4fe1a4c3b51c9d5dd8e443a9169262f09eba575d19c6f83bd0f402025bfa6bff84ca61532e7b8
|
data/.vscode/settings.json
CHANGED
data/Cargo.lock
CHANGED
@@ -13,21 +13,15 @@ dependencies = [
|
|
13
13
|
|
14
14
|
[[package]]
|
15
15
|
name = "anyhow"
|
16
|
-
version = "1.0.
|
16
|
+
version = "1.0.99"
|
17
17
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
18
|
-
checksum = "
|
19
|
-
|
20
|
-
[[package]]
|
21
|
-
name = "autocfg"
|
22
|
-
version = "1.4.0"
|
23
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
24
|
-
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
18
|
+
checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
|
25
19
|
|
26
20
|
[[package]]
|
27
21
|
name = "base64"
|
28
|
-
version = "0.
|
22
|
+
version = "0.22.1"
|
29
23
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
30
|
-
checksum = "
|
24
|
+
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
31
25
|
|
32
26
|
[[package]]
|
33
27
|
name = "bindgen"
|
@@ -66,15 +60,15 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
|
66
60
|
|
67
61
|
[[package]]
|
68
62
|
name = "bitflags"
|
69
|
-
version = "2.
|
63
|
+
version = "2.9.3"
|
70
64
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
71
|
-
checksum = "
|
65
|
+
checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d"
|
72
66
|
|
73
67
|
[[package]]
|
74
68
|
name = "bstr"
|
75
|
-
version = "1.
|
69
|
+
version = "1.12.0"
|
76
70
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
77
|
-
checksum = "
|
71
|
+
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
|
78
72
|
dependencies = [
|
79
73
|
"memchr",
|
80
74
|
"regex-automata",
|
@@ -92,9 +86,9 @@ dependencies = [
|
|
92
86
|
|
93
87
|
[[package]]
|
94
88
|
name = "cfg-if"
|
95
|
-
version = "1.0.
|
89
|
+
version = "1.0.3"
|
96
90
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
97
|
-
checksum = "
|
91
|
+
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
|
98
92
|
|
99
93
|
[[package]]
|
100
94
|
name = "clang-sys"
|
@@ -109,9 +103,9 @@ dependencies = [
|
|
109
103
|
|
110
104
|
[[package]]
|
111
105
|
name = "either"
|
112
|
-
version = "1.
|
106
|
+
version = "1.15.0"
|
113
107
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
114
|
-
checksum = "
|
108
|
+
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
115
109
|
|
116
110
|
[[package]]
|
117
111
|
name = "fancy-regex"
|
@@ -126,9 +120,9 @@ dependencies = [
|
|
126
120
|
|
127
121
|
[[package]]
|
128
122
|
name = "glob"
|
129
|
-
version = "0.3.
|
123
|
+
version = "0.3.3"
|
130
124
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
131
|
-
checksum = "
|
125
|
+
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
132
126
|
|
133
127
|
[[package]]
|
134
128
|
name = "itertools"
|
@@ -153,35 +147,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
153
147
|
|
154
148
|
[[package]]
|
155
149
|
name = "libc"
|
156
|
-
version = "0.2.
|
150
|
+
version = "0.2.175"
|
157
151
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
158
|
-
checksum = "
|
152
|
+
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
|
159
153
|
|
160
154
|
[[package]]
|
161
155
|
name = "libloading"
|
162
|
-
version = "0.8.
|
156
|
+
version = "0.8.8"
|
163
157
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
164
|
-
checksum = "
|
158
|
+
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
|
165
159
|
dependencies = [
|
166
160
|
"cfg-if",
|
167
161
|
"windows-targets",
|
168
162
|
]
|
169
163
|
|
170
|
-
[[package]]
|
171
|
-
name = "lock_api"
|
172
|
-
version = "0.4.12"
|
173
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
174
|
-
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
|
175
|
-
dependencies = [
|
176
|
-
"autocfg",
|
177
|
-
"scopeguard",
|
178
|
-
]
|
179
|
-
|
180
164
|
[[package]]
|
181
165
|
name = "magnus"
|
182
|
-
version = "0.
|
166
|
+
version = "0.8.0"
|
183
167
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
184
|
-
checksum = "
|
168
|
+
checksum = "3f14d3cc31b2dc4fce6cd447a83c7a7ca2ab8a9f1e535dcb2f796ff972b0e68b"
|
185
169
|
dependencies = [
|
186
170
|
"magnus-macros",
|
187
171
|
"rb-sys",
|
@@ -191,9 +175,9 @@ dependencies = [
|
|
191
175
|
|
192
176
|
[[package]]
|
193
177
|
name = "magnus-macros"
|
194
|
-
version = "0.
|
178
|
+
version = "0.8.0"
|
195
179
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
196
|
-
checksum = "
|
180
|
+
checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
|
197
181
|
dependencies = [
|
198
182
|
"proc-macro2",
|
199
183
|
"quote",
|
@@ -202,9 +186,9 @@ dependencies = [
|
|
202
186
|
|
203
187
|
[[package]]
|
204
188
|
name = "memchr"
|
205
|
-
version = "2.7.
|
189
|
+
version = "2.7.5"
|
206
190
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
207
|
-
checksum = "
|
191
|
+
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
|
208
192
|
|
209
193
|
[[package]]
|
210
194
|
name = "minimal-lexical"
|
@@ -222,61 +206,38 @@ dependencies = [
|
|
222
206
|
"minimal-lexical",
|
223
207
|
]
|
224
208
|
|
225
|
-
[[package]]
|
226
|
-
name = "parking_lot"
|
227
|
-
version = "0.12.3"
|
228
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
229
|
-
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
|
230
|
-
dependencies = [
|
231
|
-
"lock_api",
|
232
|
-
"parking_lot_core",
|
233
|
-
]
|
234
|
-
|
235
|
-
[[package]]
|
236
|
-
name = "parking_lot_core"
|
237
|
-
version = "0.9.10"
|
238
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
239
|
-
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
|
240
|
-
dependencies = [
|
241
|
-
"cfg-if",
|
242
|
-
"libc",
|
243
|
-
"redox_syscall",
|
244
|
-
"smallvec",
|
245
|
-
"windows-targets",
|
246
|
-
]
|
247
|
-
|
248
209
|
[[package]]
|
249
210
|
name = "proc-macro2"
|
250
|
-
version = "1.0.
|
211
|
+
version = "1.0.101"
|
251
212
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
252
|
-
checksum = "
|
213
|
+
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
|
253
214
|
dependencies = [
|
254
215
|
"unicode-ident",
|
255
216
|
]
|
256
217
|
|
257
218
|
[[package]]
|
258
219
|
name = "quote"
|
259
|
-
version = "1.0.
|
220
|
+
version = "1.0.40"
|
260
221
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
261
|
-
checksum = "
|
222
|
+
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
|
262
223
|
dependencies = [
|
263
224
|
"proc-macro2",
|
264
225
|
]
|
265
226
|
|
266
227
|
[[package]]
|
267
228
|
name = "rb-sys"
|
268
|
-
version = "0.9.
|
229
|
+
version = "0.9.117"
|
269
230
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
270
|
-
checksum = "
|
231
|
+
checksum = "f900d1ce4629a2ebffaf5de74bd8f9c1188d4c5ed406df02f97e22f77a006f44"
|
271
232
|
dependencies = [
|
272
233
|
"rb-sys-build",
|
273
234
|
]
|
274
235
|
|
275
236
|
[[package]]
|
276
237
|
name = "rb-sys-build"
|
277
|
-
version = "0.9.
|
238
|
+
version = "0.9.117"
|
278
239
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
279
|
-
checksum = "
|
240
|
+
checksum = "ef1e9c857028f631056bcd6d88cec390c751e343ce2223ddb26d23eb4a151d59"
|
280
241
|
dependencies = [
|
281
242
|
"bindgen",
|
282
243
|
"lazy_static",
|
@@ -289,24 +250,15 @@ dependencies = [
|
|
289
250
|
|
290
251
|
[[package]]
|
291
252
|
name = "rb-sys-env"
|
292
|
-
version = "0.
|
293
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
294
|
-
checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
295
|
-
|
296
|
-
[[package]]
|
297
|
-
name = "redox_syscall"
|
298
|
-
version = "0.5.8"
|
253
|
+
version = "0.2.2"
|
299
254
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
300
|
-
checksum = "
|
301
|
-
dependencies = [
|
302
|
-
"bitflags",
|
303
|
-
]
|
255
|
+
checksum = "08f8d2924cf136a1315e2b4c7460a39f62ef11ee5d522df9b2750fab55b868b6"
|
304
256
|
|
305
257
|
[[package]]
|
306
258
|
name = "regex"
|
307
|
-
version = "1.11.
|
259
|
+
version = "1.11.2"
|
308
260
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
309
|
-
checksum = "
|
261
|
+
checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
|
310
262
|
dependencies = [
|
311
263
|
"aho-corasick",
|
312
264
|
"memchr",
|
@@ -316,9 +268,9 @@ dependencies = [
|
|
316
268
|
|
317
269
|
[[package]]
|
318
270
|
name = "regex-automata"
|
319
|
-
version = "0.4.
|
271
|
+
version = "0.4.10"
|
320
272
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
321
|
-
checksum = "
|
273
|
+
checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
|
322
274
|
dependencies = [
|
323
275
|
"aho-corasick",
|
324
276
|
"memchr",
|
@@ -327,9 +279,9 @@ dependencies = [
|
|
327
279
|
|
328
280
|
[[package]]
|
329
281
|
name = "regex-syntax"
|
330
|
-
version = "0.8.
|
282
|
+
version = "0.8.6"
|
331
283
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
332
|
-
checksum = "
|
284
|
+
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
|
333
285
|
|
334
286
|
[[package]]
|
335
287
|
name = "rustc-hash"
|
@@ -337,32 +289,26 @@ version = "1.1.0"
|
|
337
289
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
338
290
|
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
339
291
|
|
340
|
-
[[package]]
|
341
|
-
name = "scopeguard"
|
342
|
-
version = "1.2.0"
|
343
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
344
|
-
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
345
|
-
|
346
292
|
[[package]]
|
347
293
|
name = "seq-macro"
|
348
|
-
version = "0.3.
|
294
|
+
version = "0.3.6"
|
349
295
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
350
|
-
checksum = "
|
296
|
+
checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
|
351
297
|
|
352
298
|
[[package]]
|
353
299
|
name = "serde"
|
354
|
-
version = "1.0.
|
300
|
+
version = "1.0.219"
|
355
301
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
356
|
-
checksum = "
|
302
|
+
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
|
357
303
|
dependencies = [
|
358
304
|
"serde_derive",
|
359
305
|
]
|
360
306
|
|
361
307
|
[[package]]
|
362
308
|
name = "serde_derive"
|
363
|
-
version = "1.0.
|
309
|
+
version = "1.0.219"
|
364
310
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
365
|
-
checksum = "
|
311
|
+
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
|
366
312
|
dependencies = [
|
367
313
|
"proc-macro2",
|
368
314
|
"quote",
|
@@ -381,17 +327,11 @@ version = "1.3.0"
|
|
381
327
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
382
328
|
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
383
329
|
|
384
|
-
[[package]]
|
385
|
-
name = "smallvec"
|
386
|
-
version = "1.13.2"
|
387
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
388
|
-
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
389
|
-
|
390
330
|
[[package]]
|
391
331
|
name = "syn"
|
392
|
-
version = "2.0.
|
332
|
+
version = "2.0.106"
|
393
333
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
394
|
-
checksum = "
|
334
|
+
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
|
395
335
|
dependencies = [
|
396
336
|
"proc-macro2",
|
397
337
|
"quote",
|
@@ -400,16 +340,15 @@ dependencies = [
|
|
400
340
|
|
401
341
|
[[package]]
|
402
342
|
name = "tiktoken-rs"
|
403
|
-
version = "0.
|
343
|
+
version = "0.7.0"
|
404
344
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
405
|
-
checksum = "
|
345
|
+
checksum = "25563eeba904d770acf527e8b370fe9a5547bacd20ff84a0b6c3bc41288e5625"
|
406
346
|
dependencies = [
|
407
347
|
"anyhow",
|
408
348
|
"base64",
|
409
349
|
"bstr",
|
410
350
|
"fancy-regex",
|
411
351
|
"lazy_static",
|
412
|
-
"parking_lot",
|
413
352
|
"regex",
|
414
353
|
"rustc-hash",
|
415
354
|
]
|
@@ -425,16 +364,23 @@ dependencies = [
|
|
425
364
|
|
426
365
|
[[package]]
|
427
366
|
name = "unicode-ident"
|
428
|
-
version = "1.0.
|
367
|
+
version = "1.0.18"
|
368
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
369
|
+
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
370
|
+
|
371
|
+
[[package]]
|
372
|
+
name = "windows-link"
|
373
|
+
version = "0.1.3"
|
429
374
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
430
|
-
checksum = "
|
375
|
+
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
|
431
376
|
|
432
377
|
[[package]]
|
433
378
|
name = "windows-targets"
|
434
|
-
version = "0.
|
379
|
+
version = "0.53.3"
|
435
380
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
436
|
-
checksum = "
|
381
|
+
checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
|
437
382
|
dependencies = [
|
383
|
+
"windows-link",
|
438
384
|
"windows_aarch64_gnullvm",
|
439
385
|
"windows_aarch64_msvc",
|
440
386
|
"windows_i686_gnu",
|
@@ -447,48 +393,48 @@ dependencies = [
|
|
447
393
|
|
448
394
|
[[package]]
|
449
395
|
name = "windows_aarch64_gnullvm"
|
450
|
-
version = "0.
|
396
|
+
version = "0.53.0"
|
451
397
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
452
|
-
checksum = "
|
398
|
+
checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
|
453
399
|
|
454
400
|
[[package]]
|
455
401
|
name = "windows_aarch64_msvc"
|
456
|
-
version = "0.
|
402
|
+
version = "0.53.0"
|
457
403
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
458
|
-
checksum = "
|
404
|
+
checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
|
459
405
|
|
460
406
|
[[package]]
|
461
407
|
name = "windows_i686_gnu"
|
462
|
-
version = "0.
|
408
|
+
version = "0.53.0"
|
463
409
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
464
|
-
checksum = "
|
410
|
+
checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
|
465
411
|
|
466
412
|
[[package]]
|
467
413
|
name = "windows_i686_gnullvm"
|
468
|
-
version = "0.
|
414
|
+
version = "0.53.0"
|
469
415
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
470
|
-
checksum = "
|
416
|
+
checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
|
471
417
|
|
472
418
|
[[package]]
|
473
419
|
name = "windows_i686_msvc"
|
474
|
-
version = "0.
|
420
|
+
version = "0.53.0"
|
475
421
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
476
|
-
checksum = "
|
422
|
+
checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
|
477
423
|
|
478
424
|
[[package]]
|
479
425
|
name = "windows_x86_64_gnu"
|
480
|
-
version = "0.
|
426
|
+
version = "0.53.0"
|
481
427
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
482
|
-
checksum = "
|
428
|
+
checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
|
483
429
|
|
484
430
|
[[package]]
|
485
431
|
name = "windows_x86_64_gnullvm"
|
486
|
-
version = "0.
|
432
|
+
version = "0.53.0"
|
487
433
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
488
|
-
checksum = "
|
434
|
+
checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
|
489
435
|
|
490
436
|
[[package]]
|
491
437
|
name = "windows_x86_64_msvc"
|
492
|
-
version = "0.
|
438
|
+
version = "0.53.0"
|
493
439
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
494
|
-
checksum = "
|
440
|
+
checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
|
data/Gemfile.lock
CHANGED
@@ -1,74 +1,80 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
tiktoken_ruby (0.0.
|
5
|
-
rb_sys (
|
4
|
+
tiktoken_ruby (0.0.12)
|
5
|
+
rb_sys (~> 0.9)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
ast (2.4.
|
11
|
-
diff-lcs (1.
|
12
|
-
json (2.
|
13
|
-
language_server-protocol (3.17.0.
|
10
|
+
ast (2.4.3)
|
11
|
+
diff-lcs (1.6.2)
|
12
|
+
json (2.12.0)
|
13
|
+
language_server-protocol (3.17.0.5)
|
14
14
|
lint_roller (1.1.0)
|
15
|
-
minitest (5.
|
16
|
-
parallel (1.
|
17
|
-
parser (3.3.
|
15
|
+
minitest (5.25.5)
|
16
|
+
parallel (1.27.0)
|
17
|
+
parser (3.3.8.0)
|
18
18
|
ast (~> 2.4.1)
|
19
19
|
racc
|
20
|
+
prism (1.4.0)
|
20
21
|
racc (1.8.1)
|
21
22
|
rainbow (3.1.1)
|
22
|
-
rake (13.
|
23
|
-
rake-compiler (1.
|
23
|
+
rake (13.3.0)
|
24
|
+
rake-compiler (1.3.0)
|
24
25
|
rake
|
25
|
-
|
26
|
+
rake-compiler-dock (1.9.1)
|
27
|
+
rb_sys (0.9.117)
|
28
|
+
rake-compiler-dock (= 1.9.1)
|
26
29
|
regexp_parser (2.10.0)
|
27
|
-
rspec (3.13.
|
30
|
+
rspec (3.13.1)
|
28
31
|
rspec-core (~> 3.13.0)
|
29
32
|
rspec-expectations (~> 3.13.0)
|
30
33
|
rspec-mocks (~> 3.13.0)
|
31
|
-
rspec-core (3.13.
|
34
|
+
rspec-core (3.13.5)
|
32
35
|
rspec-support (~> 3.13.0)
|
33
|
-
rspec-expectations (3.13.
|
36
|
+
rspec-expectations (3.13.5)
|
34
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
35
38
|
rspec-support (~> 3.13.0)
|
36
|
-
rspec-mocks (3.13.
|
39
|
+
rspec-mocks (3.13.5)
|
37
40
|
diff-lcs (>= 1.2.0, < 2.0)
|
38
41
|
rspec-support (~> 3.13.0)
|
39
|
-
rspec-support (3.13.
|
40
|
-
rubocop (1.
|
42
|
+
rspec-support (3.13.5)
|
43
|
+
rubocop (1.75.7)
|
41
44
|
json (~> 2.3)
|
42
|
-
language_server-protocol (
|
45
|
+
language_server-protocol (~> 3.17.0.2)
|
46
|
+
lint_roller (~> 1.1.0)
|
43
47
|
parallel (~> 1.10)
|
44
48
|
parser (>= 3.3.0.2)
|
45
49
|
rainbow (>= 2.2.2, < 4.0)
|
46
50
|
regexp_parser (>= 2.9.3, < 3.0)
|
47
|
-
rubocop-ast (>= 1.
|
51
|
+
rubocop-ast (>= 1.44.0, < 2.0)
|
48
52
|
ruby-progressbar (~> 1.7)
|
49
53
|
unicode-display_width (>= 2.4.0, < 4.0)
|
50
|
-
rubocop-ast (1.
|
51
|
-
parser (>= 3.3.
|
52
|
-
|
53
|
-
|
54
|
-
|
54
|
+
rubocop-ast (1.44.1)
|
55
|
+
parser (>= 3.3.7.2)
|
56
|
+
prism (~> 1.4)
|
57
|
+
rubocop-performance (1.25.0)
|
58
|
+
lint_roller (~> 1.1)
|
59
|
+
rubocop (>= 1.75.0, < 2.0)
|
60
|
+
rubocop-ast (>= 1.38.0, < 2.0)
|
55
61
|
ruby-progressbar (1.13.0)
|
56
|
-
standard (1.
|
62
|
+
standard (1.50.0)
|
57
63
|
language_server-protocol (~> 3.17.0.2)
|
58
64
|
lint_roller (~> 1.0)
|
59
|
-
rubocop (~> 1.
|
65
|
+
rubocop (~> 1.75.5)
|
60
66
|
standard-custom (~> 1.0.0)
|
61
|
-
standard-performance (~> 1.
|
67
|
+
standard-performance (~> 1.8)
|
62
68
|
standard-custom (1.0.2)
|
63
69
|
lint_roller (~> 1.0)
|
64
70
|
rubocop (~> 1.50)
|
65
|
-
standard-performance (1.
|
71
|
+
standard-performance (1.8.0)
|
66
72
|
lint_roller (~> 1.1)
|
67
|
-
rubocop-performance (~> 1.
|
68
|
-
unicode-display_width (3.1.
|
73
|
+
rubocop-performance (~> 1.25.0)
|
74
|
+
unicode-display_width (3.1.4)
|
69
75
|
unicode-emoji (~> 4.0, >= 4.0.4)
|
70
76
|
unicode-emoji (4.0.4)
|
71
|
-
yard (0.9.
|
77
|
+
yard (0.9.37)
|
72
78
|
yard-doctest (0.1.17)
|
73
79
|
minitest
|
74
80
|
yard
|
@@ -89,4 +95,4 @@ DEPENDENCIES
|
|
89
95
|
yard-doctest
|
90
96
|
|
91
97
|
BUNDLED WITH
|
92
|
-
2.
|
98
|
+
2.6.9
|
data/README.md
CHANGED
@@ -5,11 +5,6 @@
|
|
5
5
|
[Tiktoken](https://github.com/openai/tiktoken) is BPE tokenizer from OpenAI used with their GPT models.
|
6
6
|
This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
|
7
7
|
|
8
|
-
## Request for maintainers
|
9
|
-
|
10
|
-
I can't really put substantial time into maintaining this. Probably nothing more than a couple hours every few months. If you have experience maintaining ruby gems and would like to
|
11
|
-
lend a hand please send me an email or reply to this [issue](https://github.com/IAPark/tiktoken_ruby/issues/26)
|
12
|
-
|
13
8
|
## Installation
|
14
9
|
|
15
10
|
Install the gem and add to the application's Gemfile by executing:
|
@@ -10,6 +10,6 @@ publish = false
|
|
10
10
|
crate-type = ["cdylib"]
|
11
11
|
|
12
12
|
[dependencies]
|
13
|
-
magnus = { version = "0.
|
14
|
-
rb-sys = { version = "0.9.
|
15
|
-
tiktoken-rs = { version = "0.
|
13
|
+
magnus = { version = "0.8.0" }
|
14
|
+
rb-sys = { version = "0.9.117", features = ["stable-api-compiled-fallback"] }
|
15
|
+
tiktoken-rs = { version = "0.7.0" }
|
@@ -27,7 +27,7 @@ impl CoreBPEWrapper {
|
|
27
27
|
let allowed_special: Vec<&str> = allowed_special.iter().map(|s| s.as_str()).collect();
|
28
28
|
let allowed_special: HashSet<&str> = HashSet::from_iter(allowed_special.iter().cloned());
|
29
29
|
|
30
|
-
Ok(self.core_bpe.encode(text.as_str(), allowed_special))
|
30
|
+
Ok(self.core_bpe.encode(text.as_str(), &allowed_special).0)
|
31
31
|
}
|
32
32
|
|
33
33
|
pub fn encode_with_special_tokens(&self, text: String) -> Vec<Rank> {
|
@@ -1,7 +1,7 @@
|
|
1
1
|
mod core_bpe_wrapper;
|
2
2
|
|
3
3
|
use core_bpe_wrapper::CoreBPEWrapper;
|
4
|
-
use magnus::{
|
4
|
+
use magnus::{function, method, prelude::*, Error, ExceptionClass, RModule, Ruby};
|
5
5
|
|
6
6
|
fn r50k_base() -> CoreBPEWrapper {
|
7
7
|
let core_bpe = tiktoken_rs::r50k_base().unwrap();
|
@@ -26,11 +26,11 @@ fn o200k_base() -> CoreBPEWrapper {
|
|
26
26
|
}
|
27
27
|
|
28
28
|
fn module() -> Result<RModule, magnus::Error> {
|
29
|
-
define_module("Tiktoken")
|
29
|
+
Ruby::get().unwrap().define_module("Tiktoken")
|
30
30
|
}
|
31
31
|
|
32
32
|
fn uncicode_error() -> Result<ExceptionClass, magnus::Error> {
|
33
|
-
module()?.define_error("UnicodeError",
|
33
|
+
module()?.define_error("UnicodeError", Ruby::get().unwrap().exception_standard_error())
|
34
34
|
}
|
35
35
|
|
36
36
|
#[magnus::init]
|
@@ -45,7 +45,7 @@ fn init() -> Result<(), Error> {
|
|
45
45
|
factory_module.define_singleton_method("o200k_base", function!(o200k_base, 0))?;
|
46
46
|
|
47
47
|
let ext_module = module.define_module("Ext")?;
|
48
|
-
let bpe_class = ext_module.define_class("CoreBPE",
|
48
|
+
let bpe_class = ext_module.define_class("CoreBPE", Ruby::get().unwrap().class_object())?;
|
49
49
|
|
50
50
|
bpe_class.define_method(
|
51
51
|
"encode_ordinary",
|
data/lib/tiktoken_ruby.rb
CHANGED
@@ -73,11 +73,17 @@ module Tiktoken
|
|
73
73
|
# https://github.com/Congyuwang/tiktoken-rs/blob/main/tiktoken-rs/src/tokenizer.rs#L50
|
74
74
|
# is the source of the mapping for the Rust library
|
75
75
|
MODEL_TO_ENCODING_NAME = {
|
76
|
+
# reasoning
|
77
|
+
o1: "o200k_base",
|
78
|
+
o3: "o200k_base",
|
79
|
+
"o4-mini": "o200k_base",
|
76
80
|
# chat
|
77
|
-
"
|
81
|
+
"gpt-4.1": "o200k_base",
|
82
|
+
"chatgpt-4o": "o200k_base",
|
78
83
|
"gpt-4o": "o200k_base",
|
79
84
|
"gpt-4": "cl100k_base",
|
80
85
|
"gpt-3.5-turbo": "cl100k_base",
|
86
|
+
"gpt-3.5": "cl100k_base", # Common shorthand
|
81
87
|
"gpt-35-turbo": "cl100k_base", # Azure deployment name
|
82
88
|
# base
|
83
89
|
"davinci-002": "cl100k_base",
|
@@ -124,12 +130,21 @@ module Tiktoken
|
|
124
130
|
}
|
125
131
|
|
126
132
|
MODEL_PREFIX_TO_ENCODING = {
|
133
|
+
# reasoning
|
134
|
+
"o1-": "o200k_base",
|
135
|
+
"o3-": "o200k_base",
|
136
|
+
"o4-": "o200k_base",
|
127
137
|
# chat
|
138
|
+
"gpt-5-": "o200k_base",
|
139
|
+
"gpt-4.5-": "o200k_base",
|
140
|
+
"gpt-4.1-": "o200k_base",
|
141
|
+
"chatgpt-4o-": "o200k_base",
|
128
142
|
"gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13, etc.
|
129
143
|
"gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
|
130
144
|
"gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
|
131
145
|
"gpt-35-turbo-": "cl100k_base", # Azure deployment name
|
132
146
|
# fine-tuned
|
147
|
+
"ft:gpt-4o": "cl100k_base",
|
133
148
|
"ft:gpt-4": "cl100k_base",
|
134
149
|
"ft:gpt-3.5-turbo": "cl100k_base",
|
135
150
|
"ft:davinci-002": "cl100k_base",
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/tiktoken_ruby/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "tiktoken_ruby"
|
7
|
+
spec.version = Tiktoken::VERSION
|
8
|
+
spec.authors = ["IAPark"]
|
9
|
+
spec.email = ["isaac.a.park@gmail.com"]
|
10
|
+
spec.summary = "Ruby wrapper for Tiktoken"
|
11
|
+
spec.description = "An unofficial Ruby wrapper for Tiktoken, " \
|
12
|
+
"a BPE tokenizer written by and used by OpenAI. It can be used to " \
|
13
|
+
"count the number of tokens in text before sending it to OpenAI APIs."
|
14
|
+
spec.homepage = "https://github.com/IAPark/tiktoken_ruby"
|
15
|
+
spec.license = "MIT"
|
16
|
+
spec.required_ruby_version = ">= 3.1.0"
|
17
|
+
spec.required_rubygems_version = ">= 3.4.0"
|
18
|
+
spec.platform = Gem::Platform::RUBY
|
19
|
+
|
20
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
21
|
+
spec.metadata["source_code_uri"] = "https://github.com/IAPark/tiktoken_ruby"
|
22
|
+
spec.metadata["documentation_uri"] = "https://rubydoc.info/github/IAPark/tiktoken_ruby/main"
|
23
|
+
spec.files = Dir.chdir(__dir__) do
|
24
|
+
`git ls-files -z`.split("\x0").reject do |f|
|
25
|
+
(f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
|
26
|
+
end
|
27
|
+
end
|
28
|
+
spec.bindir = "exe"
|
29
|
+
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
30
|
+
spec.require_paths = ["lib"]
|
31
|
+
spec.extensions = ["ext/tiktoken_ruby/extconf.rb"]
|
32
|
+
spec.add_dependency "rb_sys", "~> 0.9"
|
33
|
+
end
|
metadata
CHANGED
@@ -1,29 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiktoken_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- IAPark
|
8
|
-
autorequire:
|
9
8
|
bindir: exe
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: rb_sys
|
15
14
|
requirement: !ruby/object:Gem::Requirement
|
16
15
|
requirements:
|
17
|
-
- -
|
16
|
+
- - "~>"
|
18
17
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.9
|
18
|
+
version: '0.9'
|
20
19
|
type: :runtime
|
21
20
|
prerelease: false
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
23
22
|
requirements:
|
24
|
-
- -
|
23
|
+
- - "~>"
|
25
24
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.9
|
25
|
+
version: '0.9'
|
27
26
|
description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
|
28
27
|
used by OpenAI. It can be used to count the number of tokens in text before sending
|
29
28
|
it to OpenAI APIs.
|
@@ -54,6 +53,7 @@ files:
|
|
54
53
|
- lib/tiktoken_ruby/version.rb
|
55
54
|
- script/release
|
56
55
|
- sig/tiktoken_ruby.rbs
|
56
|
+
- tiktoken_ruby.gemspec
|
57
57
|
homepage: https://github.com/IAPark/tiktoken_ruby
|
58
58
|
licenses:
|
59
59
|
- MIT
|
@@ -61,7 +61,6 @@ metadata:
|
|
61
61
|
homepage_uri: https://github.com/IAPark/tiktoken_ruby
|
62
62
|
source_code_uri: https://github.com/IAPark/tiktoken_ruby
|
63
63
|
documentation_uri: https://rubydoc.info/github/IAPark/tiktoken_ruby/main
|
64
|
-
post_install_message:
|
65
64
|
rdoc_options: []
|
66
65
|
require_paths:
|
67
66
|
- lib
|
@@ -76,8 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
75
|
- !ruby/object:Gem::Version
|
77
76
|
version: 3.4.0
|
78
77
|
requirements: []
|
79
|
-
rubygems_version: 3.
|
80
|
-
signing_key:
|
78
|
+
rubygems_version: 3.6.9
|
81
79
|
specification_version: 4
|
82
80
|
summary: Ruby wrapper for Tiktoken
|
83
81
|
test_files: []
|