tiktoken_ruby 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vscode/settings.json +5 -0
- data/Cargo.lock +78 -76
- data/Gemfile.lock +40 -40
- data/Rakefile +6 -0
- data/ext/tiktoken_ruby/Cargo.toml +3 -3
- data/ext/tiktoken_ruby/src/core_bpe_wrapper.rs +17 -14
- data/lib/tiktoken_ruby/version.rb +1 -1
- data/lib/tiktoken_ruby.rb +4 -1
- data/script/release +43 -0
- metadata +9 -8
- data/tiktoken_ruby.gemspec +0 -33
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9997f4334fdaff90a631036be451cad90eb58ce6919a9592de2ca09d7f8baf9e
|
|
4
|
+
data.tar.gz: 160c540bf8a76278ebcabfe80c50e9fdb5802a0742d4692295e659ba8626b492
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ea271891e7ca2fbfb637da4945c5f6da55f72b8c39efe20c4f83d5d059d2b9997f4344feadbd6109a29859729e4bba245fe51338e77c548e9e9a24bd1981a6e9
|
|
7
|
+
data.tar.gz: 3b6eeda82acaa6b6abb324e911196bce53d20a2bbec7750e07cb3b9d7a8381ed9ebd90b68a4e770e8bb1b4be332748216bc653d31389591348bc5192656e417f
|
data/Cargo.lock
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This file is automatically @generated by Cargo.
|
|
2
2
|
# It is not intended for manual editing.
|
|
3
|
-
version =
|
|
3
|
+
version = 4
|
|
4
4
|
|
|
5
5
|
[[package]]
|
|
6
6
|
name = "aho-corasick"
|
|
@@ -13,15 +13,15 @@ dependencies = [
|
|
|
13
13
|
|
|
14
14
|
[[package]]
|
|
15
15
|
name = "anyhow"
|
|
16
|
-
version = "1.0.
|
|
16
|
+
version = "1.0.95"
|
|
17
17
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
18
|
-
checksum = "
|
|
18
|
+
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
|
|
19
19
|
|
|
20
20
|
[[package]]
|
|
21
21
|
name = "autocfg"
|
|
22
|
-
version = "1.
|
|
22
|
+
version = "1.4.0"
|
|
23
23
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
24
|
-
checksum = "
|
|
24
|
+
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
|
25
25
|
|
|
26
26
|
[[package]]
|
|
27
27
|
name = "base64"
|
|
@@ -31,9 +31,9 @@ checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
|
|
|
31
31
|
|
|
32
32
|
[[package]]
|
|
33
33
|
name = "bindgen"
|
|
34
|
-
version = "0.69.
|
|
34
|
+
version = "0.69.5"
|
|
35
35
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
36
|
-
checksum = "
|
|
36
|
+
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
|
|
37
37
|
dependencies = [
|
|
38
38
|
"bitflags",
|
|
39
39
|
"cexpr",
|
|
@@ -66,15 +66,15 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
|
|
66
66
|
|
|
67
67
|
[[package]]
|
|
68
68
|
name = "bitflags"
|
|
69
|
-
version = "2.
|
|
69
|
+
version = "2.6.0"
|
|
70
70
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
71
|
-
checksum = "
|
|
71
|
+
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
|
72
72
|
|
|
73
73
|
[[package]]
|
|
74
74
|
name = "bstr"
|
|
75
|
-
version = "1.
|
|
75
|
+
version = "1.11.1"
|
|
76
76
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
77
|
-
checksum = "
|
|
77
|
+
checksum = "786a307d683a5bf92e6fd5fd69a7eb613751668d1d8d67d802846dfe367c62c8"
|
|
78
78
|
dependencies = [
|
|
79
79
|
"memchr",
|
|
80
80
|
"regex-automata",
|
|
@@ -98,9 +98,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|
|
98
98
|
|
|
99
99
|
[[package]]
|
|
100
100
|
name = "clang-sys"
|
|
101
|
-
version = "1.
|
|
101
|
+
version = "1.8.1"
|
|
102
102
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
103
|
-
checksum = "
|
|
103
|
+
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
|
104
104
|
dependencies = [
|
|
105
105
|
"glob",
|
|
106
106
|
"libc",
|
|
@@ -109,25 +109,26 @@ dependencies = [
|
|
|
109
109
|
|
|
110
110
|
[[package]]
|
|
111
111
|
name = "either"
|
|
112
|
-
version = "1.
|
|
112
|
+
version = "1.13.0"
|
|
113
113
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
114
|
-
checksum = "
|
|
114
|
+
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
|
115
115
|
|
|
116
116
|
[[package]]
|
|
117
117
|
name = "fancy-regex"
|
|
118
|
-
version = "0.
|
|
118
|
+
version = "0.13.0"
|
|
119
119
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
120
|
-
checksum = "
|
|
120
|
+
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
|
|
121
121
|
dependencies = [
|
|
122
122
|
"bit-set",
|
|
123
|
-
"regex",
|
|
123
|
+
"regex-automata",
|
|
124
|
+
"regex-syntax",
|
|
124
125
|
]
|
|
125
126
|
|
|
126
127
|
[[package]]
|
|
127
128
|
name = "glob"
|
|
128
|
-
version = "0.3.
|
|
129
|
+
version = "0.3.2"
|
|
129
130
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
130
|
-
checksum = "
|
|
131
|
+
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
|
131
132
|
|
|
132
133
|
[[package]]
|
|
133
134
|
name = "itertools"
|
|
@@ -140,9 +141,9 @@ dependencies = [
|
|
|
140
141
|
|
|
141
142
|
[[package]]
|
|
142
143
|
name = "lazy_static"
|
|
143
|
-
version = "1.
|
|
144
|
+
version = "1.5.0"
|
|
144
145
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
145
|
-
checksum = "
|
|
146
|
+
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
|
146
147
|
|
|
147
148
|
[[package]]
|
|
148
149
|
name = "lazycell"
|
|
@@ -152,15 +153,15 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
|
152
153
|
|
|
153
154
|
[[package]]
|
|
154
155
|
name = "libc"
|
|
155
|
-
version = "0.2.
|
|
156
|
+
version = "0.2.169"
|
|
156
157
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
157
|
-
checksum = "
|
|
158
|
+
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
|
158
159
|
|
|
159
160
|
[[package]]
|
|
160
161
|
name = "libloading"
|
|
161
|
-
version = "0.8.
|
|
162
|
+
version = "0.8.6"
|
|
162
163
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
163
|
-
checksum = "
|
|
164
|
+
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
|
|
164
165
|
dependencies = [
|
|
165
166
|
"cfg-if",
|
|
166
167
|
"windows-targets",
|
|
@@ -178,9 +179,9 @@ dependencies = [
|
|
|
178
179
|
|
|
179
180
|
[[package]]
|
|
180
181
|
name = "magnus"
|
|
181
|
-
version = "0.
|
|
182
|
+
version = "0.7.1"
|
|
182
183
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
183
|
-
checksum = "
|
|
184
|
+
checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
|
|
184
185
|
dependencies = [
|
|
185
186
|
"magnus-macros",
|
|
186
187
|
"rb-sys",
|
|
@@ -201,9 +202,9 @@ dependencies = [
|
|
|
201
202
|
|
|
202
203
|
[[package]]
|
|
203
204
|
name = "memchr"
|
|
204
|
-
version = "2.7.
|
|
205
|
+
version = "2.7.4"
|
|
205
206
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
206
|
-
checksum = "
|
|
207
|
+
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
|
207
208
|
|
|
208
209
|
[[package]]
|
|
209
210
|
name = "minimal-lexical"
|
|
@@ -223,9 +224,9 @@ dependencies = [
|
|
|
223
224
|
|
|
224
225
|
[[package]]
|
|
225
226
|
name = "parking_lot"
|
|
226
|
-
version = "0.12.
|
|
227
|
+
version = "0.12.3"
|
|
227
228
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
228
|
-
checksum = "
|
|
229
|
+
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
|
|
229
230
|
dependencies = [
|
|
230
231
|
"lock_api",
|
|
231
232
|
"parking_lot_core",
|
|
@@ -246,36 +247,36 @@ dependencies = [
|
|
|
246
247
|
|
|
247
248
|
[[package]]
|
|
248
249
|
name = "proc-macro2"
|
|
249
|
-
version = "1.0.
|
|
250
|
+
version = "1.0.92"
|
|
250
251
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
251
|
-
checksum = "
|
|
252
|
+
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
|
|
252
253
|
dependencies = [
|
|
253
254
|
"unicode-ident",
|
|
254
255
|
]
|
|
255
256
|
|
|
256
257
|
[[package]]
|
|
257
258
|
name = "quote"
|
|
258
|
-
version = "1.0.
|
|
259
|
+
version = "1.0.38"
|
|
259
260
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
260
|
-
checksum = "
|
|
261
|
+
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
|
|
261
262
|
dependencies = [
|
|
262
263
|
"proc-macro2",
|
|
263
264
|
]
|
|
264
265
|
|
|
265
266
|
[[package]]
|
|
266
267
|
name = "rb-sys"
|
|
267
|
-
version = "0.9.
|
|
268
|
+
version = "0.9.106"
|
|
268
269
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
269
|
-
checksum = "
|
|
270
|
+
checksum = "17b6efdbc8c1a22cb8b5d7ead0237c16c362c9ef6fbdc09e2d1040615b0f4cd0"
|
|
270
271
|
dependencies = [
|
|
271
272
|
"rb-sys-build",
|
|
272
273
|
]
|
|
273
274
|
|
|
274
275
|
[[package]]
|
|
275
276
|
name = "rb-sys-build"
|
|
276
|
-
version = "0.9.
|
|
277
|
+
version = "0.9.106"
|
|
277
278
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
278
|
-
checksum = "
|
|
279
|
+
checksum = "e1d88c51e52f8636a5efc24ec5987056e64e48a91ed2a1af96cb5564686cc10f"
|
|
279
280
|
dependencies = [
|
|
280
281
|
"bindgen",
|
|
281
282
|
"lazy_static",
|
|
@@ -294,18 +295,18 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
|
294
295
|
|
|
295
296
|
[[package]]
|
|
296
297
|
name = "redox_syscall"
|
|
297
|
-
version = "0.5.
|
|
298
|
+
version = "0.5.8"
|
|
298
299
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
299
|
-
checksum = "
|
|
300
|
+
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
|
|
300
301
|
dependencies = [
|
|
301
302
|
"bitflags",
|
|
302
303
|
]
|
|
303
304
|
|
|
304
305
|
[[package]]
|
|
305
306
|
name = "regex"
|
|
306
|
-
version = "1.
|
|
307
|
+
version = "1.11.1"
|
|
307
308
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
308
|
-
checksum = "
|
|
309
|
+
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
|
309
310
|
dependencies = [
|
|
310
311
|
"aho-corasick",
|
|
311
312
|
"memchr",
|
|
@@ -315,9 +316,9 @@ dependencies = [
|
|
|
315
316
|
|
|
316
317
|
[[package]]
|
|
317
318
|
name = "regex-automata"
|
|
318
|
-
version = "0.4.
|
|
319
|
+
version = "0.4.9"
|
|
319
320
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
320
|
-
checksum = "
|
|
321
|
+
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
|
321
322
|
dependencies = [
|
|
322
323
|
"aho-corasick",
|
|
323
324
|
"memchr",
|
|
@@ -326,9 +327,9 @@ dependencies = [
|
|
|
326
327
|
|
|
327
328
|
[[package]]
|
|
328
329
|
name = "regex-syntax"
|
|
329
|
-
version = "0.8.
|
|
330
|
+
version = "0.8.5"
|
|
330
331
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
331
|
-
checksum = "
|
|
332
|
+
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
|
332
333
|
|
|
333
334
|
[[package]]
|
|
334
335
|
name = "rustc-hash"
|
|
@@ -350,18 +351,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
|
|
350
351
|
|
|
351
352
|
[[package]]
|
|
352
353
|
name = "serde"
|
|
353
|
-
version = "1.0.
|
|
354
|
+
version = "1.0.217"
|
|
354
355
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
355
|
-
checksum = "
|
|
356
|
+
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
|
|
356
357
|
dependencies = [
|
|
357
358
|
"serde_derive",
|
|
358
359
|
]
|
|
359
360
|
|
|
360
361
|
[[package]]
|
|
361
362
|
name = "serde_derive"
|
|
362
|
-
version = "1.0.
|
|
363
|
+
version = "1.0.217"
|
|
363
364
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
364
|
-
checksum = "
|
|
365
|
+
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
|
|
365
366
|
dependencies = [
|
|
366
367
|
"proc-macro2",
|
|
367
368
|
"quote",
|
|
@@ -388,9 +389,9 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
|
|
388
389
|
|
|
389
390
|
[[package]]
|
|
390
391
|
name = "syn"
|
|
391
|
-
version = "2.0.
|
|
392
|
+
version = "2.0.93"
|
|
392
393
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
393
|
-
checksum = "
|
|
394
|
+
checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058"
|
|
394
395
|
dependencies = [
|
|
395
396
|
"proc-macro2",
|
|
396
397
|
"quote",
|
|
@@ -399,9 +400,9 @@ dependencies = [
|
|
|
399
400
|
|
|
400
401
|
[[package]]
|
|
401
402
|
name = "tiktoken-rs"
|
|
402
|
-
version = "0.
|
|
403
|
+
version = "0.6.0"
|
|
403
404
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
404
|
-
checksum = "
|
|
405
|
+
checksum = "44075987ee2486402f0808505dd65692163d243a337fc54363d49afac41087f6"
|
|
405
406
|
dependencies = [
|
|
406
407
|
"anyhow",
|
|
407
408
|
"base64",
|
|
@@ -409,6 +410,7 @@ dependencies = [
|
|
|
409
410
|
"fancy-regex",
|
|
410
411
|
"lazy_static",
|
|
411
412
|
"parking_lot",
|
|
413
|
+
"regex",
|
|
412
414
|
"rustc-hash",
|
|
413
415
|
]
|
|
414
416
|
|
|
@@ -423,15 +425,15 @@ dependencies = [
|
|
|
423
425
|
|
|
424
426
|
[[package]]
|
|
425
427
|
name = "unicode-ident"
|
|
426
|
-
version = "1.0.
|
|
428
|
+
version = "1.0.14"
|
|
427
429
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
428
|
-
checksum = "
|
|
430
|
+
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
|
|
429
431
|
|
|
430
432
|
[[package]]
|
|
431
433
|
name = "windows-targets"
|
|
432
|
-
version = "0.52.
|
|
434
|
+
version = "0.52.6"
|
|
433
435
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
434
|
-
checksum = "
|
|
436
|
+
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
|
435
437
|
dependencies = [
|
|
436
438
|
"windows_aarch64_gnullvm",
|
|
437
439
|
"windows_aarch64_msvc",
|
|
@@ -445,48 +447,48 @@ dependencies = [
|
|
|
445
447
|
|
|
446
448
|
[[package]]
|
|
447
449
|
name = "windows_aarch64_gnullvm"
|
|
448
|
-
version = "0.52.
|
|
450
|
+
version = "0.52.6"
|
|
449
451
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
450
|
-
checksum = "
|
|
452
|
+
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
|
451
453
|
|
|
452
454
|
[[package]]
|
|
453
455
|
name = "windows_aarch64_msvc"
|
|
454
|
-
version = "0.52.
|
|
456
|
+
version = "0.52.6"
|
|
455
457
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
456
|
-
checksum = "
|
|
458
|
+
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
|
457
459
|
|
|
458
460
|
[[package]]
|
|
459
461
|
name = "windows_i686_gnu"
|
|
460
|
-
version = "0.52.
|
|
462
|
+
version = "0.52.6"
|
|
461
463
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
462
|
-
checksum = "
|
|
464
|
+
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
|
463
465
|
|
|
464
466
|
[[package]]
|
|
465
467
|
name = "windows_i686_gnullvm"
|
|
466
|
-
version = "0.52.
|
|
468
|
+
version = "0.52.6"
|
|
467
469
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
468
|
-
checksum = "
|
|
470
|
+
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
|
469
471
|
|
|
470
472
|
[[package]]
|
|
471
473
|
name = "windows_i686_msvc"
|
|
472
|
-
version = "0.52.
|
|
474
|
+
version = "0.52.6"
|
|
473
475
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
474
|
-
checksum = "
|
|
476
|
+
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
|
475
477
|
|
|
476
478
|
[[package]]
|
|
477
479
|
name = "windows_x86_64_gnu"
|
|
478
|
-
version = "0.52.
|
|
480
|
+
version = "0.52.6"
|
|
479
481
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
480
|
-
checksum = "
|
|
482
|
+
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
|
481
483
|
|
|
482
484
|
[[package]]
|
|
483
485
|
name = "windows_x86_64_gnullvm"
|
|
484
|
-
version = "0.52.
|
|
486
|
+
version = "0.52.6"
|
|
485
487
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
486
|
-
checksum = "
|
|
488
|
+
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
|
487
489
|
|
|
488
490
|
[[package]]
|
|
489
491
|
name = "windows_x86_64_msvc"
|
|
490
|
-
version = "0.52.
|
|
492
|
+
version = "0.52.6"
|
|
491
493
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
492
|
-
checksum = "
|
|
494
|
+
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
data/Gemfile.lock
CHANGED
|
@@ -1,73 +1,73 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
tiktoken_ruby (0.0.
|
|
5
|
-
rb_sys (= 0.9.
|
|
4
|
+
tiktoken_ruby (0.0.11)
|
|
5
|
+
rb_sys (= 0.9.106)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
10
|
ast (2.4.2)
|
|
11
|
-
diff-lcs (1.5.
|
|
12
|
-
json (2.
|
|
11
|
+
diff-lcs (1.5.1)
|
|
12
|
+
json (2.9.1)
|
|
13
13
|
language_server-protocol (3.17.0.3)
|
|
14
14
|
lint_roller (1.1.0)
|
|
15
15
|
minitest (5.21.2)
|
|
16
|
-
parallel (1.
|
|
17
|
-
parser (3.3.0
|
|
16
|
+
parallel (1.26.3)
|
|
17
|
+
parser (3.3.6.0)
|
|
18
18
|
ast (~> 2.4.1)
|
|
19
19
|
racc
|
|
20
|
-
racc (1.
|
|
20
|
+
racc (1.8.1)
|
|
21
21
|
rainbow (3.1.1)
|
|
22
|
-
rake (13.1
|
|
23
|
-
rake-compiler (1.2.
|
|
22
|
+
rake (13.2.1)
|
|
23
|
+
rake-compiler (1.2.9)
|
|
24
24
|
rake
|
|
25
|
-
rb_sys (0.9.
|
|
26
|
-
regexp_parser (2.
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
rspec-
|
|
30
|
-
rspec-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
rspec-expectations (3.12.3)
|
|
25
|
+
rb_sys (0.9.106)
|
|
26
|
+
regexp_parser (2.10.0)
|
|
27
|
+
rspec (3.13.0)
|
|
28
|
+
rspec-core (~> 3.13.0)
|
|
29
|
+
rspec-expectations (~> 3.13.0)
|
|
30
|
+
rspec-mocks (~> 3.13.0)
|
|
31
|
+
rspec-core (3.13.2)
|
|
32
|
+
rspec-support (~> 3.13.0)
|
|
33
|
+
rspec-expectations (3.13.3)
|
|
35
34
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
36
|
-
rspec-support (~> 3.
|
|
37
|
-
rspec-mocks (3.
|
|
35
|
+
rspec-support (~> 3.13.0)
|
|
36
|
+
rspec-mocks (3.13.2)
|
|
38
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
39
|
-
rspec-support (~> 3.
|
|
40
|
-
rspec-support (3.
|
|
41
|
-
rubocop (1.
|
|
38
|
+
rspec-support (~> 3.13.0)
|
|
39
|
+
rspec-support (3.13.2)
|
|
40
|
+
rubocop (1.69.2)
|
|
42
41
|
json (~> 2.3)
|
|
43
42
|
language_server-protocol (>= 3.17.0)
|
|
44
43
|
parallel (~> 1.10)
|
|
45
|
-
parser (>= 3.
|
|
44
|
+
parser (>= 3.3.0.2)
|
|
46
45
|
rainbow (>= 2.2.2, < 4.0)
|
|
47
|
-
regexp_parser (>=
|
|
48
|
-
|
|
49
|
-
rubocop-ast (>= 1.30.0, < 2.0)
|
|
46
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
|
47
|
+
rubocop-ast (>= 1.36.2, < 2.0)
|
|
50
48
|
ruby-progressbar (~> 1.7)
|
|
51
|
-
unicode-display_width (>= 2.4.0, <
|
|
52
|
-
rubocop-ast (1.
|
|
53
|
-
parser (>= 3.
|
|
54
|
-
rubocop-performance (1.
|
|
49
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
|
50
|
+
rubocop-ast (1.37.0)
|
|
51
|
+
parser (>= 3.3.1.0)
|
|
52
|
+
rubocop-performance (1.23.0)
|
|
55
53
|
rubocop (>= 1.48.1, < 2.0)
|
|
56
|
-
rubocop-ast (>= 1.
|
|
54
|
+
rubocop-ast (>= 1.31.1, < 2.0)
|
|
57
55
|
ruby-progressbar (1.13.0)
|
|
58
|
-
standard (1.
|
|
56
|
+
standard (1.43.0)
|
|
59
57
|
language_server-protocol (~> 3.17.0.2)
|
|
60
58
|
lint_roller (~> 1.0)
|
|
61
|
-
rubocop (~> 1.
|
|
59
|
+
rubocop (~> 1.69.1)
|
|
62
60
|
standard-custom (~> 1.0.0)
|
|
63
|
-
standard-performance (~> 1.
|
|
61
|
+
standard-performance (~> 1.6)
|
|
64
62
|
standard-custom (1.0.2)
|
|
65
63
|
lint_roller (~> 1.0)
|
|
66
64
|
rubocop (~> 1.50)
|
|
67
|
-
standard-performance (1.
|
|
65
|
+
standard-performance (1.6.0)
|
|
68
66
|
lint_roller (~> 1.1)
|
|
69
|
-
rubocop-performance (~> 1.
|
|
70
|
-
unicode-display_width (
|
|
67
|
+
rubocop-performance (~> 1.23.0)
|
|
68
|
+
unicode-display_width (3.1.3)
|
|
69
|
+
unicode-emoji (~> 4.0, >= 4.0.4)
|
|
70
|
+
unicode-emoji (4.0.4)
|
|
71
71
|
yard (0.9.34)
|
|
72
72
|
yard-doctest (0.1.17)
|
|
73
73
|
minitest
|
|
@@ -89,4 +89,4 @@ DEPENDENCIES
|
|
|
89
89
|
yard-doctest
|
|
90
90
|
|
|
91
91
|
BUNDLED WITH
|
|
92
|
-
2.
|
|
92
|
+
2.5.18
|
data/Rakefile
CHANGED
|
@@ -21,3 +21,9 @@ end
|
|
|
21
21
|
task build: :compile
|
|
22
22
|
|
|
23
23
|
task default: %i[compile spec standard]
|
|
24
|
+
|
|
25
|
+
# Packaging default (non-precompiled) gem
|
|
26
|
+
require "rubygems/package_task"
|
|
27
|
+
gem_path = Gem::PackageTask.new(GEMSPEC).define
|
|
28
|
+
desc "Package the Ruby gem"
|
|
29
|
+
task "package" => [gem_path]
|
|
@@ -10,6 +10,6 @@ publish = false
|
|
|
10
10
|
crate-type = ["cdylib"]
|
|
11
11
|
|
|
12
12
|
[dependencies]
|
|
13
|
-
magnus = { version = "0.
|
|
14
|
-
rb-sys = { version = "0.9.
|
|
15
|
-
tiktoken-rs = { version = "0.
|
|
13
|
+
magnus = { version = "0.7.1" }
|
|
14
|
+
rb-sys = { version = "0.9.106", features = ["stable-api-compiled-fallback"] }
|
|
15
|
+
tiktoken-rs = { version = "0.6.0" }
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
use std::collections::HashSet;
|
|
2
2
|
|
|
3
|
-
use
|
|
3
|
+
use tiktoken_rs::Rank;
|
|
4
4
|
|
|
5
|
+
use crate::uncicode_error;
|
|
5
6
|
|
|
6
7
|
#[magnus::wrap(class = "Tiktoken::Ext::CoreBPE")]
|
|
7
8
|
pub struct CoreBPEWrapper {
|
|
@@ -13,11 +14,15 @@ impl CoreBPEWrapper {
|
|
|
13
14
|
Self { core_bpe }
|
|
14
15
|
}
|
|
15
16
|
|
|
16
|
-
pub fn encode_ordinary(&self, text: String) -> Vec<
|
|
17
|
+
pub fn encode_ordinary(&self, text: String) -> Vec<Rank> {
|
|
17
18
|
self.core_bpe.encode_ordinary(text.as_str())
|
|
18
19
|
}
|
|
19
20
|
|
|
20
|
-
pub fn encode(
|
|
21
|
+
pub fn encode(
|
|
22
|
+
&self,
|
|
23
|
+
text: String,
|
|
24
|
+
allowed_special: magnus::RArray,
|
|
25
|
+
) -> Result<Vec<Rank>, magnus::Error> {
|
|
21
26
|
let allowed_special: Vec<String> = allowed_special.to_vec()?;
|
|
22
27
|
let allowed_special: Vec<&str> = allowed_special.iter().map(|s| s.as_str()).collect();
|
|
23
28
|
let allowed_special: HashSet<&str> = HashSet::from_iter(allowed_special.iter().cloned());
|
|
@@ -25,20 +30,18 @@ impl CoreBPEWrapper {
|
|
|
25
30
|
Ok(self.core_bpe.encode(text.as_str(), allowed_special))
|
|
26
31
|
}
|
|
27
32
|
|
|
28
|
-
pub fn encode_with_special_tokens(&self, text: String) -> Vec<
|
|
33
|
+
pub fn encode_with_special_tokens(&self, text: String) -> Vec<Rank> {
|
|
29
34
|
self.core_bpe.encode_with_special_tokens(text.as_str())
|
|
30
35
|
}
|
|
31
36
|
|
|
32
|
-
pub fn decode(&self, ids: Vec<
|
|
33
|
-
self.core_bpe.decode(ids)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
};
|
|
39
|
-
|
|
40
|
-
magnus::Error::new(error, e.to_string())
|
|
41
|
-
})
|
|
37
|
+
pub fn decode(&self, ids: Vec<Rank>) -> Result<String, magnus::Error> {
|
|
38
|
+
self.core_bpe.decode(ids).map_err(|e| {
|
|
39
|
+
let error = match uncicode_error() {
|
|
40
|
+
Ok(error) => error,
|
|
41
|
+
Err(e) => return e,
|
|
42
|
+
};
|
|
42
43
|
|
|
44
|
+
magnus::Error::new(error, e.to_string())
|
|
45
|
+
})
|
|
43
46
|
}
|
|
44
47
|
}
|
data/lib/tiktoken_ruby.rb
CHANGED
|
@@ -69,9 +69,12 @@ module Tiktoken
|
|
|
69
69
|
]
|
|
70
70
|
|
|
71
71
|
# taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
|
|
72
|
-
# that is also MIT licensed but by OpenAI
|
|
72
|
+
# that is also MIT licensed but by OpenAI;
|
|
73
|
+
# https://github.com/Congyuwang/tiktoken-rs/blob/main/tiktoken-rs/src/tokenizer.rs#L50
|
|
74
|
+
# is the source of the mapping for the Rust library
|
|
73
75
|
MODEL_TO_ENCODING_NAME = {
|
|
74
76
|
# chat
|
|
77
|
+
"chatgpt-4o-latest": "o200k_base",
|
|
75
78
|
"gpt-4o": "o200k_base",
|
|
76
79
|
"gpt-4": "cl100k_base",
|
|
77
80
|
"gpt-3.5-turbo": "cl100k_base",
|
data/script/release
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
|
|
3
|
+
set -e
|
|
4
|
+
|
|
5
|
+
if [ -z "${TIKTOKEN_PUBLISH_KEY}" ]; then
|
|
6
|
+
echo "Error: TIKTOKEN_PUBLISH_KEY is not set. This is the RubyGems API key to push the gem."
|
|
7
|
+
exit 1
|
|
8
|
+
fi
|
|
9
|
+
|
|
10
|
+
run_id=""
|
|
11
|
+
# Parse arguments
|
|
12
|
+
while [[ "$#" -gt 0 ]]; do
|
|
13
|
+
case $1 in
|
|
14
|
+
--run-id)
|
|
15
|
+
run_id="$2"
|
|
16
|
+
shift 2
|
|
17
|
+
;;
|
|
18
|
+
*)
|
|
19
|
+
echo "Unknown parameter passed: $1"
|
|
20
|
+
exit 1
|
|
21
|
+
;;
|
|
22
|
+
esac
|
|
23
|
+
done
|
|
24
|
+
|
|
25
|
+
if [ -z "${run_id}" ]; then
|
|
26
|
+
echo "Error: --run-id is not provided. Please provide the GitHub Action run id for the cross-compile workflow."
|
|
27
|
+
exit 1
|
|
28
|
+
fi
|
|
29
|
+
|
|
30
|
+
version=$(grep VERSION lib/tiktoken_ruby/version.rb | head -n 1 | cut -d'"' -f2)
|
|
31
|
+
echo "Building tiktoken_ruby v$version, using artifacts from run $run_id"
|
|
32
|
+
|
|
33
|
+
rm -rf pkg/cross-compiled
|
|
34
|
+
gh run download "$run_id" -D pkg/cross-compiled
|
|
35
|
+
|
|
36
|
+
for gem in pkg/cross-compiled/cross-gem-*/tiktoken_ruby-"$version"*.gem ; do
|
|
37
|
+
echo "Publishing $gem"
|
|
38
|
+
GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "$gem" --host https://rubygems.org
|
|
39
|
+
done
|
|
40
|
+
|
|
41
|
+
# last but not least, the uncompiled gem
|
|
42
|
+
bundle exec rake package
|
|
43
|
+
GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "pkg/tiktoken_ruby-$version.gem" --host https://rubygems.org
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tiktoken_ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.11
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- IAPark
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2025-01-08 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rb_sys
|
|
@@ -16,14 +16,14 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - '='
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 0.9.
|
|
19
|
+
version: 0.9.106
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
24
|
- - '='
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 0.9.
|
|
26
|
+
version: 0.9.106
|
|
27
27
|
description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
|
|
28
28
|
used by OpenAI. It can be used to count the number of tokens in text before sending
|
|
29
29
|
it to OpenAI APIs.
|
|
@@ -36,6 +36,7 @@ extra_rdoc_files: []
|
|
|
36
36
|
files:
|
|
37
37
|
- ".rspec"
|
|
38
38
|
- ".standard.yml"
|
|
39
|
+
- ".vscode/settings.json"
|
|
39
40
|
- Cargo.lock
|
|
40
41
|
- Cargo.toml
|
|
41
42
|
- Gemfile
|
|
@@ -51,8 +52,8 @@ files:
|
|
|
51
52
|
- lib/tiktoken_ruby.rb
|
|
52
53
|
- lib/tiktoken_ruby/encoding.rb
|
|
53
54
|
- lib/tiktoken_ruby/version.rb
|
|
55
|
+
- script/release
|
|
54
56
|
- sig/tiktoken_ruby.rbs
|
|
55
|
-
- tiktoken_ruby.gemspec
|
|
56
57
|
homepage: https://github.com/IAPark/tiktoken_ruby
|
|
57
58
|
licenses:
|
|
58
59
|
- MIT
|
|
@@ -68,14 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
68
69
|
requirements:
|
|
69
70
|
- - ">="
|
|
70
71
|
- !ruby/object:Gem::Version
|
|
71
|
-
version:
|
|
72
|
+
version: 3.1.0
|
|
72
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
73
74
|
requirements:
|
|
74
75
|
- - ">="
|
|
75
76
|
- !ruby/object:Gem::Version
|
|
76
|
-
version: 3.
|
|
77
|
+
version: 3.4.0
|
|
77
78
|
requirements: []
|
|
78
|
-
rubygems_version: 3.
|
|
79
|
+
rubygems_version: 3.5.22
|
|
79
80
|
signing_key:
|
|
80
81
|
specification_version: 4
|
|
81
82
|
summary: Ruby wrapper for Tiktoken
|
data/tiktoken_ruby.gemspec
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require_relative "lib/tiktoken_ruby/version"
|
|
4
|
-
|
|
5
|
-
Gem::Specification.new do |spec|
|
|
6
|
-
spec.name = "tiktoken_ruby"
|
|
7
|
-
spec.version = Tiktoken::VERSION
|
|
8
|
-
spec.authors = ["IAPark"]
|
|
9
|
-
spec.email = ["isaac.a.park@gmail.com"]
|
|
10
|
-
spec.summary = "Ruby wrapper for Tiktoken"
|
|
11
|
-
spec.description = "An unofficial Ruby wrapper for Tiktoken, " \
|
|
12
|
-
"a BPE tokenizer written by and used by OpenAI. It can be used to " \
|
|
13
|
-
"count the number of tokens in text before sending it to OpenAI APIs."
|
|
14
|
-
spec.homepage = "https://github.com/IAPark/tiktoken_ruby"
|
|
15
|
-
spec.license = "MIT"
|
|
16
|
-
spec.required_ruby_version = ">= 2.7.0"
|
|
17
|
-
spec.required_rubygems_version = ">= 3.1.0"
|
|
18
|
-
spec.platform = Gem::Platform::RUBY
|
|
19
|
-
|
|
20
|
-
spec.metadata["homepage_uri"] = spec.homepage
|
|
21
|
-
spec.metadata["source_code_uri"] = "https://github.com/IAPark/tiktoken_ruby"
|
|
22
|
-
spec.metadata["documentation_uri"] = "https://rubydoc.info/github/IAPark/tiktoken_ruby/main"
|
|
23
|
-
spec.files = Dir.chdir(__dir__) do
|
|
24
|
-
`git ls-files -z`.split("\x0").reject do |f|
|
|
25
|
-
(f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
spec.bindir = "exe"
|
|
29
|
-
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
30
|
-
spec.require_paths = ["lib"]
|
|
31
|
-
spec.extensions = ["ext/tiktoken_ruby/extconf.rb"]
|
|
32
|
-
spec.add_dependency "rb_sys", "= 0.9.87"
|
|
33
|
-
end
|