tiktoken_ruby 0.0.9 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vscode/settings.json +5 -0
- data/Cargo.lock +78 -76
- data/Gemfile.lock +40 -40
- data/Rakefile +6 -0
- data/ext/tiktoken_ruby/Cargo.toml +3 -3
- data/ext/tiktoken_ruby/src/core_bpe_wrapper.rs +17 -14
- data/lib/tiktoken_ruby/version.rb +1 -1
- data/lib/tiktoken_ruby.rb +4 -1
- data/script/release +43 -0
- metadata +9 -8
- data/tiktoken_ruby.gemspec +0 -33
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9997f4334fdaff90a631036be451cad90eb58ce6919a9592de2ca09d7f8baf9e
|
4
|
+
data.tar.gz: 160c540bf8a76278ebcabfe80c50e9fdb5802a0742d4692295e659ba8626b492
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea271891e7ca2fbfb637da4945c5f6da55f72b8c39efe20c4f83d5d059d2b9997f4344feadbd6109a29859729e4bba245fe51338e77c548e9e9a24bd1981a6e9
|
7
|
+
data.tar.gz: 3b6eeda82acaa6b6abb324e911196bce53d20a2bbec7750e07cb3b9d7a8381ed9ebd90b68a4e770e8bb1b4be332748216bc653d31389591348bc5192656e417f
|
data/Cargo.lock
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# This file is automatically @generated by Cargo.
|
2
2
|
# It is not intended for manual editing.
|
3
|
-
version =
|
3
|
+
version = 4
|
4
4
|
|
5
5
|
[[package]]
|
6
6
|
name = "aho-corasick"
|
@@ -13,15 +13,15 @@ dependencies = [
|
|
13
13
|
|
14
14
|
[[package]]
|
15
15
|
name = "anyhow"
|
16
|
-
version = "1.0.
|
16
|
+
version = "1.0.95"
|
17
17
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
18
|
-
checksum = "
|
18
|
+
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
|
19
19
|
|
20
20
|
[[package]]
|
21
21
|
name = "autocfg"
|
22
|
-
version = "1.
|
22
|
+
version = "1.4.0"
|
23
23
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
24
|
-
checksum = "
|
24
|
+
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
25
25
|
|
26
26
|
[[package]]
|
27
27
|
name = "base64"
|
@@ -31,9 +31,9 @@ checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
|
|
31
31
|
|
32
32
|
[[package]]
|
33
33
|
name = "bindgen"
|
34
|
-
version = "0.69.
|
34
|
+
version = "0.69.5"
|
35
35
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
36
|
-
checksum = "
|
36
|
+
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
|
37
37
|
dependencies = [
|
38
38
|
"bitflags",
|
39
39
|
"cexpr",
|
@@ -66,15 +66,15 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
|
66
66
|
|
67
67
|
[[package]]
|
68
68
|
name = "bitflags"
|
69
|
-
version = "2.
|
69
|
+
version = "2.6.0"
|
70
70
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
71
|
-
checksum = "
|
71
|
+
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
72
72
|
|
73
73
|
[[package]]
|
74
74
|
name = "bstr"
|
75
|
-
version = "1.
|
75
|
+
version = "1.11.1"
|
76
76
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
77
|
-
checksum = "
|
77
|
+
checksum = "786a307d683a5bf92e6fd5fd69a7eb613751668d1d8d67d802846dfe367c62c8"
|
78
78
|
dependencies = [
|
79
79
|
"memchr",
|
80
80
|
"regex-automata",
|
@@ -98,9 +98,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|
98
98
|
|
99
99
|
[[package]]
|
100
100
|
name = "clang-sys"
|
101
|
-
version = "1.
|
101
|
+
version = "1.8.1"
|
102
102
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
103
|
-
checksum = "
|
103
|
+
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
104
104
|
dependencies = [
|
105
105
|
"glob",
|
106
106
|
"libc",
|
@@ -109,25 +109,26 @@ dependencies = [
|
|
109
109
|
|
110
110
|
[[package]]
|
111
111
|
name = "either"
|
112
|
-
version = "1.
|
112
|
+
version = "1.13.0"
|
113
113
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
114
|
-
checksum = "
|
114
|
+
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
115
115
|
|
116
116
|
[[package]]
|
117
117
|
name = "fancy-regex"
|
118
|
-
version = "0.
|
118
|
+
version = "0.13.0"
|
119
119
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
120
|
-
checksum = "
|
120
|
+
checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
|
121
121
|
dependencies = [
|
122
122
|
"bit-set",
|
123
|
-
"regex",
|
123
|
+
"regex-automata",
|
124
|
+
"regex-syntax",
|
124
125
|
]
|
125
126
|
|
126
127
|
[[package]]
|
127
128
|
name = "glob"
|
128
|
-
version = "0.3.
|
129
|
+
version = "0.3.2"
|
129
130
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
130
|
-
checksum = "
|
131
|
+
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
131
132
|
|
132
133
|
[[package]]
|
133
134
|
name = "itertools"
|
@@ -140,9 +141,9 @@ dependencies = [
|
|
140
141
|
|
141
142
|
[[package]]
|
142
143
|
name = "lazy_static"
|
143
|
-
version = "1.
|
144
|
+
version = "1.5.0"
|
144
145
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
145
|
-
checksum = "
|
146
|
+
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
146
147
|
|
147
148
|
[[package]]
|
148
149
|
name = "lazycell"
|
@@ -152,15 +153,15 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
152
153
|
|
153
154
|
[[package]]
|
154
155
|
name = "libc"
|
155
|
-
version = "0.2.
|
156
|
+
version = "0.2.169"
|
156
157
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
157
|
-
checksum = "
|
158
|
+
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
158
159
|
|
159
160
|
[[package]]
|
160
161
|
name = "libloading"
|
161
|
-
version = "0.8.
|
162
|
+
version = "0.8.6"
|
162
163
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
163
|
-
checksum = "
|
164
|
+
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
|
164
165
|
dependencies = [
|
165
166
|
"cfg-if",
|
166
167
|
"windows-targets",
|
@@ -178,9 +179,9 @@ dependencies = [
|
|
178
179
|
|
179
180
|
[[package]]
|
180
181
|
name = "magnus"
|
181
|
-
version = "0.
|
182
|
+
version = "0.7.1"
|
182
183
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
183
|
-
checksum = "
|
184
|
+
checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
|
184
185
|
dependencies = [
|
185
186
|
"magnus-macros",
|
186
187
|
"rb-sys",
|
@@ -201,9 +202,9 @@ dependencies = [
|
|
201
202
|
|
202
203
|
[[package]]
|
203
204
|
name = "memchr"
|
204
|
-
version = "2.7.
|
205
|
+
version = "2.7.4"
|
205
206
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
206
|
-
checksum = "
|
207
|
+
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
207
208
|
|
208
209
|
[[package]]
|
209
210
|
name = "minimal-lexical"
|
@@ -223,9 +224,9 @@ dependencies = [
|
|
223
224
|
|
224
225
|
[[package]]
|
225
226
|
name = "parking_lot"
|
226
|
-
version = "0.12.
|
227
|
+
version = "0.12.3"
|
227
228
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
228
|
-
checksum = "
|
229
|
+
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
|
229
230
|
dependencies = [
|
230
231
|
"lock_api",
|
231
232
|
"parking_lot_core",
|
@@ -246,36 +247,36 @@ dependencies = [
|
|
246
247
|
|
247
248
|
[[package]]
|
248
249
|
name = "proc-macro2"
|
249
|
-
version = "1.0.
|
250
|
+
version = "1.0.92"
|
250
251
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
251
|
-
checksum = "
|
252
|
+
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
|
252
253
|
dependencies = [
|
253
254
|
"unicode-ident",
|
254
255
|
]
|
255
256
|
|
256
257
|
[[package]]
|
257
258
|
name = "quote"
|
258
|
-
version = "1.0.
|
259
|
+
version = "1.0.38"
|
259
260
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
260
|
-
checksum = "
|
261
|
+
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
|
261
262
|
dependencies = [
|
262
263
|
"proc-macro2",
|
263
264
|
]
|
264
265
|
|
265
266
|
[[package]]
|
266
267
|
name = "rb-sys"
|
267
|
-
version = "0.9.
|
268
|
+
version = "0.9.106"
|
268
269
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
269
|
-
checksum = "
|
270
|
+
checksum = "17b6efdbc8c1a22cb8b5d7ead0237c16c362c9ef6fbdc09e2d1040615b0f4cd0"
|
270
271
|
dependencies = [
|
271
272
|
"rb-sys-build",
|
272
273
|
]
|
273
274
|
|
274
275
|
[[package]]
|
275
276
|
name = "rb-sys-build"
|
276
|
-
version = "0.9.
|
277
|
+
version = "0.9.106"
|
277
278
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
278
|
-
checksum = "
|
279
|
+
checksum = "e1d88c51e52f8636a5efc24ec5987056e64e48a91ed2a1af96cb5564686cc10f"
|
279
280
|
dependencies = [
|
280
281
|
"bindgen",
|
281
282
|
"lazy_static",
|
@@ -294,18 +295,18 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
294
295
|
|
295
296
|
[[package]]
|
296
297
|
name = "redox_syscall"
|
297
|
-
version = "0.5.
|
298
|
+
version = "0.5.8"
|
298
299
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
299
|
-
checksum = "
|
300
|
+
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
|
300
301
|
dependencies = [
|
301
302
|
"bitflags",
|
302
303
|
]
|
303
304
|
|
304
305
|
[[package]]
|
305
306
|
name = "regex"
|
306
|
-
version = "1.
|
307
|
+
version = "1.11.1"
|
307
308
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
308
|
-
checksum = "
|
309
|
+
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
309
310
|
dependencies = [
|
310
311
|
"aho-corasick",
|
311
312
|
"memchr",
|
@@ -315,9 +316,9 @@ dependencies = [
|
|
315
316
|
|
316
317
|
[[package]]
|
317
318
|
name = "regex-automata"
|
318
|
-
version = "0.4.
|
319
|
+
version = "0.4.9"
|
319
320
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
320
|
-
checksum = "
|
321
|
+
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
321
322
|
dependencies = [
|
322
323
|
"aho-corasick",
|
323
324
|
"memchr",
|
@@ -326,9 +327,9 @@ dependencies = [
|
|
326
327
|
|
327
328
|
[[package]]
|
328
329
|
name = "regex-syntax"
|
329
|
-
version = "0.8.
|
330
|
+
version = "0.8.5"
|
330
331
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
331
|
-
checksum = "
|
332
|
+
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
332
333
|
|
333
334
|
[[package]]
|
334
335
|
name = "rustc-hash"
|
@@ -350,18 +351,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
|
350
351
|
|
351
352
|
[[package]]
|
352
353
|
name = "serde"
|
353
|
-
version = "1.0.
|
354
|
+
version = "1.0.217"
|
354
355
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
355
|
-
checksum = "
|
356
|
+
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
|
356
357
|
dependencies = [
|
357
358
|
"serde_derive",
|
358
359
|
]
|
359
360
|
|
360
361
|
[[package]]
|
361
362
|
name = "serde_derive"
|
362
|
-
version = "1.0.
|
363
|
+
version = "1.0.217"
|
363
364
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
364
|
-
checksum = "
|
365
|
+
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
|
365
366
|
dependencies = [
|
366
367
|
"proc-macro2",
|
367
368
|
"quote",
|
@@ -388,9 +389,9 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
|
388
389
|
|
389
390
|
[[package]]
|
390
391
|
name = "syn"
|
391
|
-
version = "2.0.
|
392
|
+
version = "2.0.93"
|
392
393
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
393
|
-
checksum = "
|
394
|
+
checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058"
|
394
395
|
dependencies = [
|
395
396
|
"proc-macro2",
|
396
397
|
"quote",
|
@@ -399,9 +400,9 @@ dependencies = [
|
|
399
400
|
|
400
401
|
[[package]]
|
401
402
|
name = "tiktoken-rs"
|
402
|
-
version = "0.
|
403
|
+
version = "0.6.0"
|
403
404
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
404
|
-
checksum = "
|
405
|
+
checksum = "44075987ee2486402f0808505dd65692163d243a337fc54363d49afac41087f6"
|
405
406
|
dependencies = [
|
406
407
|
"anyhow",
|
407
408
|
"base64",
|
@@ -409,6 +410,7 @@ dependencies = [
|
|
409
410
|
"fancy-regex",
|
410
411
|
"lazy_static",
|
411
412
|
"parking_lot",
|
413
|
+
"regex",
|
412
414
|
"rustc-hash",
|
413
415
|
]
|
414
416
|
|
@@ -423,15 +425,15 @@ dependencies = [
|
|
423
425
|
|
424
426
|
[[package]]
|
425
427
|
name = "unicode-ident"
|
426
|
-
version = "1.0.
|
428
|
+
version = "1.0.14"
|
427
429
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
428
|
-
checksum = "
|
430
|
+
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
|
429
431
|
|
430
432
|
[[package]]
|
431
433
|
name = "windows-targets"
|
432
|
-
version = "0.52.
|
434
|
+
version = "0.52.6"
|
433
435
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
434
|
-
checksum = "
|
436
|
+
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
435
437
|
dependencies = [
|
436
438
|
"windows_aarch64_gnullvm",
|
437
439
|
"windows_aarch64_msvc",
|
@@ -445,48 +447,48 @@ dependencies = [
|
|
445
447
|
|
446
448
|
[[package]]
|
447
449
|
name = "windows_aarch64_gnullvm"
|
448
|
-
version = "0.52.
|
450
|
+
version = "0.52.6"
|
449
451
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
450
|
-
checksum = "
|
452
|
+
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
451
453
|
|
452
454
|
[[package]]
|
453
455
|
name = "windows_aarch64_msvc"
|
454
|
-
version = "0.52.
|
456
|
+
version = "0.52.6"
|
455
457
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
456
|
-
checksum = "
|
458
|
+
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
457
459
|
|
458
460
|
[[package]]
|
459
461
|
name = "windows_i686_gnu"
|
460
|
-
version = "0.52.
|
462
|
+
version = "0.52.6"
|
461
463
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
462
|
-
checksum = "
|
464
|
+
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
463
465
|
|
464
466
|
[[package]]
|
465
467
|
name = "windows_i686_gnullvm"
|
466
|
-
version = "0.52.
|
468
|
+
version = "0.52.6"
|
467
469
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
468
|
-
checksum = "
|
470
|
+
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
469
471
|
|
470
472
|
[[package]]
|
471
473
|
name = "windows_i686_msvc"
|
472
|
-
version = "0.52.
|
474
|
+
version = "0.52.6"
|
473
475
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
474
|
-
checksum = "
|
476
|
+
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
475
477
|
|
476
478
|
[[package]]
|
477
479
|
name = "windows_x86_64_gnu"
|
478
|
-
version = "0.52.
|
480
|
+
version = "0.52.6"
|
479
481
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
480
|
-
checksum = "
|
482
|
+
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
481
483
|
|
482
484
|
[[package]]
|
483
485
|
name = "windows_x86_64_gnullvm"
|
484
|
-
version = "0.52.
|
486
|
+
version = "0.52.6"
|
485
487
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
486
|
-
checksum = "
|
488
|
+
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
487
489
|
|
488
490
|
[[package]]
|
489
491
|
name = "windows_x86_64_msvc"
|
490
|
-
version = "0.52.
|
492
|
+
version = "0.52.6"
|
491
493
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
492
|
-
checksum = "
|
494
|
+
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
data/Gemfile.lock
CHANGED
@@ -1,73 +1,73 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
tiktoken_ruby (0.0.
|
5
|
-
rb_sys (= 0.9.
|
4
|
+
tiktoken_ruby (0.0.11)
|
5
|
+
rb_sys (= 0.9.106)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
10
|
ast (2.4.2)
|
11
|
-
diff-lcs (1.5.
|
12
|
-
json (2.
|
11
|
+
diff-lcs (1.5.1)
|
12
|
+
json (2.9.1)
|
13
13
|
language_server-protocol (3.17.0.3)
|
14
14
|
lint_roller (1.1.0)
|
15
15
|
minitest (5.21.2)
|
16
|
-
parallel (1.
|
17
|
-
parser (3.3.0
|
16
|
+
parallel (1.26.3)
|
17
|
+
parser (3.3.6.0)
|
18
18
|
ast (~> 2.4.1)
|
19
19
|
racc
|
20
|
-
racc (1.
|
20
|
+
racc (1.8.1)
|
21
21
|
rainbow (3.1.1)
|
22
|
-
rake (13.1
|
23
|
-
rake-compiler (1.2.
|
22
|
+
rake (13.2.1)
|
23
|
+
rake-compiler (1.2.9)
|
24
24
|
rake
|
25
|
-
rb_sys (0.9.
|
26
|
-
regexp_parser (2.
|
27
|
-
|
28
|
-
|
29
|
-
rspec-
|
30
|
-
rspec-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
rspec-expectations (3.12.3)
|
25
|
+
rb_sys (0.9.106)
|
26
|
+
regexp_parser (2.10.0)
|
27
|
+
rspec (3.13.0)
|
28
|
+
rspec-core (~> 3.13.0)
|
29
|
+
rspec-expectations (~> 3.13.0)
|
30
|
+
rspec-mocks (~> 3.13.0)
|
31
|
+
rspec-core (3.13.2)
|
32
|
+
rspec-support (~> 3.13.0)
|
33
|
+
rspec-expectations (3.13.3)
|
35
34
|
diff-lcs (>= 1.2.0, < 2.0)
|
36
|
-
rspec-support (~> 3.
|
37
|
-
rspec-mocks (3.
|
35
|
+
rspec-support (~> 3.13.0)
|
36
|
+
rspec-mocks (3.13.2)
|
38
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
39
|
-
rspec-support (~> 3.
|
40
|
-
rspec-support (3.
|
41
|
-
rubocop (1.
|
38
|
+
rspec-support (~> 3.13.0)
|
39
|
+
rspec-support (3.13.2)
|
40
|
+
rubocop (1.69.2)
|
42
41
|
json (~> 2.3)
|
43
42
|
language_server-protocol (>= 3.17.0)
|
44
43
|
parallel (~> 1.10)
|
45
|
-
parser (>= 3.
|
44
|
+
parser (>= 3.3.0.2)
|
46
45
|
rainbow (>= 2.2.2, < 4.0)
|
47
|
-
regexp_parser (>=
|
48
|
-
|
49
|
-
rubocop-ast (>= 1.30.0, < 2.0)
|
46
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
47
|
+
rubocop-ast (>= 1.36.2, < 2.0)
|
50
48
|
ruby-progressbar (~> 1.7)
|
51
|
-
unicode-display_width (>= 2.4.0, <
|
52
|
-
rubocop-ast (1.
|
53
|
-
parser (>= 3.
|
54
|
-
rubocop-performance (1.
|
49
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
50
|
+
rubocop-ast (1.37.0)
|
51
|
+
parser (>= 3.3.1.0)
|
52
|
+
rubocop-performance (1.23.0)
|
55
53
|
rubocop (>= 1.48.1, < 2.0)
|
56
|
-
rubocop-ast (>= 1.
|
54
|
+
rubocop-ast (>= 1.31.1, < 2.0)
|
57
55
|
ruby-progressbar (1.13.0)
|
58
|
-
standard (1.
|
56
|
+
standard (1.43.0)
|
59
57
|
language_server-protocol (~> 3.17.0.2)
|
60
58
|
lint_roller (~> 1.0)
|
61
|
-
rubocop (~> 1.
|
59
|
+
rubocop (~> 1.69.1)
|
62
60
|
standard-custom (~> 1.0.0)
|
63
|
-
standard-performance (~> 1.
|
61
|
+
standard-performance (~> 1.6)
|
64
62
|
standard-custom (1.0.2)
|
65
63
|
lint_roller (~> 1.0)
|
66
64
|
rubocop (~> 1.50)
|
67
|
-
standard-performance (1.
|
65
|
+
standard-performance (1.6.0)
|
68
66
|
lint_roller (~> 1.1)
|
69
|
-
rubocop-performance (~> 1.
|
70
|
-
unicode-display_width (
|
67
|
+
rubocop-performance (~> 1.23.0)
|
68
|
+
unicode-display_width (3.1.3)
|
69
|
+
unicode-emoji (~> 4.0, >= 4.0.4)
|
70
|
+
unicode-emoji (4.0.4)
|
71
71
|
yard (0.9.34)
|
72
72
|
yard-doctest (0.1.17)
|
73
73
|
minitest
|
@@ -89,4 +89,4 @@ DEPENDENCIES
|
|
89
89
|
yard-doctest
|
90
90
|
|
91
91
|
BUNDLED WITH
|
92
|
-
2.
|
92
|
+
2.5.18
|
data/Rakefile
CHANGED
@@ -21,3 +21,9 @@ end
|
|
21
21
|
task build: :compile
|
22
22
|
|
23
23
|
task default: %i[compile spec standard]
|
24
|
+
|
25
|
+
# Packaging default (non-precompiled) gem
|
26
|
+
require "rubygems/package_task"
|
27
|
+
gem_path = Gem::PackageTask.new(GEMSPEC).define
|
28
|
+
desc "Package the Ruby gem"
|
29
|
+
task "package" => [gem_path]
|
@@ -10,6 +10,6 @@ publish = false
|
|
10
10
|
crate-type = ["cdylib"]
|
11
11
|
|
12
12
|
[dependencies]
|
13
|
-
magnus = { version = "0.
|
14
|
-
rb-sys = { version = "0.9.
|
15
|
-
tiktoken-rs = { version = "0.
|
13
|
+
magnus = { version = "0.7.1" }
|
14
|
+
rb-sys = { version = "0.9.106", features = ["stable-api-compiled-fallback"] }
|
15
|
+
tiktoken-rs = { version = "0.6.0" }
|
@@ -1,7 +1,8 @@
|
|
1
1
|
use std::collections::HashSet;
|
2
2
|
|
3
|
-
use
|
3
|
+
use tiktoken_rs::Rank;
|
4
4
|
|
5
|
+
use crate::uncicode_error;
|
5
6
|
|
6
7
|
#[magnus::wrap(class = "Tiktoken::Ext::CoreBPE")]
|
7
8
|
pub struct CoreBPEWrapper {
|
@@ -13,11 +14,15 @@ impl CoreBPEWrapper {
|
|
13
14
|
Self { core_bpe }
|
14
15
|
}
|
15
16
|
|
16
|
-
pub fn encode_ordinary(&self, text: String) -> Vec<
|
17
|
+
pub fn encode_ordinary(&self, text: String) -> Vec<Rank> {
|
17
18
|
self.core_bpe.encode_ordinary(text.as_str())
|
18
19
|
}
|
19
20
|
|
20
|
-
pub fn encode(
|
21
|
+
pub fn encode(
|
22
|
+
&self,
|
23
|
+
text: String,
|
24
|
+
allowed_special: magnus::RArray,
|
25
|
+
) -> Result<Vec<Rank>, magnus::Error> {
|
21
26
|
let allowed_special: Vec<String> = allowed_special.to_vec()?;
|
22
27
|
let allowed_special: Vec<&str> = allowed_special.iter().map(|s| s.as_str()).collect();
|
23
28
|
let allowed_special: HashSet<&str> = HashSet::from_iter(allowed_special.iter().cloned());
|
@@ -25,20 +30,18 @@ impl CoreBPEWrapper {
|
|
25
30
|
Ok(self.core_bpe.encode(text.as_str(), allowed_special))
|
26
31
|
}
|
27
32
|
|
28
|
-
pub fn encode_with_special_tokens(&self, text: String) -> Vec<
|
33
|
+
pub fn encode_with_special_tokens(&self, text: String) -> Vec<Rank> {
|
29
34
|
self.core_bpe.encode_with_special_tokens(text.as_str())
|
30
35
|
}
|
31
36
|
|
32
|
-
pub fn decode(&self, ids: Vec<
|
33
|
-
self.core_bpe.decode(ids)
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
};
|
39
|
-
|
40
|
-
magnus::Error::new(error, e.to_string())
|
41
|
-
})
|
37
|
+
pub fn decode(&self, ids: Vec<Rank>) -> Result<String, magnus::Error> {
|
38
|
+
self.core_bpe.decode(ids).map_err(|e| {
|
39
|
+
let error = match uncicode_error() {
|
40
|
+
Ok(error) => error,
|
41
|
+
Err(e) => return e,
|
42
|
+
};
|
42
43
|
|
44
|
+
magnus::Error::new(error, e.to_string())
|
45
|
+
})
|
43
46
|
}
|
44
47
|
}
|
data/lib/tiktoken_ruby.rb
CHANGED
@@ -69,9 +69,12 @@ module Tiktoken
|
|
69
69
|
]
|
70
70
|
|
71
71
|
# taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
|
72
|
-
# that is also MIT licensed but by OpenAI
|
72
|
+
# that is also MIT licensed but by OpenAI;
|
73
|
+
# https://github.com/Congyuwang/tiktoken-rs/blob/main/tiktoken-rs/src/tokenizer.rs#L50
|
74
|
+
# is the source of the mapping for the Rust library
|
73
75
|
MODEL_TO_ENCODING_NAME = {
|
74
76
|
# chat
|
77
|
+
"chatgpt-4o-latest": "o200k_base",
|
75
78
|
"gpt-4o": "o200k_base",
|
76
79
|
"gpt-4": "cl100k_base",
|
77
80
|
"gpt-3.5-turbo": "cl100k_base",
|
data/script/release
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
|
3
|
+
set -e
|
4
|
+
|
5
|
+
if [ -z "${TIKTOKEN_PUBLISH_KEY}" ]; then
|
6
|
+
echo "Error: TIKTOKEN_PUBLISH_KEY is not set. This is the RubyGems API key to push the gem."
|
7
|
+
exit 1
|
8
|
+
fi
|
9
|
+
|
10
|
+
run_id=""
|
11
|
+
# Parse arguments
|
12
|
+
while [[ "$#" -gt 0 ]]; do
|
13
|
+
case $1 in
|
14
|
+
--run-id)
|
15
|
+
run_id="$2"
|
16
|
+
shift 2
|
17
|
+
;;
|
18
|
+
*)
|
19
|
+
echo "Unknown parameter passed: $1"
|
20
|
+
exit 1
|
21
|
+
;;
|
22
|
+
esac
|
23
|
+
done
|
24
|
+
|
25
|
+
if [ -z "${run_id}" ]; then
|
26
|
+
echo "Error: --run-id is not provided. Please provide the GitHub Action run id for the cross-compile workflow."
|
27
|
+
exit 1
|
28
|
+
fi
|
29
|
+
|
30
|
+
version=$(grep VERSION lib/tiktoken_ruby/version.rb | head -n 1 | cut -d'"' -f2)
|
31
|
+
echo "Building tiktoken_ruby v$version, using artifacts from run $run_id"
|
32
|
+
|
33
|
+
rm -rf pkg/cross-compiled
|
34
|
+
gh run download "$run_id" -D pkg/cross-compiled
|
35
|
+
|
36
|
+
for gem in pkg/cross-compiled/cross-gem-*/tiktoken_ruby-"$version"*.gem ; do
|
37
|
+
echo "Publishing $gem"
|
38
|
+
GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "$gem" --host https://rubygems.org
|
39
|
+
done
|
40
|
+
|
41
|
+
# last but not least, the uncompiled gem
|
42
|
+
bundle exec rake package
|
43
|
+
GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "pkg/tiktoken_ruby-$version.gem" --host https://rubygems.org
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiktoken_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- IAPark
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-01-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - '='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.9.
|
19
|
+
version: 0.9.106
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.9.
|
26
|
+
version: 0.9.106
|
27
27
|
description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
|
28
28
|
used by OpenAI. It can be used to count the number of tokens in text before sending
|
29
29
|
it to OpenAI APIs.
|
@@ -36,6 +36,7 @@ extra_rdoc_files: []
|
|
36
36
|
files:
|
37
37
|
- ".rspec"
|
38
38
|
- ".standard.yml"
|
39
|
+
- ".vscode/settings.json"
|
39
40
|
- Cargo.lock
|
40
41
|
- Cargo.toml
|
41
42
|
- Gemfile
|
@@ -51,8 +52,8 @@ files:
|
|
51
52
|
- lib/tiktoken_ruby.rb
|
52
53
|
- lib/tiktoken_ruby/encoding.rb
|
53
54
|
- lib/tiktoken_ruby/version.rb
|
55
|
+
- script/release
|
54
56
|
- sig/tiktoken_ruby.rbs
|
55
|
-
- tiktoken_ruby.gemspec
|
56
57
|
homepage: https://github.com/IAPark/tiktoken_ruby
|
57
58
|
licenses:
|
58
59
|
- MIT
|
@@ -68,14 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
68
69
|
requirements:
|
69
70
|
- - ">="
|
70
71
|
- !ruby/object:Gem::Version
|
71
|
-
version:
|
72
|
+
version: 3.1.0
|
72
73
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
74
|
requirements:
|
74
75
|
- - ">="
|
75
76
|
- !ruby/object:Gem::Version
|
76
|
-
version: 3.
|
77
|
+
version: 3.4.0
|
77
78
|
requirements: []
|
78
|
-
rubygems_version: 3.
|
79
|
+
rubygems_version: 3.5.22
|
79
80
|
signing_key:
|
80
81
|
specification_version: 4
|
81
82
|
summary: Ruby wrapper for Tiktoken
|
data/tiktoken_ruby.gemspec
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative "lib/tiktoken_ruby/version"
|
4
|
-
|
5
|
-
Gem::Specification.new do |spec|
|
6
|
-
spec.name = "tiktoken_ruby"
|
7
|
-
spec.version = Tiktoken::VERSION
|
8
|
-
spec.authors = ["IAPark"]
|
9
|
-
spec.email = ["isaac.a.park@gmail.com"]
|
10
|
-
spec.summary = "Ruby wrapper for Tiktoken"
|
11
|
-
spec.description = "An unofficial Ruby wrapper for Tiktoken, " \
|
12
|
-
"a BPE tokenizer written by and used by OpenAI. It can be used to " \
|
13
|
-
"count the number of tokens in text before sending it to OpenAI APIs."
|
14
|
-
spec.homepage = "https://github.com/IAPark/tiktoken_ruby"
|
15
|
-
spec.license = "MIT"
|
16
|
-
spec.required_ruby_version = ">= 2.7.0"
|
17
|
-
spec.required_rubygems_version = ">= 3.1.0"
|
18
|
-
spec.platform = Gem::Platform::RUBY
|
19
|
-
|
20
|
-
spec.metadata["homepage_uri"] = spec.homepage
|
21
|
-
spec.metadata["source_code_uri"] = "https://github.com/IAPark/tiktoken_ruby"
|
22
|
-
spec.metadata["documentation_uri"] = "https://rubydoc.info/github/IAPark/tiktoken_ruby/main"
|
23
|
-
spec.files = Dir.chdir(__dir__) do
|
24
|
-
`git ls-files -z`.split("\x0").reject do |f|
|
25
|
-
(f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
|
26
|
-
end
|
27
|
-
end
|
28
|
-
spec.bindir = "exe"
|
29
|
-
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
30
|
-
spec.require_paths = ["lib"]
|
31
|
-
spec.extensions = ["ext/tiktoken_ruby/extconf.rb"]
|
32
|
-
spec.add_dependency "rb_sys", "= 0.9.87"
|
33
|
-
end
|