tiktoken_ruby 0.0.7 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +126 -133
- data/Gemfile.lock +4 -4
- data/README.md +10 -3
- data/ext/tiktoken_ruby/Cargo.toml +2 -2
- data/ext/tiktoken_ruby/src/lib.rs +6 -0
- data/lib/tiktoken_ruby/encoding.rb +6 -2
- data/lib/tiktoken_ruby/version.rb +1 -1
- data/lib/tiktoken_ruby.rb +11 -6
- data/tiktoken_ruby.gemspec +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: abd5db9516cf5d26ace1790e1267038910af214b3315d157e35df851530b83cb
|
4
|
+
data.tar.gz: 88541080f80f27a52c8a7eb9bd9a2baf4dc3c67af4ba152bc03150ade1c89f72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bdba999ff6ee22d57a993e7cbff9f1f95c22540973e7af26a58ded2e49e77c9863daedd936255a86c9722598b8fcc442d746c502d9b295f2758e52774aa8fd7c
|
7
|
+
data.tar.gz: 8917f5a08dbed662c890d0102c9d4be3d24a34d5537804b7ceb563819dd01e36d5b580d5e9f68a20464057620e8f2497558e0e3f1e0faf2afc044d2a4feec7e2
|
data/Cargo.lock
CHANGED
@@ -4,43 +4,43 @@ version = 3
|
|
4
4
|
|
5
5
|
[[package]]
|
6
6
|
name = "aho-corasick"
|
7
|
-
version = "
|
7
|
+
version = "1.1.3"
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
|
-
checksum = "
|
9
|
+
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
10
10
|
dependencies = [
|
11
11
|
"memchr",
|
12
12
|
]
|
13
13
|
|
14
14
|
[[package]]
|
15
15
|
name = "anyhow"
|
16
|
-
version = "1.0.
|
16
|
+
version = "1.0.83"
|
17
17
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
18
|
-
checksum = "
|
18
|
+
checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3"
|
19
19
|
|
20
20
|
[[package]]
|
21
21
|
name = "autocfg"
|
22
|
-
version = "1.
|
22
|
+
version = "1.3.0"
|
23
23
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
24
|
-
checksum = "
|
24
|
+
checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
|
25
25
|
|
26
26
|
[[package]]
|
27
27
|
name = "base64"
|
28
|
-
version = "0.21.
|
28
|
+
version = "0.21.7"
|
29
29
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
30
|
-
checksum = "
|
30
|
+
checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
|
31
31
|
|
32
32
|
[[package]]
|
33
33
|
name = "bindgen"
|
34
|
-
version = "0.
|
34
|
+
version = "0.69.4"
|
35
35
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
36
|
-
checksum = "
|
36
|
+
checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
|
37
37
|
dependencies = [
|
38
|
-
"bitflags
|
38
|
+
"bitflags",
|
39
39
|
"cexpr",
|
40
40
|
"clang-sys",
|
41
|
+
"itertools",
|
41
42
|
"lazy_static",
|
42
43
|
"lazycell",
|
43
|
-
"peeking_take_while",
|
44
44
|
"proc-macro2",
|
45
45
|
"quote",
|
46
46
|
"regex",
|
@@ -66,24 +66,17 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
|
|
66
66
|
|
67
67
|
[[package]]
|
68
68
|
name = "bitflags"
|
69
|
-
version = "
|
70
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
71
|
-
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
72
|
-
|
73
|
-
[[package]]
|
74
|
-
name = "bitflags"
|
75
|
-
version = "2.4.0"
|
69
|
+
version = "2.5.0"
|
76
70
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
77
|
-
checksum = "
|
71
|
+
checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
|
78
72
|
|
79
73
|
[[package]]
|
80
74
|
name = "bstr"
|
81
|
-
version = "1.
|
75
|
+
version = "1.9.1"
|
82
76
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
83
|
-
checksum = "
|
77
|
+
checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
|
84
78
|
dependencies = [
|
85
79
|
"memchr",
|
86
|
-
"once_cell",
|
87
80
|
"regex-automata",
|
88
81
|
"serde",
|
89
82
|
]
|
@@ -105,20 +98,26 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|
105
98
|
|
106
99
|
[[package]]
|
107
100
|
name = "clang-sys"
|
108
|
-
version = "1.
|
101
|
+
version = "1.7.0"
|
109
102
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
110
|
-
checksum = "
|
103
|
+
checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
|
111
104
|
dependencies = [
|
112
105
|
"glob",
|
113
106
|
"libc",
|
114
107
|
"libloading",
|
115
108
|
]
|
116
109
|
|
110
|
+
[[package]]
|
111
|
+
name = "either"
|
112
|
+
version = "1.11.0"
|
113
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
114
|
+
checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2"
|
115
|
+
|
117
116
|
[[package]]
|
118
117
|
name = "fancy-regex"
|
119
|
-
version = "0.
|
118
|
+
version = "0.12.0"
|
120
119
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
121
|
-
checksum = "
|
120
|
+
checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05"
|
122
121
|
dependencies = [
|
123
122
|
"bit-set",
|
124
123
|
"regex",
|
@@ -130,6 +129,15 @@ version = "0.3.1"
|
|
130
129
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
131
130
|
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
132
131
|
|
132
|
+
[[package]]
|
133
|
+
name = "itertools"
|
134
|
+
version = "0.12.1"
|
135
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
136
|
+
checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
|
137
|
+
dependencies = [
|
138
|
+
"either",
|
139
|
+
]
|
140
|
+
|
133
141
|
[[package]]
|
134
142
|
name = "lazy_static"
|
135
143
|
version = "1.4.0"
|
@@ -144,25 +152,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
144
152
|
|
145
153
|
[[package]]
|
146
154
|
name = "libc"
|
147
|
-
version = "0.2.
|
155
|
+
version = "0.2.154"
|
148
156
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
149
|
-
checksum = "
|
157
|
+
checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346"
|
150
158
|
|
151
159
|
[[package]]
|
152
160
|
name = "libloading"
|
153
|
-
version = "0.
|
161
|
+
version = "0.8.3"
|
154
162
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
155
|
-
checksum = "
|
163
|
+
checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19"
|
156
164
|
dependencies = [
|
157
165
|
"cfg-if",
|
158
|
-
"
|
166
|
+
"windows-targets",
|
159
167
|
]
|
160
168
|
|
161
169
|
[[package]]
|
162
170
|
name = "lock_api"
|
163
|
-
version = "0.4.
|
171
|
+
version = "0.4.12"
|
164
172
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
165
|
-
checksum = "
|
173
|
+
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
|
166
174
|
dependencies = [
|
167
175
|
"autocfg",
|
168
176
|
"scopeguard",
|
@@ -170,9 +178,9 @@ dependencies = [
|
|
170
178
|
|
171
179
|
[[package]]
|
172
180
|
name = "magnus"
|
173
|
-
version = "0.6.
|
181
|
+
version = "0.6.4"
|
174
182
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
175
|
-
checksum = "
|
183
|
+
checksum = "b1597ef40aa8c36be098249e82c9a20cf7199278ac1c1a1a995eeead6a184479"
|
176
184
|
dependencies = [
|
177
185
|
"magnus-macros",
|
178
186
|
"rb-sys",
|
@@ -193,9 +201,9 @@ dependencies = [
|
|
193
201
|
|
194
202
|
[[package]]
|
195
203
|
name = "memchr"
|
196
|
-
version = "2.
|
204
|
+
version = "2.7.2"
|
197
205
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
198
|
-
checksum = "
|
206
|
+
checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
|
199
207
|
|
200
208
|
[[package]]
|
201
209
|
name = "minimal-lexical"
|
@@ -213,17 +221,11 @@ dependencies = [
|
|
213
221
|
"minimal-lexical",
|
214
222
|
]
|
215
223
|
|
216
|
-
[[package]]
|
217
|
-
name = "once_cell"
|
218
|
-
version = "1.17.1"
|
219
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
220
|
-
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
|
221
|
-
|
222
224
|
[[package]]
|
223
225
|
name = "parking_lot"
|
224
|
-
version = "0.12.
|
226
|
+
version = "0.12.2"
|
225
227
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
226
|
-
checksum = "
|
228
|
+
checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb"
|
227
229
|
dependencies = [
|
228
230
|
"lock_api",
|
229
231
|
"parking_lot_core",
|
@@ -231,55 +233,49 @@ dependencies = [
|
|
231
233
|
|
232
234
|
[[package]]
|
233
235
|
name = "parking_lot_core"
|
234
|
-
version = "0.9.
|
236
|
+
version = "0.9.10"
|
235
237
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
236
|
-
checksum = "
|
238
|
+
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
|
237
239
|
dependencies = [
|
238
240
|
"cfg-if",
|
239
241
|
"libc",
|
240
242
|
"redox_syscall",
|
241
243
|
"smallvec",
|
242
|
-
"windows-
|
244
|
+
"windows-targets",
|
243
245
|
]
|
244
246
|
|
245
|
-
[[package]]
|
246
|
-
name = "peeking_take_while"
|
247
|
-
version = "0.1.2"
|
248
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
249
|
-
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
250
|
-
|
251
247
|
[[package]]
|
252
248
|
name = "proc-macro2"
|
253
|
-
version = "1.0.
|
249
|
+
version = "1.0.82"
|
254
250
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
255
|
-
checksum = "
|
251
|
+
checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b"
|
256
252
|
dependencies = [
|
257
253
|
"unicode-ident",
|
258
254
|
]
|
259
255
|
|
260
256
|
[[package]]
|
261
257
|
name = "quote"
|
262
|
-
version = "1.0.
|
258
|
+
version = "1.0.36"
|
263
259
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
264
|
-
checksum = "
|
260
|
+
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
265
261
|
dependencies = [
|
266
262
|
"proc-macro2",
|
267
263
|
]
|
268
264
|
|
269
265
|
[[package]]
|
270
266
|
name = "rb-sys"
|
271
|
-
version = "0.9.
|
267
|
+
version = "0.9.97"
|
272
268
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
273
|
-
checksum = "
|
269
|
+
checksum = "47d30bcad206b51f2f66121190ca678dce1fdf3a2eae0ac5d838d1818b19bdf5"
|
274
270
|
dependencies = [
|
275
271
|
"rb-sys-build",
|
276
272
|
]
|
277
273
|
|
278
274
|
[[package]]
|
279
275
|
name = "rb-sys-build"
|
280
|
-
version = "0.9.
|
276
|
+
version = "0.9.97"
|
281
277
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
282
|
-
checksum = "
|
278
|
+
checksum = "3cbd92f281615f3c2dcb9dcb0f0576624752afbf9a7f99173b37c4b55b62dd8a"
|
283
279
|
dependencies = [
|
284
280
|
"bindgen",
|
285
281
|
"lazy_static",
|
@@ -298,35 +294,41 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
298
294
|
|
299
295
|
[[package]]
|
300
296
|
name = "redox_syscall"
|
301
|
-
version = "0.
|
297
|
+
version = "0.5.1"
|
302
298
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
303
|
-
checksum = "
|
299
|
+
checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e"
|
304
300
|
dependencies = [
|
305
|
-
"bitflags
|
301
|
+
"bitflags",
|
306
302
|
]
|
307
303
|
|
308
304
|
[[package]]
|
309
305
|
name = "regex"
|
310
|
-
version = "1.
|
306
|
+
version = "1.10.4"
|
311
307
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
312
|
-
checksum = "
|
308
|
+
checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
|
313
309
|
dependencies = [
|
314
310
|
"aho-corasick",
|
315
311
|
"memchr",
|
312
|
+
"regex-automata",
|
316
313
|
"regex-syntax",
|
317
314
|
]
|
318
315
|
|
319
316
|
[[package]]
|
320
317
|
name = "regex-automata"
|
321
|
-
version = "0.
|
318
|
+
version = "0.4.6"
|
322
319
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
323
|
-
checksum = "
|
320
|
+
checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
|
321
|
+
dependencies = [
|
322
|
+
"aho-corasick",
|
323
|
+
"memchr",
|
324
|
+
"regex-syntax",
|
325
|
+
]
|
324
326
|
|
325
327
|
[[package]]
|
326
328
|
name = "regex-syntax"
|
327
|
-
version = "0.
|
329
|
+
version = "0.8.3"
|
328
330
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
329
|
-
checksum = "
|
331
|
+
checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
|
330
332
|
|
331
333
|
[[package]]
|
332
334
|
name = "rustc-hash"
|
@@ -336,9 +338,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
|
336
338
|
|
337
339
|
[[package]]
|
338
340
|
name = "scopeguard"
|
339
|
-
version = "1.
|
341
|
+
version = "1.2.0"
|
340
342
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
341
|
-
checksum = "
|
343
|
+
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
342
344
|
|
343
345
|
[[package]]
|
344
346
|
name = "seq-macro"
|
@@ -348,9 +350,23 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
|
348
350
|
|
349
351
|
[[package]]
|
350
352
|
name = "serde"
|
351
|
-
version = "1.0.
|
353
|
+
version = "1.0.202"
|
352
354
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
353
|
-
checksum = "
|
355
|
+
checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395"
|
356
|
+
dependencies = [
|
357
|
+
"serde_derive",
|
358
|
+
]
|
359
|
+
|
360
|
+
[[package]]
|
361
|
+
name = "serde_derive"
|
362
|
+
version = "1.0.202"
|
363
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
364
|
+
checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838"
|
365
|
+
dependencies = [
|
366
|
+
"proc-macro2",
|
367
|
+
"quote",
|
368
|
+
"syn",
|
369
|
+
]
|
354
370
|
|
355
371
|
[[package]]
|
356
372
|
name = "shell-words"
|
@@ -360,21 +376,21 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
|
|
360
376
|
|
361
377
|
[[package]]
|
362
378
|
name = "shlex"
|
363
|
-
version = "1.
|
379
|
+
version = "1.3.0"
|
364
380
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
365
|
-
checksum = "
|
381
|
+
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
366
382
|
|
367
383
|
[[package]]
|
368
384
|
name = "smallvec"
|
369
|
-
version = "1.
|
385
|
+
version = "1.13.2"
|
370
386
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
371
|
-
checksum = "
|
387
|
+
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
372
388
|
|
373
389
|
[[package]]
|
374
390
|
name = "syn"
|
375
|
-
version = "2.0.
|
391
|
+
version = "2.0.63"
|
376
392
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
377
|
-
checksum = "
|
393
|
+
checksum = "bf5be731623ca1a1fb7d8be6f261a3be6d3e2337b8a1f97be944d020c8fcb704"
|
378
394
|
dependencies = [
|
379
395
|
"proc-macro2",
|
380
396
|
"quote",
|
@@ -383,8 +399,9 @@ dependencies = [
|
|
383
399
|
|
384
400
|
[[package]]
|
385
401
|
name = "tiktoken-rs"
|
386
|
-
version = "0.
|
387
|
-
source = "
|
402
|
+
version = "0.5.9"
|
403
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
404
|
+
checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234"
|
388
405
|
dependencies = [
|
389
406
|
"anyhow",
|
390
407
|
"base64",
|
@@ -406,50 +423,20 @@ dependencies = [
|
|
406
423
|
|
407
424
|
[[package]]
|
408
425
|
name = "unicode-ident"
|
409
|
-
version = "1.0.
|
410
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
411
|
-
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
|
412
|
-
|
413
|
-
[[package]]
|
414
|
-
name = "winapi"
|
415
|
-
version = "0.3.9"
|
426
|
+
version = "1.0.12"
|
416
427
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
417
|
-
checksum = "
|
418
|
-
dependencies = [
|
419
|
-
"winapi-i686-pc-windows-gnu",
|
420
|
-
"winapi-x86_64-pc-windows-gnu",
|
421
|
-
]
|
422
|
-
|
423
|
-
[[package]]
|
424
|
-
name = "winapi-i686-pc-windows-gnu"
|
425
|
-
version = "0.4.0"
|
426
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
427
|
-
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
428
|
-
|
429
|
-
[[package]]
|
430
|
-
name = "winapi-x86_64-pc-windows-gnu"
|
431
|
-
version = "0.4.0"
|
432
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
433
|
-
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
434
|
-
|
435
|
-
[[package]]
|
436
|
-
name = "windows-sys"
|
437
|
-
version = "0.45.0"
|
438
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
439
|
-
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
|
440
|
-
dependencies = [
|
441
|
-
"windows-targets",
|
442
|
-
]
|
428
|
+
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
443
429
|
|
444
430
|
[[package]]
|
445
431
|
name = "windows-targets"
|
446
|
-
version = "0.
|
432
|
+
version = "0.52.5"
|
447
433
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
448
|
-
checksum = "
|
434
|
+
checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
|
449
435
|
dependencies = [
|
450
436
|
"windows_aarch64_gnullvm",
|
451
437
|
"windows_aarch64_msvc",
|
452
438
|
"windows_i686_gnu",
|
439
|
+
"windows_i686_gnullvm",
|
453
440
|
"windows_i686_msvc",
|
454
441
|
"windows_x86_64_gnu",
|
455
442
|
"windows_x86_64_gnullvm",
|
@@ -458,42 +445,48 @@ dependencies = [
|
|
458
445
|
|
459
446
|
[[package]]
|
460
447
|
name = "windows_aarch64_gnullvm"
|
461
|
-
version = "0.
|
448
|
+
version = "0.52.5"
|
462
449
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
463
|
-
checksum = "
|
450
|
+
checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
|
464
451
|
|
465
452
|
[[package]]
|
466
453
|
name = "windows_aarch64_msvc"
|
467
|
-
version = "0.
|
454
|
+
version = "0.52.5"
|
468
455
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
469
|
-
checksum = "
|
456
|
+
checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
|
470
457
|
|
471
458
|
[[package]]
|
472
459
|
name = "windows_i686_gnu"
|
473
|
-
version = "0.
|
460
|
+
version = "0.52.5"
|
461
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
462
|
+
checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
|
463
|
+
|
464
|
+
[[package]]
|
465
|
+
name = "windows_i686_gnullvm"
|
466
|
+
version = "0.52.5"
|
474
467
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
475
|
-
checksum = "
|
468
|
+
checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
|
476
469
|
|
477
470
|
[[package]]
|
478
471
|
name = "windows_i686_msvc"
|
479
|
-
version = "0.
|
472
|
+
version = "0.52.5"
|
480
473
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
481
|
-
checksum = "
|
474
|
+
checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
|
482
475
|
|
483
476
|
[[package]]
|
484
477
|
name = "windows_x86_64_gnu"
|
485
|
-
version = "0.
|
478
|
+
version = "0.52.5"
|
486
479
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
487
|
-
checksum = "
|
480
|
+
checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
|
488
481
|
|
489
482
|
[[package]]
|
490
483
|
name = "windows_x86_64_gnullvm"
|
491
|
-
version = "0.
|
484
|
+
version = "0.52.5"
|
492
485
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
493
|
-
checksum = "
|
486
|
+
checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
|
494
487
|
|
495
488
|
[[package]]
|
496
489
|
name = "windows_x86_64_msvc"
|
497
|
-
version = "0.
|
490
|
+
version = "0.52.5"
|
498
491
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
499
|
-
checksum = "
|
492
|
+
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
tiktoken_ruby (0.0.
|
5
|
-
rb_sys (
|
4
|
+
tiktoken_ruby (0.0.9)
|
5
|
+
rb_sys (= 0.9.87)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
@@ -22,7 +22,7 @@ GEM
|
|
22
22
|
rake (13.1.0)
|
23
23
|
rake-compiler (1.2.5)
|
24
24
|
rake
|
25
|
-
rb_sys (0.9.
|
25
|
+
rb_sys (0.9.87)
|
26
26
|
regexp_parser (2.9.0)
|
27
27
|
rexml (3.2.6)
|
28
28
|
rspec (3.12.0)
|
@@ -89,4 +89,4 @@ DEPENDENCIES
|
|
89
89
|
yard-doctest
|
90
90
|
|
91
91
|
BUNDLED WITH
|
92
|
-
2.4.
|
92
|
+
2.4.4
|
data/README.md
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
[![Gem Version](https://badge.fury.io/rb/tiktoken_ruby.svg)](https://badge.fury.io/rb/tiktoken_ruby)
|
2
|
+
|
2
3
|
# tiktoken_ruby
|
3
4
|
|
4
5
|
[Tiktoken](https://github.com/openai/tiktoken) is BPE tokenizer from OpenAI used with their GPT models.
|
5
|
-
This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
|
6
|
+
This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
|
7
|
+
|
8
|
+
## Request for maintainers
|
9
|
+
|
10
|
+
I can't really put substantial time into maintaining this. Probably nothing more than a couple hours every few months. If you have experience maintaining ruby gems and would like to
|
11
|
+
lend a hand please send me an email or reply to this [issue](https://github.com/IAPark/tiktoken_ruby/issues/26)
|
6
12
|
|
7
13
|
## Installation
|
8
14
|
|
@@ -15,17 +21,19 @@ If bundler is not being used to manage dependencies, install the gem by executin
|
|
15
21
|
$ gem install tiktoken_ruby
|
16
22
|
|
17
23
|
## Usage
|
24
|
+
|
18
25
|
Usage should be very similar to the python library. Here's a simple example
|
19
26
|
|
20
27
|
Encode and decode text
|
28
|
+
|
21
29
|
```ruby
|
22
30
|
require 'tiktoken_ruby'
|
23
|
-
|
24
31
|
enc = Tiktoken.get_encoding("cl100k_base")
|
25
32
|
enc.decode(enc.encode("hello world")) #=> "hello world"
|
26
33
|
```
|
27
34
|
|
28
35
|
Encoders can also be retrieved by model name
|
36
|
+
|
29
37
|
```ruby
|
30
38
|
require 'tiktoken_ruby'
|
31
39
|
|
@@ -53,7 +61,6 @@ bundle exec rake compile
|
|
53
61
|
bundle exec rake spec
|
54
62
|
```
|
55
63
|
|
56
|
-
|
57
64
|
## License
|
58
65
|
|
59
66
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
@@ -11,5 +11,5 @@ crate-type = ["cdylib"]
|
|
11
11
|
|
12
12
|
[dependencies]
|
13
13
|
magnus = { version = "0.6.1" }
|
14
|
-
rb-sys = { version = "
|
15
|
-
tiktoken-rs = {
|
14
|
+
rb-sys = { version = "0.9.87", features = ["stable-api-compiled-fallback"] }
|
15
|
+
tiktoken-rs = { version = "0.5.9" }
|
@@ -20,6 +20,11 @@ fn cl100k_base() -> CoreBPEWrapper {
|
|
20
20
|
CoreBPEWrapper::new(core_bpe)
|
21
21
|
}
|
22
22
|
|
23
|
+
fn o200k_base() -> CoreBPEWrapper {
|
24
|
+
let core_bpe = tiktoken_rs::o200k_base().unwrap();
|
25
|
+
CoreBPEWrapper::new(core_bpe)
|
26
|
+
}
|
27
|
+
|
23
28
|
fn module() -> Result<RModule, magnus::Error> {
|
24
29
|
define_module("Tiktoken")
|
25
30
|
}
|
@@ -37,6 +42,7 @@ fn init() -> Result<(), Error> {
|
|
37
42
|
factory_module.define_singleton_method("p50k_base", function!(p50k_base, 0))?;
|
38
43
|
factory_module.define_singleton_method("p50k_edit", function!(p50k_edit, 0))?;
|
39
44
|
factory_module.define_singleton_method("cl100k_base", function!(cl100k_base, 0))?;
|
45
|
+
factory_module.define_singleton_method("o200k_base", function!(o200k_base, 0))?;
|
40
46
|
|
41
47
|
let ext_module = module.define_module("Ext")?;
|
42
48
|
let bpe_class = ext_module.define_class("CoreBPE", class::object())?;
|
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
class Tiktoken::Encoding
|
4
|
+
CACHE_MUTEX = Mutex.new
|
5
|
+
|
4
6
|
attr_reader :name
|
5
7
|
|
6
8
|
# This returns a new Tiktoken::Encoding instance for the requested encoding
|
@@ -15,8 +17,10 @@ class Tiktoken::Encoding
|
|
15
17
|
# @param encoding [Symbol] The name of the encoding to load
|
16
18
|
# @return [Tiktoken::Encoding] The encoding instance
|
17
19
|
def self.for_name_cached(encoding)
|
18
|
-
|
19
|
-
|
20
|
+
CACHE_MUTEX.synchronize do
|
21
|
+
@encodings ||= {}
|
22
|
+
@encodings[encoding.to_sym] ||= Tiktoken::Encoding.for_name(encoding)
|
23
|
+
end
|
20
24
|
end
|
21
25
|
|
22
26
|
# Encodes the text as a list of integer tokens. This encoding will encode special non text tokens
|
data/lib/tiktoken_ruby.rb
CHANGED
@@ -28,7 +28,7 @@ module Tiktoken
|
|
28
28
|
|
29
29
|
# Gets the encoding for an OpenAI model
|
30
30
|
# @param model_name [Symbol|String] The name of the model to get the encoding for
|
31
|
-
# @return [Tiktoken::Encoding] The encoding instance
|
31
|
+
# @return [Tiktoken::Encoding, nil] The encoding instance, or nil if no encoding is found
|
32
32
|
# @example Count tokens for text
|
33
33
|
# enc = Tiktoken.encoding_for_model("gpt-4")
|
34
34
|
# enc.encode("hello world").length #=> 2
|
@@ -37,10 +37,12 @@ module Tiktoken
|
|
37
37
|
return get_encoding(MODEL_TO_ENCODING_NAME[model_name.to_sym])
|
38
38
|
end
|
39
39
|
|
40
|
-
MODEL_PREFIX_TO_ENCODING.
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
_prefix, encoding = MODEL_PREFIX_TO_ENCODING.find do |prefix, _encoding|
|
41
|
+
model_name.start_with?(prefix.to_s)
|
42
|
+
end
|
43
|
+
|
44
|
+
if encoding
|
45
|
+
get_encoding(encoding)
|
44
46
|
end
|
45
47
|
end
|
46
48
|
|
@@ -62,13 +64,15 @@ module Tiktoken
|
|
62
64
|
:r50k_base,
|
63
65
|
:p50k_base,
|
64
66
|
:p50k_edit,
|
65
|
-
:cl100k_base
|
67
|
+
:cl100k_base,
|
68
|
+
:o200k_base
|
66
69
|
]
|
67
70
|
|
68
71
|
# taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
|
69
72
|
# that is also MIT licensed but by OpenAI
|
70
73
|
MODEL_TO_ENCODING_NAME = {
|
71
74
|
# chat
|
75
|
+
"gpt-4o": "o200k_base",
|
72
76
|
"gpt-4": "cl100k_base",
|
73
77
|
"gpt-3.5-turbo": "cl100k_base",
|
74
78
|
"gpt-35-turbo": "cl100k_base", # Azure deployment name
|
@@ -118,6 +122,7 @@ module Tiktoken
|
|
118
122
|
|
119
123
|
MODEL_PREFIX_TO_ENCODING = {
|
120
124
|
# chat
|
125
|
+
"gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13, etc.
|
121
126
|
"gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
|
122
127
|
"gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
|
123
128
|
"gpt-35-turbo-": "cl100k_base", # Azure deployment name
|
data/tiktoken_ruby.gemspec
CHANGED
@@ -29,5 +29,5 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
30
30
|
spec.require_paths = ["lib"]
|
31
31
|
spec.extensions = ["ext/tiktoken_ruby/extconf.rb"]
|
32
|
-
spec.add_dependency "rb_sys", "
|
32
|
+
spec.add_dependency "rb_sys", "= 0.9.87"
|
33
33
|
end
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tiktoken_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- IAPark
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-05-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rb_sys
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - '='
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.9.
|
19
|
+
version: 0.9.87
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.9.
|
26
|
+
version: 0.9.87
|
27
27
|
description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
|
28
28
|
used by OpenAI. It can be used to count the number of tokens in text before sending
|
29
29
|
it to OpenAI APIs.
|