tiktoken_ruby 0.0.7 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de2d6e8e83771f2ef51351e019e9cebc7163a1775bfc0e812da58371574b9b63
4
- data.tar.gz: fe2c629e8b435a181bfa4524655bfa137f1660061ab4f780e0edc15a11a7538d
3
+ metadata.gz: abd5db9516cf5d26ace1790e1267038910af214b3315d157e35df851530b83cb
4
+ data.tar.gz: 88541080f80f27a52c8a7eb9bd9a2baf4dc3c67af4ba152bc03150ade1c89f72
5
5
  SHA512:
6
- metadata.gz: bbf721963e873464fae055d23308068fcdac8db4e27dea28653c3fc017f0803da59b4c549a0fb6b6339f79bc4379e2913e1c47fa25f9894ebde840b23aa81edb
7
- data.tar.gz: 1275369f56a2498ce39c5a2b259efdab0684ae834c6d769bfe23d7190c69917a22d46dc7b7675cd15c8a44dcfa8f7c3be2df850035236fa33f8e55afd31db42b
6
+ metadata.gz: bdba999ff6ee22d57a993e7cbff9f1f95c22540973e7af26a58ded2e49e77c9863daedd936255a86c9722598b8fcc442d746c502d9b295f2758e52774aa8fd7c
7
+ data.tar.gz: 8917f5a08dbed662c890d0102c9d4be3d24a34d5537804b7ceb563819dd01e36d5b580d5e9f68a20464057620e8f2497558e0e3f1e0faf2afc044d2a4feec7e2
data/Cargo.lock CHANGED
@@ -4,43 +4,43 @@ version = 3
4
4
 
5
5
  [[package]]
6
6
  name = "aho-corasick"
7
- version = "0.7.20"
7
+ version = "1.1.3"
8
8
  source = "registry+https://github.com/rust-lang/crates.io-index"
9
- checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
9
+ checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
10
10
  dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
14
  [[package]]
15
15
  name = "anyhow"
16
- version = "1.0.70"
16
+ version = "1.0.83"
17
17
  source = "registry+https://github.com/rust-lang/crates.io-index"
18
- checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4"
18
+ checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3"
19
19
 
20
20
  [[package]]
21
21
  name = "autocfg"
22
- version = "1.1.0"
22
+ version = "1.3.0"
23
23
  source = "registry+https://github.com/rust-lang/crates.io-index"
24
- checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
24
+ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
25
25
 
26
26
  [[package]]
27
27
  name = "base64"
28
- version = "0.21.0"
28
+ version = "0.21.7"
29
29
  source = "registry+https://github.com/rust-lang/crates.io-index"
30
- checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a"
30
+ checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
31
31
 
32
32
  [[package]]
33
33
  name = "bindgen"
34
- version = "0.66.1"
34
+ version = "0.69.4"
35
35
  source = "registry+https://github.com/rust-lang/crates.io-index"
36
- checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7"
36
+ checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
37
37
  dependencies = [
38
- "bitflags 2.4.0",
38
+ "bitflags",
39
39
  "cexpr",
40
40
  "clang-sys",
41
+ "itertools",
41
42
  "lazy_static",
42
43
  "lazycell",
43
- "peeking_take_while",
44
44
  "proc-macro2",
45
45
  "quote",
46
46
  "regex",
@@ -66,24 +66,17 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
66
66
 
67
67
  [[package]]
68
68
  name = "bitflags"
69
- version = "1.3.2"
70
- source = "registry+https://github.com/rust-lang/crates.io-index"
71
- checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
72
-
73
- [[package]]
74
- name = "bitflags"
75
- version = "2.4.0"
69
+ version = "2.5.0"
76
70
  source = "registry+https://github.com/rust-lang/crates.io-index"
77
- checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635"
71
+ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
78
72
 
79
73
  [[package]]
80
74
  name = "bstr"
81
- version = "1.4.0"
75
+ version = "1.9.1"
82
76
  source = "registry+https://github.com/rust-lang/crates.io-index"
83
- checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09"
77
+ checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
84
78
  dependencies = [
85
79
  "memchr",
86
- "once_cell",
87
80
  "regex-automata",
88
81
  "serde",
89
82
  ]
@@ -105,20 +98,26 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
105
98
 
106
99
  [[package]]
107
100
  name = "clang-sys"
108
- version = "1.6.0"
101
+ version = "1.7.0"
109
102
  source = "registry+https://github.com/rust-lang/crates.io-index"
110
- checksum = "77ed9a53e5d4d9c573ae844bfac6872b159cb1d1585a83b29e7a64b7eef7332a"
103
+ checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
111
104
  dependencies = [
112
105
  "glob",
113
106
  "libc",
114
107
  "libloading",
115
108
  ]
116
109
 
110
+ [[package]]
111
+ name = "either"
112
+ version = "1.11.0"
113
+ source = "registry+https://github.com/rust-lang/crates.io-index"
114
+ checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2"
115
+
117
116
  [[package]]
118
117
  name = "fancy-regex"
119
- version = "0.11.0"
118
+ version = "0.12.0"
120
119
  source = "registry+https://github.com/rust-lang/crates.io-index"
121
- checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2"
120
+ checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05"
122
121
  dependencies = [
123
122
  "bit-set",
124
123
  "regex",
@@ -130,6 +129,15 @@ version = "0.3.1"
130
129
  source = "registry+https://github.com/rust-lang/crates.io-index"
131
130
  checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
132
131
 
132
+ [[package]]
133
+ name = "itertools"
134
+ version = "0.12.1"
135
+ source = "registry+https://github.com/rust-lang/crates.io-index"
136
+ checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
137
+ dependencies = [
138
+ "either",
139
+ ]
140
+
133
141
  [[package]]
134
142
  name = "lazy_static"
135
143
  version = "1.4.0"
@@ -144,25 +152,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
144
152
 
145
153
  [[package]]
146
154
  name = "libc"
147
- version = "0.2.140"
155
+ version = "0.2.154"
148
156
  source = "registry+https://github.com/rust-lang/crates.io-index"
149
- checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
157
+ checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346"
150
158
 
151
159
  [[package]]
152
160
  name = "libloading"
153
- version = "0.7.4"
161
+ version = "0.8.3"
154
162
  source = "registry+https://github.com/rust-lang/crates.io-index"
155
- checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
163
+ checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19"
156
164
  dependencies = [
157
165
  "cfg-if",
158
- "winapi",
166
+ "windows-targets",
159
167
  ]
160
168
 
161
169
  [[package]]
162
170
  name = "lock_api"
163
- version = "0.4.9"
171
+ version = "0.4.12"
164
172
  source = "registry+https://github.com/rust-lang/crates.io-index"
165
- checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
173
+ checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
166
174
  dependencies = [
167
175
  "autocfg",
168
176
  "scopeguard",
@@ -170,9 +178,9 @@ dependencies = [
170
178
 
171
179
  [[package]]
172
180
  name = "magnus"
173
- version = "0.6.1"
181
+ version = "0.6.4"
174
182
  source = "registry+https://github.com/rust-lang/crates.io-index"
175
- checksum = "0516897a45f8ce8270a8910bcb94cd83538b19b6ae3a0c281a765df170b64695"
183
+ checksum = "b1597ef40aa8c36be098249e82c9a20cf7199278ac1c1a1a995eeead6a184479"
176
184
  dependencies = [
177
185
  "magnus-macros",
178
186
  "rb-sys",
@@ -193,9 +201,9 @@ dependencies = [
193
201
 
194
202
  [[package]]
195
203
  name = "memchr"
196
- version = "2.5.0"
204
+ version = "2.7.2"
197
205
  source = "registry+https://github.com/rust-lang/crates.io-index"
198
- checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
206
+ checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
199
207
 
200
208
  [[package]]
201
209
  name = "minimal-lexical"
@@ -213,17 +221,11 @@ dependencies = [
213
221
  "minimal-lexical",
214
222
  ]
215
223
 
216
- [[package]]
217
- name = "once_cell"
218
- version = "1.17.1"
219
- source = "registry+https://github.com/rust-lang/crates.io-index"
220
- checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
221
-
222
224
  [[package]]
223
225
  name = "parking_lot"
224
- version = "0.12.1"
226
+ version = "0.12.2"
225
227
  source = "registry+https://github.com/rust-lang/crates.io-index"
226
- checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
228
+ checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb"
227
229
  dependencies = [
228
230
  "lock_api",
229
231
  "parking_lot_core",
@@ -231,55 +233,49 @@ dependencies = [
231
233
 
232
234
  [[package]]
233
235
  name = "parking_lot_core"
234
- version = "0.9.7"
236
+ version = "0.9.10"
235
237
  source = "registry+https://github.com/rust-lang/crates.io-index"
236
- checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
238
+ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
237
239
  dependencies = [
238
240
  "cfg-if",
239
241
  "libc",
240
242
  "redox_syscall",
241
243
  "smallvec",
242
- "windows-sys",
244
+ "windows-targets",
243
245
  ]
244
246
 
245
- [[package]]
246
- name = "peeking_take_while"
247
- version = "0.1.2"
248
- source = "registry+https://github.com/rust-lang/crates.io-index"
249
- checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
250
-
251
247
  [[package]]
252
248
  name = "proc-macro2"
253
- version = "1.0.66"
249
+ version = "1.0.82"
254
250
  source = "registry+https://github.com/rust-lang/crates.io-index"
255
- checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
251
+ checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b"
256
252
  dependencies = [
257
253
  "unicode-ident",
258
254
  ]
259
255
 
260
256
  [[package]]
261
257
  name = "quote"
262
- version = "1.0.33"
258
+ version = "1.0.36"
263
259
  source = "registry+https://github.com/rust-lang/crates.io-index"
264
- checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
260
+ checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
265
261
  dependencies = [
266
262
  "proc-macro2",
267
263
  ]
268
264
 
269
265
  [[package]]
270
266
  name = "rb-sys"
271
- version = "0.9.81"
267
+ version = "0.9.97"
272
268
  source = "registry+https://github.com/rust-lang/crates.io-index"
273
- checksum = "a57240b308b155b09dce81e32829966a99f52d1088b45957e4283e526c5317a1"
269
+ checksum = "47d30bcad206b51f2f66121190ca678dce1fdf3a2eae0ac5d838d1818b19bdf5"
274
270
  dependencies = [
275
271
  "rb-sys-build",
276
272
  ]
277
273
 
278
274
  [[package]]
279
275
  name = "rb-sys-build"
280
- version = "0.9.81"
276
+ version = "0.9.97"
281
277
  source = "registry+https://github.com/rust-lang/crates.io-index"
282
- checksum = "f24ce877a4c5d07f06f6aa6fec3ac95e4b357b9f73b0f5445d8cbb7266d410e8"
278
+ checksum = "3cbd92f281615f3c2dcb9dcb0f0576624752afbf9a7f99173b37c4b55b62dd8a"
283
279
  dependencies = [
284
280
  "bindgen",
285
281
  "lazy_static",
@@ -298,35 +294,41 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
298
294
 
299
295
  [[package]]
300
296
  name = "redox_syscall"
301
- version = "0.2.16"
297
+ version = "0.5.1"
302
298
  source = "registry+https://github.com/rust-lang/crates.io-index"
303
- checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
299
+ checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e"
304
300
  dependencies = [
305
- "bitflags 1.3.2",
301
+ "bitflags",
306
302
  ]
307
303
 
308
304
  [[package]]
309
305
  name = "regex"
310
- version = "1.7.1"
306
+ version = "1.10.4"
311
307
  source = "registry+https://github.com/rust-lang/crates.io-index"
312
- checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
308
+ checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
313
309
  dependencies = [
314
310
  "aho-corasick",
315
311
  "memchr",
312
+ "regex-automata",
316
313
  "regex-syntax",
317
314
  ]
318
315
 
319
316
  [[package]]
320
317
  name = "regex-automata"
321
- version = "0.1.10"
318
+ version = "0.4.6"
322
319
  source = "registry+https://github.com/rust-lang/crates.io-index"
323
- checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
320
+ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
321
+ dependencies = [
322
+ "aho-corasick",
323
+ "memchr",
324
+ "regex-syntax",
325
+ ]
324
326
 
325
327
  [[package]]
326
328
  name = "regex-syntax"
327
- version = "0.6.28"
329
+ version = "0.8.3"
328
330
  source = "registry+https://github.com/rust-lang/crates.io-index"
329
- checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
331
+ checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
330
332
 
331
333
  [[package]]
332
334
  name = "rustc-hash"
@@ -336,9 +338,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
336
338
 
337
339
  [[package]]
338
340
  name = "scopeguard"
339
- version = "1.1.0"
341
+ version = "1.2.0"
340
342
  source = "registry+https://github.com/rust-lang/crates.io-index"
341
- checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
343
+ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
342
344
 
343
345
  [[package]]
344
346
  name = "seq-macro"
@@ -348,9 +350,23 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
348
350
 
349
351
  [[package]]
350
352
  name = "serde"
351
- version = "1.0.157"
353
+ version = "1.0.202"
352
354
  source = "registry+https://github.com/rust-lang/crates.io-index"
353
- checksum = "707de5fcf5df2b5788fca98dd7eab490bc2fd9b7ef1404defc462833b83f25ca"
355
+ checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395"
356
+ dependencies = [
357
+ "serde_derive",
358
+ ]
359
+
360
+ [[package]]
361
+ name = "serde_derive"
362
+ version = "1.0.202"
363
+ source = "registry+https://github.com/rust-lang/crates.io-index"
364
+ checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838"
365
+ dependencies = [
366
+ "proc-macro2",
367
+ "quote",
368
+ "syn",
369
+ ]
354
370
 
355
371
  [[package]]
356
372
  name = "shell-words"
@@ -360,21 +376,21 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
360
376
 
361
377
  [[package]]
362
378
  name = "shlex"
363
- version = "1.1.0"
379
+ version = "1.3.0"
364
380
  source = "registry+https://github.com/rust-lang/crates.io-index"
365
- checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
381
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
366
382
 
367
383
  [[package]]
368
384
  name = "smallvec"
369
- version = "1.10.0"
385
+ version = "1.13.2"
370
386
  source = "registry+https://github.com/rust-lang/crates.io-index"
371
- checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
387
+ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
372
388
 
373
389
  [[package]]
374
390
  name = "syn"
375
- version = "2.0.31"
391
+ version = "2.0.63"
376
392
  source = "registry+https://github.com/rust-lang/crates.io-index"
377
- checksum = "718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398"
393
+ checksum = "bf5be731623ca1a1fb7d8be6f261a3be6d3e2337b8a1f97be944d020c8fcb704"
378
394
  dependencies = [
379
395
  "proc-macro2",
380
396
  "quote",
@@ -383,8 +399,9 @@ dependencies = [
383
399
 
384
400
  [[package]]
385
401
  name = "tiktoken-rs"
386
- version = "0.3.2"
387
- source = "git+https://github.com/IAPark/tiktoken-rs.git#5231fbf4a91d9221a713522e755445d0dde341fa"
402
+ version = "0.5.9"
403
+ source = "registry+https://github.com/rust-lang/crates.io-index"
404
+ checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234"
388
405
  dependencies = [
389
406
  "anyhow",
390
407
  "base64",
@@ -406,50 +423,20 @@ dependencies = [
406
423
 
407
424
  [[package]]
408
425
  name = "unicode-ident"
409
- version = "1.0.8"
410
- source = "registry+https://github.com/rust-lang/crates.io-index"
411
- checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
412
-
413
- [[package]]
414
- name = "winapi"
415
- version = "0.3.9"
426
+ version = "1.0.12"
416
427
  source = "registry+https://github.com/rust-lang/crates.io-index"
417
- checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
418
- dependencies = [
419
- "winapi-i686-pc-windows-gnu",
420
- "winapi-x86_64-pc-windows-gnu",
421
- ]
422
-
423
- [[package]]
424
- name = "winapi-i686-pc-windows-gnu"
425
- version = "0.4.0"
426
- source = "registry+https://github.com/rust-lang/crates.io-index"
427
- checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
428
-
429
- [[package]]
430
- name = "winapi-x86_64-pc-windows-gnu"
431
- version = "0.4.0"
432
- source = "registry+https://github.com/rust-lang/crates.io-index"
433
- checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
434
-
435
- [[package]]
436
- name = "windows-sys"
437
- version = "0.45.0"
438
- source = "registry+https://github.com/rust-lang/crates.io-index"
439
- checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
440
- dependencies = [
441
- "windows-targets",
442
- ]
428
+ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
443
429
 
444
430
  [[package]]
445
431
  name = "windows-targets"
446
- version = "0.42.2"
432
+ version = "0.52.5"
447
433
  source = "registry+https://github.com/rust-lang/crates.io-index"
448
- checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
434
+ checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
449
435
  dependencies = [
450
436
  "windows_aarch64_gnullvm",
451
437
  "windows_aarch64_msvc",
452
438
  "windows_i686_gnu",
439
+ "windows_i686_gnullvm",
453
440
  "windows_i686_msvc",
454
441
  "windows_x86_64_gnu",
455
442
  "windows_x86_64_gnullvm",
@@ -458,42 +445,48 @@ dependencies = [
458
445
 
459
446
  [[package]]
460
447
  name = "windows_aarch64_gnullvm"
461
- version = "0.42.2"
448
+ version = "0.52.5"
462
449
  source = "registry+https://github.com/rust-lang/crates.io-index"
463
- checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
450
+ checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
464
451
 
465
452
  [[package]]
466
453
  name = "windows_aarch64_msvc"
467
- version = "0.42.2"
454
+ version = "0.52.5"
468
455
  source = "registry+https://github.com/rust-lang/crates.io-index"
469
- checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
456
+ checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
470
457
 
471
458
  [[package]]
472
459
  name = "windows_i686_gnu"
473
- version = "0.42.2"
460
+ version = "0.52.5"
461
+ source = "registry+https://github.com/rust-lang/crates.io-index"
462
+ checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
463
+
464
+ [[package]]
465
+ name = "windows_i686_gnullvm"
466
+ version = "0.52.5"
474
467
  source = "registry+https://github.com/rust-lang/crates.io-index"
475
- checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
468
+ checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
476
469
 
477
470
  [[package]]
478
471
  name = "windows_i686_msvc"
479
- version = "0.42.2"
472
+ version = "0.52.5"
480
473
  source = "registry+https://github.com/rust-lang/crates.io-index"
481
- checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
474
+ checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
482
475
 
483
476
  [[package]]
484
477
  name = "windows_x86_64_gnu"
485
- version = "0.42.2"
478
+ version = "0.52.5"
486
479
  source = "registry+https://github.com/rust-lang/crates.io-index"
487
- checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
480
+ checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
488
481
 
489
482
  [[package]]
490
483
  name = "windows_x86_64_gnullvm"
491
- version = "0.42.2"
484
+ version = "0.52.5"
492
485
  source = "registry+https://github.com/rust-lang/crates.io-index"
493
- checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
486
+ checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
494
487
 
495
488
  [[package]]
496
489
  name = "windows_x86_64_msvc"
497
- version = "0.42.2"
490
+ version = "0.52.5"
498
491
  source = "registry+https://github.com/rust-lang/crates.io-index"
499
- checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
492
+ checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
data/Gemfile.lock CHANGED
@@ -1,8 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tiktoken_ruby (0.0.7)
5
- rb_sys (>= 0.9.86)
4
+ tiktoken_ruby (0.0.9)
5
+ rb_sys (= 0.9.87)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
@@ -22,7 +22,7 @@ GEM
22
22
  rake (13.1.0)
23
23
  rake-compiler (1.2.5)
24
24
  rake
25
- rb_sys (0.9.86)
25
+ rb_sys (0.9.87)
26
26
  regexp_parser (2.9.0)
27
27
  rexml (3.2.6)
28
28
  rspec (3.12.0)
@@ -89,4 +89,4 @@ DEPENDENCIES
89
89
  yard-doctest
90
90
 
91
91
  BUNDLED WITH
92
- 2.4.6
92
+ 2.4.4
data/README.md CHANGED
@@ -1,8 +1,14 @@
1
1
  [![Gem Version](https://badge.fury.io/rb/tiktoken_ruby.svg)](https://badge.fury.io/rb/tiktoken_ruby)
2
+
2
3
  # tiktoken_ruby
3
4
 
4
5
  [Tiktoken](https://github.com/openai/tiktoken) is BPE tokenizer from OpenAI used with their GPT models.
5
- This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
6
+ This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
7
+
8
+ ## Request for maintainers
9
+
10
+ I can't really put substantial time into maintaining this. Probably nothing more than a couple hours every few months. If you have experience maintaining ruby gems and would like to
11
+ lend a hand please send me an email or reply to this [issue](https://github.com/IAPark/tiktoken_ruby/issues/26)
6
12
 
7
13
  ## Installation
8
14
 
@@ -15,17 +21,19 @@ If bundler is not being used to manage dependencies, install the gem by executin
15
21
  $ gem install tiktoken_ruby
16
22
 
17
23
  ## Usage
24
+
18
25
  Usage should be very similar to the python library. Here's a simple example
19
26
 
20
27
  Encode and decode text
28
+
21
29
  ```ruby
22
30
  require 'tiktoken_ruby'
23
-
24
31
  enc = Tiktoken.get_encoding("cl100k_base")
25
32
  enc.decode(enc.encode("hello world")) #=> "hello world"
26
33
  ```
27
34
 
28
35
  Encoders can also be retrieved by model name
36
+
29
37
  ```ruby
30
38
  require 'tiktoken_ruby'
31
39
 
@@ -53,7 +61,6 @@ bundle exec rake compile
53
61
  bundle exec rake spec
54
62
  ```
55
63
 
56
-
57
64
  ## License
58
65
 
59
66
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -11,5 +11,5 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  magnus = { version = "0.6.1" }
14
- rb-sys = { version = "*", features = ["stable-api-compiled-fallback"] }
15
- tiktoken-rs = { git = "https://github.com/IAPark/tiktoken-rs.git" }
14
+ rb-sys = { version = "0.9.87", features = ["stable-api-compiled-fallback"] }
15
+ tiktoken-rs = { version = "0.5.9" }
@@ -20,6 +20,11 @@ fn cl100k_base() -> CoreBPEWrapper {
20
20
  CoreBPEWrapper::new(core_bpe)
21
21
  }
22
22
 
23
+ fn o200k_base() -> CoreBPEWrapper {
24
+ let core_bpe = tiktoken_rs::o200k_base().unwrap();
25
+ CoreBPEWrapper::new(core_bpe)
26
+ }
27
+
23
28
  fn module() -> Result<RModule, magnus::Error> {
24
29
  define_module("Tiktoken")
25
30
  }
@@ -37,6 +42,7 @@ fn init() -> Result<(), Error> {
37
42
  factory_module.define_singleton_method("p50k_base", function!(p50k_base, 0))?;
38
43
  factory_module.define_singleton_method("p50k_edit", function!(p50k_edit, 0))?;
39
44
  factory_module.define_singleton_method("cl100k_base", function!(cl100k_base, 0))?;
45
+ factory_module.define_singleton_method("o200k_base", function!(o200k_base, 0))?;
40
46
 
41
47
  let ext_module = module.define_module("Ext")?;
42
48
  let bpe_class = ext_module.define_class("CoreBPE", class::object())?;
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Tiktoken::Encoding
4
+ CACHE_MUTEX = Mutex.new
5
+
4
6
  attr_reader :name
5
7
 
6
8
  # This returns a new Tiktoken::Encoding instance for the requested encoding
@@ -15,8 +17,10 @@ class Tiktoken::Encoding
15
17
  # @param encoding [Symbol] The name of the encoding to load
16
18
  # @return [Tiktoken::Encoding] The encoding instance
17
19
  def self.for_name_cached(encoding)
18
- @encodings ||= {}
19
- @encodings[encoding.to_sym] ||= Tiktoken::Encoding.for_name(encoding)
20
+ CACHE_MUTEX.synchronize do
21
+ @encodings ||= {}
22
+ @encodings[encoding.to_sym] ||= Tiktoken::Encoding.for_name(encoding)
23
+ end
20
24
  end
21
25
 
22
26
  # Encodes the text as a list of integer tokens. This encoding will encode special non text tokens
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tiktoken
4
- VERSION = "0.0.7"
4
+ VERSION = "0.0.9"
5
5
  end
data/lib/tiktoken_ruby.rb CHANGED
@@ -28,7 +28,7 @@ module Tiktoken
28
28
 
29
29
  # Gets the encoding for an OpenAI model
30
30
  # @param model_name [Symbol|String] The name of the model to get the encoding for
31
- # @return [Tiktoken::Encoding] The encoding instance
31
+ # @return [Tiktoken::Encoding, nil] The encoding instance, or nil if no encoding is found
32
32
  # @example Count tokens for text
33
33
  # enc = Tiktoken.encoding_for_model("gpt-4")
34
34
  # enc.encode("hello world").length #=> 2
@@ -37,10 +37,12 @@ module Tiktoken
37
37
  return get_encoding(MODEL_TO_ENCODING_NAME[model_name.to_sym])
38
38
  end
39
39
 
40
- MODEL_PREFIX_TO_ENCODING.each do |prefix, encoding|
41
- if model_name.start_with?(prefix.to_s)
42
- return get_encoding(encoding)
43
- end
40
+ _prefix, encoding = MODEL_PREFIX_TO_ENCODING.find do |prefix, _encoding|
41
+ model_name.start_with?(prefix.to_s)
42
+ end
43
+
44
+ if encoding
45
+ get_encoding(encoding)
44
46
  end
45
47
  end
46
48
 
@@ -62,13 +64,15 @@ module Tiktoken
62
64
  :r50k_base,
63
65
  :p50k_base,
64
66
  :p50k_edit,
65
- :cl100k_base
67
+ :cl100k_base,
68
+ :o200k_base
66
69
  ]
67
70
 
68
71
  # taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
69
72
  # that is also MIT licensed but by OpenAI
70
73
  MODEL_TO_ENCODING_NAME = {
71
74
  # chat
75
+ "gpt-4o": "o200k_base",
72
76
  "gpt-4": "cl100k_base",
73
77
  "gpt-3.5-turbo": "cl100k_base",
74
78
  "gpt-35-turbo": "cl100k_base", # Azure deployment name
@@ -118,6 +122,7 @@ module Tiktoken
118
122
 
119
123
  MODEL_PREFIX_TO_ENCODING = {
120
124
  # chat
125
+ "gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13, etc.
121
126
  "gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
122
127
  "gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
123
128
  "gpt-35-turbo-": "cl100k_base", # Azure deployment name
@@ -29,5 +29,5 @@ Gem::Specification.new do |spec|
29
29
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ["lib"]
31
31
  spec.extensions = ["ext/tiktoken_ruby/extconf.rb"]
32
- spec.add_dependency "rb_sys", ">= 0.9.86"
32
+ spec.add_dependency "rb_sys", "= 0.9.87"
33
33
  end
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiktoken_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - IAPark
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-12 00:00:00.000000000 Z
11
+ date: 2024-05-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.9.86
19
+ version: 0.9.87
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.9.86
26
+ version: 0.9.87
27
27
  description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
28
28
  used by OpenAI. It can be used to count the number of tokens in text before sending
29
29
  it to OpenAI APIs.