tiktoken_ruby 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de2d6e8e83771f2ef51351e019e9cebc7163a1775bfc0e812da58371574b9b63
4
- data.tar.gz: fe2c629e8b435a181bfa4524655bfa137f1660061ab4f780e0edc15a11a7538d
3
+ metadata.gz: abd5db9516cf5d26ace1790e1267038910af214b3315d157e35df851530b83cb
4
+ data.tar.gz: 88541080f80f27a52c8a7eb9bd9a2baf4dc3c67af4ba152bc03150ade1c89f72
5
5
  SHA512:
6
- metadata.gz: bbf721963e873464fae055d23308068fcdac8db4e27dea28653c3fc017f0803da59b4c549a0fb6b6339f79bc4379e2913e1c47fa25f9894ebde840b23aa81edb
7
- data.tar.gz: 1275369f56a2498ce39c5a2b259efdab0684ae834c6d769bfe23d7190c69917a22d46dc7b7675cd15c8a44dcfa8f7c3be2df850035236fa33f8e55afd31db42b
6
+ metadata.gz: bdba999ff6ee22d57a993e7cbff9f1f95c22540973e7af26a58ded2e49e77c9863daedd936255a86c9722598b8fcc442d746c502d9b295f2758e52774aa8fd7c
7
+ data.tar.gz: 8917f5a08dbed662c890d0102c9d4be3d24a34d5537804b7ceb563819dd01e36d5b580d5e9f68a20464057620e8f2497558e0e3f1e0faf2afc044d2a4feec7e2
data/Cargo.lock CHANGED
@@ -4,43 +4,43 @@ version = 3
4
4
 
5
5
  [[package]]
6
6
  name = "aho-corasick"
7
- version = "0.7.20"
7
+ version = "1.1.3"
8
8
  source = "registry+https://github.com/rust-lang/crates.io-index"
9
- checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
9
+ checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
10
10
  dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
14
  [[package]]
15
15
  name = "anyhow"
16
- version = "1.0.70"
16
+ version = "1.0.83"
17
17
  source = "registry+https://github.com/rust-lang/crates.io-index"
18
- checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4"
18
+ checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3"
19
19
 
20
20
  [[package]]
21
21
  name = "autocfg"
22
- version = "1.1.0"
22
+ version = "1.3.0"
23
23
  source = "registry+https://github.com/rust-lang/crates.io-index"
24
- checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
24
+ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
25
25
 
26
26
  [[package]]
27
27
  name = "base64"
28
- version = "0.21.0"
28
+ version = "0.21.7"
29
29
  source = "registry+https://github.com/rust-lang/crates.io-index"
30
- checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a"
30
+ checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
31
31
 
32
32
  [[package]]
33
33
  name = "bindgen"
34
- version = "0.66.1"
34
+ version = "0.69.4"
35
35
  source = "registry+https://github.com/rust-lang/crates.io-index"
36
- checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7"
36
+ checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
37
37
  dependencies = [
38
- "bitflags 2.4.0",
38
+ "bitflags",
39
39
  "cexpr",
40
40
  "clang-sys",
41
+ "itertools",
41
42
  "lazy_static",
42
43
  "lazycell",
43
- "peeking_take_while",
44
44
  "proc-macro2",
45
45
  "quote",
46
46
  "regex",
@@ -66,24 +66,17 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
66
66
 
67
67
  [[package]]
68
68
  name = "bitflags"
69
- version = "1.3.2"
70
- source = "registry+https://github.com/rust-lang/crates.io-index"
71
- checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
72
-
73
- [[package]]
74
- name = "bitflags"
75
- version = "2.4.0"
69
+ version = "2.5.0"
76
70
  source = "registry+https://github.com/rust-lang/crates.io-index"
77
- checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635"
71
+ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
78
72
 
79
73
  [[package]]
80
74
  name = "bstr"
81
- version = "1.4.0"
75
+ version = "1.9.1"
82
76
  source = "registry+https://github.com/rust-lang/crates.io-index"
83
- checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09"
77
+ checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
84
78
  dependencies = [
85
79
  "memchr",
86
- "once_cell",
87
80
  "regex-automata",
88
81
  "serde",
89
82
  ]
@@ -105,20 +98,26 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
105
98
 
106
99
  [[package]]
107
100
  name = "clang-sys"
108
- version = "1.6.0"
101
+ version = "1.7.0"
109
102
  source = "registry+https://github.com/rust-lang/crates.io-index"
110
- checksum = "77ed9a53e5d4d9c573ae844bfac6872b159cb1d1585a83b29e7a64b7eef7332a"
103
+ checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
111
104
  dependencies = [
112
105
  "glob",
113
106
  "libc",
114
107
  "libloading",
115
108
  ]
116
109
 
110
+ [[package]]
111
+ name = "either"
112
+ version = "1.11.0"
113
+ source = "registry+https://github.com/rust-lang/crates.io-index"
114
+ checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2"
115
+
117
116
  [[package]]
118
117
  name = "fancy-regex"
119
- version = "0.11.0"
118
+ version = "0.12.0"
120
119
  source = "registry+https://github.com/rust-lang/crates.io-index"
121
- checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2"
120
+ checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05"
122
121
  dependencies = [
123
122
  "bit-set",
124
123
  "regex",
@@ -130,6 +129,15 @@ version = "0.3.1"
130
129
  source = "registry+https://github.com/rust-lang/crates.io-index"
131
130
  checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
132
131
 
132
+ [[package]]
133
+ name = "itertools"
134
+ version = "0.12.1"
135
+ source = "registry+https://github.com/rust-lang/crates.io-index"
136
+ checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
137
+ dependencies = [
138
+ "either",
139
+ ]
140
+
133
141
  [[package]]
134
142
  name = "lazy_static"
135
143
  version = "1.4.0"
@@ -144,25 +152,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
144
152
 
145
153
  [[package]]
146
154
  name = "libc"
147
- version = "0.2.140"
155
+ version = "0.2.154"
148
156
  source = "registry+https://github.com/rust-lang/crates.io-index"
149
- checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
157
+ checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346"
150
158
 
151
159
  [[package]]
152
160
  name = "libloading"
153
- version = "0.7.4"
161
+ version = "0.8.3"
154
162
  source = "registry+https://github.com/rust-lang/crates.io-index"
155
- checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
163
+ checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19"
156
164
  dependencies = [
157
165
  "cfg-if",
158
- "winapi",
166
+ "windows-targets",
159
167
  ]
160
168
 
161
169
  [[package]]
162
170
  name = "lock_api"
163
- version = "0.4.9"
171
+ version = "0.4.12"
164
172
  source = "registry+https://github.com/rust-lang/crates.io-index"
165
- checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
173
+ checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
166
174
  dependencies = [
167
175
  "autocfg",
168
176
  "scopeguard",
@@ -170,9 +178,9 @@ dependencies = [
170
178
 
171
179
  [[package]]
172
180
  name = "magnus"
173
- version = "0.6.1"
181
+ version = "0.6.4"
174
182
  source = "registry+https://github.com/rust-lang/crates.io-index"
175
- checksum = "0516897a45f8ce8270a8910bcb94cd83538b19b6ae3a0c281a765df170b64695"
183
+ checksum = "b1597ef40aa8c36be098249e82c9a20cf7199278ac1c1a1a995eeead6a184479"
176
184
  dependencies = [
177
185
  "magnus-macros",
178
186
  "rb-sys",
@@ -193,9 +201,9 @@ dependencies = [
193
201
 
194
202
  [[package]]
195
203
  name = "memchr"
196
- version = "2.5.0"
204
+ version = "2.7.2"
197
205
  source = "registry+https://github.com/rust-lang/crates.io-index"
198
- checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
206
+ checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
199
207
 
200
208
  [[package]]
201
209
  name = "minimal-lexical"
@@ -213,17 +221,11 @@ dependencies = [
213
221
  "minimal-lexical",
214
222
  ]
215
223
 
216
- [[package]]
217
- name = "once_cell"
218
- version = "1.17.1"
219
- source = "registry+https://github.com/rust-lang/crates.io-index"
220
- checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
221
-
222
224
  [[package]]
223
225
  name = "parking_lot"
224
- version = "0.12.1"
226
+ version = "0.12.2"
225
227
  source = "registry+https://github.com/rust-lang/crates.io-index"
226
- checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
228
+ checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb"
227
229
  dependencies = [
228
230
  "lock_api",
229
231
  "parking_lot_core",
@@ -231,55 +233,49 @@ dependencies = [
231
233
 
232
234
  [[package]]
233
235
  name = "parking_lot_core"
234
- version = "0.9.7"
236
+ version = "0.9.10"
235
237
  source = "registry+https://github.com/rust-lang/crates.io-index"
236
- checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
238
+ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
237
239
  dependencies = [
238
240
  "cfg-if",
239
241
  "libc",
240
242
  "redox_syscall",
241
243
  "smallvec",
242
- "windows-sys",
244
+ "windows-targets",
243
245
  ]
244
246
 
245
- [[package]]
246
- name = "peeking_take_while"
247
- version = "0.1.2"
248
- source = "registry+https://github.com/rust-lang/crates.io-index"
249
- checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
250
-
251
247
  [[package]]
252
248
  name = "proc-macro2"
253
- version = "1.0.66"
249
+ version = "1.0.82"
254
250
  source = "registry+https://github.com/rust-lang/crates.io-index"
255
- checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
251
+ checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b"
256
252
  dependencies = [
257
253
  "unicode-ident",
258
254
  ]
259
255
 
260
256
  [[package]]
261
257
  name = "quote"
262
- version = "1.0.33"
258
+ version = "1.0.36"
263
259
  source = "registry+https://github.com/rust-lang/crates.io-index"
264
- checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
260
+ checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
265
261
  dependencies = [
266
262
  "proc-macro2",
267
263
  ]
268
264
 
269
265
  [[package]]
270
266
  name = "rb-sys"
271
- version = "0.9.81"
267
+ version = "0.9.97"
272
268
  source = "registry+https://github.com/rust-lang/crates.io-index"
273
- checksum = "a57240b308b155b09dce81e32829966a99f52d1088b45957e4283e526c5317a1"
269
+ checksum = "47d30bcad206b51f2f66121190ca678dce1fdf3a2eae0ac5d838d1818b19bdf5"
274
270
  dependencies = [
275
271
  "rb-sys-build",
276
272
  ]
277
273
 
278
274
  [[package]]
279
275
  name = "rb-sys-build"
280
- version = "0.9.81"
276
+ version = "0.9.97"
281
277
  source = "registry+https://github.com/rust-lang/crates.io-index"
282
- checksum = "f24ce877a4c5d07f06f6aa6fec3ac95e4b357b9f73b0f5445d8cbb7266d410e8"
278
+ checksum = "3cbd92f281615f3c2dcb9dcb0f0576624752afbf9a7f99173b37c4b55b62dd8a"
283
279
  dependencies = [
284
280
  "bindgen",
285
281
  "lazy_static",
@@ -298,35 +294,41 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
298
294
 
299
295
  [[package]]
300
296
  name = "redox_syscall"
301
- version = "0.2.16"
297
+ version = "0.5.1"
302
298
  source = "registry+https://github.com/rust-lang/crates.io-index"
303
- checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
299
+ checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e"
304
300
  dependencies = [
305
- "bitflags 1.3.2",
301
+ "bitflags",
306
302
  ]
307
303
 
308
304
  [[package]]
309
305
  name = "regex"
310
- version = "1.7.1"
306
+ version = "1.10.4"
311
307
  source = "registry+https://github.com/rust-lang/crates.io-index"
312
- checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
308
+ checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
313
309
  dependencies = [
314
310
  "aho-corasick",
315
311
  "memchr",
312
+ "regex-automata",
316
313
  "regex-syntax",
317
314
  ]
318
315
 
319
316
  [[package]]
320
317
  name = "regex-automata"
321
- version = "0.1.10"
318
+ version = "0.4.6"
322
319
  source = "registry+https://github.com/rust-lang/crates.io-index"
323
- checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
320
+ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
321
+ dependencies = [
322
+ "aho-corasick",
323
+ "memchr",
324
+ "regex-syntax",
325
+ ]
324
326
 
325
327
  [[package]]
326
328
  name = "regex-syntax"
327
- version = "0.6.28"
329
+ version = "0.8.3"
328
330
  source = "registry+https://github.com/rust-lang/crates.io-index"
329
- checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
331
+ checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
330
332
 
331
333
  [[package]]
332
334
  name = "rustc-hash"
@@ -336,9 +338,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
336
338
 
337
339
  [[package]]
338
340
  name = "scopeguard"
339
- version = "1.1.0"
341
+ version = "1.2.0"
340
342
  source = "registry+https://github.com/rust-lang/crates.io-index"
341
- checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
343
+ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
342
344
 
343
345
  [[package]]
344
346
  name = "seq-macro"
@@ -348,9 +350,23 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
348
350
 
349
351
  [[package]]
350
352
  name = "serde"
351
- version = "1.0.157"
353
+ version = "1.0.202"
352
354
  source = "registry+https://github.com/rust-lang/crates.io-index"
353
- checksum = "707de5fcf5df2b5788fca98dd7eab490bc2fd9b7ef1404defc462833b83f25ca"
355
+ checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395"
356
+ dependencies = [
357
+ "serde_derive",
358
+ ]
359
+
360
+ [[package]]
361
+ name = "serde_derive"
362
+ version = "1.0.202"
363
+ source = "registry+https://github.com/rust-lang/crates.io-index"
364
+ checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838"
365
+ dependencies = [
366
+ "proc-macro2",
367
+ "quote",
368
+ "syn",
369
+ ]
354
370
 
355
371
  [[package]]
356
372
  name = "shell-words"
@@ -360,21 +376,21 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
360
376
 
361
377
  [[package]]
362
378
  name = "shlex"
363
- version = "1.1.0"
379
+ version = "1.3.0"
364
380
  source = "registry+https://github.com/rust-lang/crates.io-index"
365
- checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
381
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
366
382
 
367
383
  [[package]]
368
384
  name = "smallvec"
369
- version = "1.10.0"
385
+ version = "1.13.2"
370
386
  source = "registry+https://github.com/rust-lang/crates.io-index"
371
- checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
387
+ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
372
388
 
373
389
  [[package]]
374
390
  name = "syn"
375
- version = "2.0.31"
391
+ version = "2.0.63"
376
392
  source = "registry+https://github.com/rust-lang/crates.io-index"
377
- checksum = "718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398"
393
+ checksum = "bf5be731623ca1a1fb7d8be6f261a3be6d3e2337b8a1f97be944d020c8fcb704"
378
394
  dependencies = [
379
395
  "proc-macro2",
380
396
  "quote",
@@ -383,8 +399,9 @@ dependencies = [
383
399
 
384
400
  [[package]]
385
401
  name = "tiktoken-rs"
386
- version = "0.3.2"
387
- source = "git+https://github.com/IAPark/tiktoken-rs.git#5231fbf4a91d9221a713522e755445d0dde341fa"
402
+ version = "0.5.9"
403
+ source = "registry+https://github.com/rust-lang/crates.io-index"
404
+ checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234"
388
405
  dependencies = [
389
406
  "anyhow",
390
407
  "base64",
@@ -406,50 +423,20 @@ dependencies = [
406
423
 
407
424
  [[package]]
408
425
  name = "unicode-ident"
409
- version = "1.0.8"
410
- source = "registry+https://github.com/rust-lang/crates.io-index"
411
- checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
412
-
413
- [[package]]
414
- name = "winapi"
415
- version = "0.3.9"
426
+ version = "1.0.12"
416
427
  source = "registry+https://github.com/rust-lang/crates.io-index"
417
- checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
418
- dependencies = [
419
- "winapi-i686-pc-windows-gnu",
420
- "winapi-x86_64-pc-windows-gnu",
421
- ]
422
-
423
- [[package]]
424
- name = "winapi-i686-pc-windows-gnu"
425
- version = "0.4.0"
426
- source = "registry+https://github.com/rust-lang/crates.io-index"
427
- checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
428
-
429
- [[package]]
430
- name = "winapi-x86_64-pc-windows-gnu"
431
- version = "0.4.0"
432
- source = "registry+https://github.com/rust-lang/crates.io-index"
433
- checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
434
-
435
- [[package]]
436
- name = "windows-sys"
437
- version = "0.45.0"
438
- source = "registry+https://github.com/rust-lang/crates.io-index"
439
- checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
440
- dependencies = [
441
- "windows-targets",
442
- ]
428
+ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
443
429
 
444
430
  [[package]]
445
431
  name = "windows-targets"
446
- version = "0.42.2"
432
+ version = "0.52.5"
447
433
  source = "registry+https://github.com/rust-lang/crates.io-index"
448
- checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
434
+ checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
449
435
  dependencies = [
450
436
  "windows_aarch64_gnullvm",
451
437
  "windows_aarch64_msvc",
452
438
  "windows_i686_gnu",
439
+ "windows_i686_gnullvm",
453
440
  "windows_i686_msvc",
454
441
  "windows_x86_64_gnu",
455
442
  "windows_x86_64_gnullvm",
@@ -458,42 +445,48 @@ dependencies = [
458
445
 
459
446
  [[package]]
460
447
  name = "windows_aarch64_gnullvm"
461
- version = "0.42.2"
448
+ version = "0.52.5"
462
449
  source = "registry+https://github.com/rust-lang/crates.io-index"
463
- checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
450
+ checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
464
451
 
465
452
  [[package]]
466
453
  name = "windows_aarch64_msvc"
467
- version = "0.42.2"
454
+ version = "0.52.5"
468
455
  source = "registry+https://github.com/rust-lang/crates.io-index"
469
- checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
456
+ checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
470
457
 
471
458
  [[package]]
472
459
  name = "windows_i686_gnu"
473
- version = "0.42.2"
460
+ version = "0.52.5"
461
+ source = "registry+https://github.com/rust-lang/crates.io-index"
462
+ checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
463
+
464
+ [[package]]
465
+ name = "windows_i686_gnullvm"
466
+ version = "0.52.5"
474
467
  source = "registry+https://github.com/rust-lang/crates.io-index"
475
- checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
468
+ checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
476
469
 
477
470
  [[package]]
478
471
  name = "windows_i686_msvc"
479
- version = "0.42.2"
472
+ version = "0.52.5"
480
473
  source = "registry+https://github.com/rust-lang/crates.io-index"
481
- checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
474
+ checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
482
475
 
483
476
  [[package]]
484
477
  name = "windows_x86_64_gnu"
485
- version = "0.42.2"
478
+ version = "0.52.5"
486
479
  source = "registry+https://github.com/rust-lang/crates.io-index"
487
- checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
480
+ checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
488
481
 
489
482
  [[package]]
490
483
  name = "windows_x86_64_gnullvm"
491
- version = "0.42.2"
484
+ version = "0.52.5"
492
485
  source = "registry+https://github.com/rust-lang/crates.io-index"
493
- checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
486
+ checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
494
487
 
495
488
  [[package]]
496
489
  name = "windows_x86_64_msvc"
497
- version = "0.42.2"
490
+ version = "0.52.5"
498
491
  source = "registry+https://github.com/rust-lang/crates.io-index"
499
- checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
492
+ checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
data/Gemfile.lock CHANGED
@@ -1,8 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tiktoken_ruby (0.0.7)
5
- rb_sys (>= 0.9.86)
4
+ tiktoken_ruby (0.0.9)
5
+ rb_sys (= 0.9.87)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
@@ -22,7 +22,7 @@ GEM
22
22
  rake (13.1.0)
23
23
  rake-compiler (1.2.5)
24
24
  rake
25
- rb_sys (0.9.86)
25
+ rb_sys (0.9.87)
26
26
  regexp_parser (2.9.0)
27
27
  rexml (3.2.6)
28
28
  rspec (3.12.0)
@@ -89,4 +89,4 @@ DEPENDENCIES
89
89
  yard-doctest
90
90
 
91
91
  BUNDLED WITH
92
- 2.4.6
92
+ 2.4.4
data/README.md CHANGED
@@ -1,8 +1,14 @@
1
1
  [![Gem Version](https://badge.fury.io/rb/tiktoken_ruby.svg)](https://badge.fury.io/rb/tiktoken_ruby)
2
+
2
3
  # tiktoken_ruby
3
4
 
4
5
  [Tiktoken](https://github.com/openai/tiktoken) is a BPE tokenizer from OpenAI used with their GPT models.
5
- This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
6
+ This is a wrapper around it aimed primarily at enabling accurate counts of GPT model tokens used.
7
+
8
+ ## Request for maintainers
9
+
10
+ I can't really put substantial time into maintaining this. Probably nothing more than a couple hours every few months. If you have experience maintaining Ruby gems and would like to
11
+ lend a hand, please send me an email or reply to this [issue](https://github.com/IAPark/tiktoken_ruby/issues/26).
6
12
 
7
13
  ## Installation
8
14
 
@@ -15,17 +21,19 @@ If bundler is not being used to manage dependencies, install the gem by executin
15
21
  $ gem install tiktoken_ruby
16
22
 
17
23
  ## Usage
24
+
18
25
  Usage should be very similar to the Python library. Here's a simple example:
19
26
 
20
27
  Encode and decode text
28
+
21
29
  ```ruby
22
30
  require 'tiktoken_ruby'
23
-
24
31
  enc = Tiktoken.get_encoding("cl100k_base")
25
32
  enc.decode(enc.encode("hello world")) #=> "hello world"
26
33
  ```
27
34
 
28
35
  Encoders can also be retrieved by model name
36
+
29
37
  ```ruby
30
38
  require 'tiktoken_ruby'
31
39
 
@@ -53,7 +61,6 @@ bundle exec rake compile
53
61
  bundle exec rake spec
54
62
  ```
55
63
 
56
-
57
64
  ## License
58
65
 
59
66
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -11,5 +11,5 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  magnus = { version = "0.6.1" }
14
- rb-sys = { version = "*", features = ["stable-api-compiled-fallback"] }
15
- tiktoken-rs = { git = "https://github.com/IAPark/tiktoken-rs.git" }
14
+ rb-sys = { version = "0.9.87", features = ["stable-api-compiled-fallback"] }
15
+ tiktoken-rs = { version = "0.5.9" }
@@ -20,6 +20,11 @@ fn cl100k_base() -> CoreBPEWrapper {
20
20
  CoreBPEWrapper::new(core_bpe)
21
21
  }
22
22
 
23
+ fn o200k_base() -> CoreBPEWrapper {
24
+ let core_bpe = tiktoken_rs::o200k_base().unwrap();
25
+ CoreBPEWrapper::new(core_bpe)
26
+ }
27
+
23
28
  fn module() -> Result<RModule, magnus::Error> {
24
29
  define_module("Tiktoken")
25
30
  }
@@ -37,6 +42,7 @@ fn init() -> Result<(), Error> {
37
42
  factory_module.define_singleton_method("p50k_base", function!(p50k_base, 0))?;
38
43
  factory_module.define_singleton_method("p50k_edit", function!(p50k_edit, 0))?;
39
44
  factory_module.define_singleton_method("cl100k_base", function!(cl100k_base, 0))?;
45
+ factory_module.define_singleton_method("o200k_base", function!(o200k_base, 0))?;
40
46
 
41
47
  let ext_module = module.define_module("Ext")?;
42
48
  let bpe_class = ext_module.define_class("CoreBPE", class::object())?;
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Tiktoken::Encoding
4
+ CACHE_MUTEX = Mutex.new
5
+
4
6
  attr_reader :name
5
7
 
6
8
  # This returns a new Tiktoken::Encoding instance for the requested encoding
@@ -15,8 +17,10 @@ class Tiktoken::Encoding
15
17
  # @param encoding [Symbol] The name of the encoding to load
16
18
  # @return [Tiktoken::Encoding] The encoding instance
17
19
  def self.for_name_cached(encoding)
18
- @encodings ||= {}
19
- @encodings[encoding.to_sym] ||= Tiktoken::Encoding.for_name(encoding)
20
+ CACHE_MUTEX.synchronize do
21
+ @encodings ||= {}
22
+ @encodings[encoding.to_sym] ||= Tiktoken::Encoding.for_name(encoding)
23
+ end
20
24
  end
21
25
 
22
26
  # Encodes the text as a list of integer tokens. This encoding will encode special non text tokens
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tiktoken
4
- VERSION = "0.0.7"
4
+ VERSION = "0.0.9"
5
5
  end
data/lib/tiktoken_ruby.rb CHANGED
@@ -28,7 +28,7 @@ module Tiktoken
28
28
 
29
29
  # Gets the encoding for an OpenAI model
30
30
  # @param model_name [Symbol|String] The name of the model to get the encoding for
31
- # @return [Tiktoken::Encoding] The encoding instance
31
+ # @return [Tiktoken::Encoding, nil] The encoding instance, or nil if no encoding is found
32
32
  # @example Count tokens for text
33
33
  # enc = Tiktoken.encoding_for_model("gpt-4")
34
34
  # enc.encode("hello world").length #=> 2
@@ -37,10 +37,12 @@ module Tiktoken
37
37
  return get_encoding(MODEL_TO_ENCODING_NAME[model_name.to_sym])
38
38
  end
39
39
 
40
- MODEL_PREFIX_TO_ENCODING.each do |prefix, encoding|
41
- if model_name.start_with?(prefix.to_s)
42
- return get_encoding(encoding)
43
- end
40
+ _prefix, encoding = MODEL_PREFIX_TO_ENCODING.find do |prefix, _encoding|
41
+ model_name.start_with?(prefix.to_s)
42
+ end
43
+
44
+ if encoding
45
+ get_encoding(encoding)
44
46
  end
45
47
  end
46
48
 
@@ -62,13 +64,15 @@ module Tiktoken
62
64
  :r50k_base,
63
65
  :p50k_base,
64
66
  :p50k_edit,
65
- :cl100k_base
67
+ :cl100k_base,
68
+ :o200k_base
66
69
  ]
67
70
 
68
71
  # taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
69
72
  # that is also MIT licensed but by OpenAI
70
73
  MODEL_TO_ENCODING_NAME = {
71
74
  # chat
75
+ "gpt-4o": "o200k_base",
72
76
  "gpt-4": "cl100k_base",
73
77
  "gpt-3.5-turbo": "cl100k_base",
74
78
  "gpt-35-turbo": "cl100k_base", # Azure deployment name
@@ -118,6 +122,7 @@ module Tiktoken
118
122
 
119
123
  MODEL_PREFIX_TO_ENCODING = {
120
124
  # chat
125
+ "gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13, etc.
121
126
  "gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
122
127
  "gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
123
128
  "gpt-35-turbo-": "cl100k_base", # Azure deployment name
@@ -29,5 +29,5 @@ Gem::Specification.new do |spec|
29
29
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ["lib"]
31
31
  spec.extensions = ["ext/tiktoken_ruby/extconf.rb"]
32
- spec.add_dependency "rb_sys", ">= 0.9.86"
32
+ spec.add_dependency "rb_sys", "= 0.9.87"
33
33
  end
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiktoken_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - IAPark
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-12 00:00:00.000000000 Z
11
+ date: 2024-05-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.9.86
19
+ version: 0.9.87
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.9.86
26
+ version: 0.9.87
27
27
  description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
28
28
  used by OpenAI. It can be used to count the number of tokens in text before sending
29
29
  it to OpenAI APIs.