tiktoken_ruby 0.0.8 → 0.0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 347a10d045e27fca4cdfec03c4d2eac0150448b8f2125d5bcbcd1b92db83499a
4
- data.tar.gz: de048e8320daa15b27ffa7ccdd9f7ec618cb2a3ad96fe2bedf71e6f780fc8b6f
3
+ metadata.gz: 923d3291c75ea3e0d93b45c9a50e11323b421c3e6c93f140f8c5b64e708bc203
4
+ data.tar.gz: 2a1f843387a971b4b9735e52abf58471381bc592895af99b1b4091723b246266
5
5
  SHA512:
6
- metadata.gz: 7c587d4b18e777a0f7692aab857fec1fc3b3bcceceab158439197123786475c65d51f5356351bcf0c9c327b94e2ff15a83b2144c2db329aedea0456c1d763ff9
7
- data.tar.gz: abfdeb836d81555effa5ef6647a77b360dae2b975bb3dc23a22c779f6dd82256630942ddfef01a837ff5195896fa4251925e87dec0d245dce1c8e83719a488f1
6
+ metadata.gz: 98aa2b547a129a5377a838ddfae4df66aee8de87fea916787283dbd2227066d2f99808aa0b71a522e27b58dd59b214c000cc3b933ebf9b9ff2aad56c6cb43536
7
+ data.tar.gz: 3beb0aead95f22b024cbe4e7d105698a1c0dbacc247956d7c91dcf0e468103a577a4a0bb6bf8cb2402516eb9ad521acb3d3af1090f626ab59380eb886e6c06ec
@@ -0,0 +1,5 @@
1
+ {
2
+ "[ruby]": {
3
+ "editor.defaultFormatter": "Shopify.ruby-lsp"
4
+ }
5
+ }
data/Cargo.lock CHANGED
@@ -1,41 +1,41 @@
1
1
  # This file is automatically @generated by Cargo.
2
2
  # It is not intended for manual editing.
3
- version = 3
3
+ version = 4
4
4
 
5
5
  [[package]]
6
6
  name = "aho-corasick"
7
- version = "0.7.20"
7
+ version = "1.1.3"
8
8
  source = "registry+https://github.com/rust-lang/crates.io-index"
9
- checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
9
+ checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
10
10
  dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
14
  [[package]]
15
15
  name = "anyhow"
16
- version = "1.0.70"
16
+ version = "1.0.95"
17
17
  source = "registry+https://github.com/rust-lang/crates.io-index"
18
- checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4"
18
+ checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
19
19
 
20
20
  [[package]]
21
21
  name = "autocfg"
22
- version = "1.1.0"
22
+ version = "1.4.0"
23
23
  source = "registry+https://github.com/rust-lang/crates.io-index"
24
- checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
24
+ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
25
25
 
26
26
  [[package]]
27
27
  name = "base64"
28
- version = "0.21.0"
28
+ version = "0.21.7"
29
29
  source = "registry+https://github.com/rust-lang/crates.io-index"
30
- checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a"
30
+ checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
31
31
 
32
32
  [[package]]
33
33
  name = "bindgen"
34
- version = "0.69.4"
34
+ version = "0.69.5"
35
35
  source = "registry+https://github.com/rust-lang/crates.io-index"
36
- checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
36
+ checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
37
37
  dependencies = [
38
- "bitflags 2.4.0",
38
+ "bitflags",
39
39
  "cexpr",
40
40
  "clang-sys",
41
41
  "itertools",
@@ -66,24 +66,17 @@ checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
66
66
 
67
67
  [[package]]
68
68
  name = "bitflags"
69
- version = "1.3.2"
70
- source = "registry+https://github.com/rust-lang/crates.io-index"
71
- checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
72
-
73
- [[package]]
74
- name = "bitflags"
75
- version = "2.4.0"
69
+ version = "2.6.0"
76
70
  source = "registry+https://github.com/rust-lang/crates.io-index"
77
- checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635"
71
+ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
78
72
 
79
73
  [[package]]
80
74
  name = "bstr"
81
- version = "1.4.0"
75
+ version = "1.11.1"
82
76
  source = "registry+https://github.com/rust-lang/crates.io-index"
83
- checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09"
77
+ checksum = "786a307d683a5bf92e6fd5fd69a7eb613751668d1d8d67d802846dfe367c62c8"
84
78
  dependencies = [
85
79
  "memchr",
86
- "once_cell",
87
80
  "regex-automata",
88
81
  "serde",
89
82
  ]
@@ -105,9 +98,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
105
98
 
106
99
  [[package]]
107
100
  name = "clang-sys"
108
- version = "1.6.0"
101
+ version = "1.8.1"
109
102
  source = "registry+https://github.com/rust-lang/crates.io-index"
110
- checksum = "77ed9a53e5d4d9c573ae844bfac6872b159cb1d1585a83b29e7a64b7eef7332a"
103
+ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
111
104
  dependencies = [
112
105
  "glob",
113
106
  "libc",
@@ -116,25 +109,26 @@ dependencies = [
116
109
 
117
110
  [[package]]
118
111
  name = "either"
119
- version = "1.10.0"
112
+ version = "1.13.0"
120
113
  source = "registry+https://github.com/rust-lang/crates.io-index"
121
- checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
114
+ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
122
115
 
123
116
  [[package]]
124
117
  name = "fancy-regex"
125
- version = "0.11.0"
118
+ version = "0.13.0"
126
119
  source = "registry+https://github.com/rust-lang/crates.io-index"
127
- checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2"
120
+ checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2"
128
121
  dependencies = [
129
122
  "bit-set",
130
- "regex",
123
+ "regex-automata",
124
+ "regex-syntax",
131
125
  ]
132
126
 
133
127
  [[package]]
134
128
  name = "glob"
135
- version = "0.3.1"
129
+ version = "0.3.2"
136
130
  source = "registry+https://github.com/rust-lang/crates.io-index"
137
- checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
131
+ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
138
132
 
139
133
  [[package]]
140
134
  name = "itertools"
@@ -147,9 +141,9 @@ dependencies = [
147
141
 
148
142
  [[package]]
149
143
  name = "lazy_static"
150
- version = "1.4.0"
144
+ version = "1.5.0"
151
145
  source = "registry+https://github.com/rust-lang/crates.io-index"
152
- checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
146
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
153
147
 
154
148
  [[package]]
155
149
  name = "lazycell"
@@ -159,25 +153,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
159
153
 
160
154
  [[package]]
161
155
  name = "libc"
162
- version = "0.2.140"
156
+ version = "0.2.169"
163
157
  source = "registry+https://github.com/rust-lang/crates.io-index"
164
- checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
158
+ checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
165
159
 
166
160
  [[package]]
167
161
  name = "libloading"
168
- version = "0.7.4"
162
+ version = "0.8.6"
169
163
  source = "registry+https://github.com/rust-lang/crates.io-index"
170
- checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
164
+ checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
171
165
  dependencies = [
172
166
  "cfg-if",
173
- "winapi",
167
+ "windows-targets",
174
168
  ]
175
169
 
176
170
  [[package]]
177
171
  name = "lock_api"
178
- version = "0.4.9"
172
+ version = "0.4.12"
179
173
  source = "registry+https://github.com/rust-lang/crates.io-index"
180
- checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
174
+ checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
181
175
  dependencies = [
182
176
  "autocfg",
183
177
  "scopeguard",
@@ -185,9 +179,9 @@ dependencies = [
185
179
 
186
180
  [[package]]
187
181
  name = "magnus"
188
- version = "0.6.1"
182
+ version = "0.7.1"
189
183
  source = "registry+https://github.com/rust-lang/crates.io-index"
190
- checksum = "0516897a45f8ce8270a8910bcb94cd83538b19b6ae3a0c281a765df170b64695"
184
+ checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
191
185
  dependencies = [
192
186
  "magnus-macros",
193
187
  "rb-sys",
@@ -208,9 +202,9 @@ dependencies = [
208
202
 
209
203
  [[package]]
210
204
  name = "memchr"
211
- version = "2.5.0"
205
+ version = "2.7.4"
212
206
  source = "registry+https://github.com/rust-lang/crates.io-index"
213
- checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
207
+ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
214
208
 
215
209
  [[package]]
216
210
  name = "minimal-lexical"
@@ -228,17 +222,11 @@ dependencies = [
228
222
  "minimal-lexical",
229
223
  ]
230
224
 
231
- [[package]]
232
- name = "once_cell"
233
- version = "1.17.1"
234
- source = "registry+https://github.com/rust-lang/crates.io-index"
235
- checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
236
-
237
225
  [[package]]
238
226
  name = "parking_lot"
239
- version = "0.12.1"
227
+ version = "0.12.3"
240
228
  source = "registry+https://github.com/rust-lang/crates.io-index"
241
- checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
229
+ checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
242
230
  dependencies = [
243
231
  "lock_api",
244
232
  "parking_lot_core",
@@ -246,49 +234,49 @@ dependencies = [
246
234
 
247
235
  [[package]]
248
236
  name = "parking_lot_core"
249
- version = "0.9.7"
237
+ version = "0.9.10"
250
238
  source = "registry+https://github.com/rust-lang/crates.io-index"
251
- checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
239
+ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
252
240
  dependencies = [
253
241
  "cfg-if",
254
242
  "libc",
255
243
  "redox_syscall",
256
244
  "smallvec",
257
- "windows-sys",
245
+ "windows-targets",
258
246
  ]
259
247
 
260
248
  [[package]]
261
249
  name = "proc-macro2"
262
- version = "1.0.66"
250
+ version = "1.0.92"
263
251
  source = "registry+https://github.com/rust-lang/crates.io-index"
264
- checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
252
+ checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
265
253
  dependencies = [
266
254
  "unicode-ident",
267
255
  ]
268
256
 
269
257
  [[package]]
270
258
  name = "quote"
271
- version = "1.0.33"
259
+ version = "1.0.38"
272
260
  source = "registry+https://github.com/rust-lang/crates.io-index"
273
- checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
261
+ checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
274
262
  dependencies = [
275
263
  "proc-macro2",
276
264
  ]
277
265
 
278
266
  [[package]]
279
267
  name = "rb-sys"
280
- version = "0.9.87"
268
+ version = "0.9.106"
281
269
  source = "registry+https://github.com/rust-lang/crates.io-index"
282
- checksum = "225103e3d69bbfe8831f9fd0d2461335f3a9dd06aa6e88bcb6d6970383494d06"
270
+ checksum = "17b6efdbc8c1a22cb8b5d7ead0237c16c362c9ef6fbdc09e2d1040615b0f4cd0"
283
271
  dependencies = [
284
272
  "rb-sys-build",
285
273
  ]
286
274
 
287
275
  [[package]]
288
276
  name = "rb-sys-build"
289
- version = "0.9.87"
277
+ version = "0.9.106"
290
278
  source = "registry+https://github.com/rust-lang/crates.io-index"
291
- checksum = "bacce8095a5167d5ede618bbd9353e9d9e2f32ddaf54be911106f0ee6baacf09"
279
+ checksum = "e1d88c51e52f8636a5efc24ec5987056e64e48a91ed2a1af96cb5564686cc10f"
292
280
  dependencies = [
293
281
  "bindgen",
294
282
  "lazy_static",
@@ -307,35 +295,41 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
307
295
 
308
296
  [[package]]
309
297
  name = "redox_syscall"
310
- version = "0.2.16"
298
+ version = "0.5.8"
311
299
  source = "registry+https://github.com/rust-lang/crates.io-index"
312
- checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
300
+ checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
313
301
  dependencies = [
314
- "bitflags 1.3.2",
302
+ "bitflags",
315
303
  ]
316
304
 
317
305
  [[package]]
318
306
  name = "regex"
319
- version = "1.7.1"
307
+ version = "1.11.1"
320
308
  source = "registry+https://github.com/rust-lang/crates.io-index"
321
- checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
309
+ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
322
310
  dependencies = [
323
311
  "aho-corasick",
324
312
  "memchr",
313
+ "regex-automata",
325
314
  "regex-syntax",
326
315
  ]
327
316
 
328
317
  [[package]]
329
318
  name = "regex-automata"
330
- version = "0.1.10"
319
+ version = "0.4.9"
331
320
  source = "registry+https://github.com/rust-lang/crates.io-index"
332
- checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
321
+ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
322
+ dependencies = [
323
+ "aho-corasick",
324
+ "memchr",
325
+ "regex-syntax",
326
+ ]
333
327
 
334
328
  [[package]]
335
329
  name = "regex-syntax"
336
- version = "0.6.28"
330
+ version = "0.8.5"
337
331
  source = "registry+https://github.com/rust-lang/crates.io-index"
338
- checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
332
+ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
339
333
 
340
334
  [[package]]
341
335
  name = "rustc-hash"
@@ -345,9 +339,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
345
339
 
346
340
  [[package]]
347
341
  name = "scopeguard"
348
- version = "1.1.0"
342
+ version = "1.2.0"
349
343
  source = "registry+https://github.com/rust-lang/crates.io-index"
350
- checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
344
+ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
351
345
 
352
346
  [[package]]
353
347
  name = "seq-macro"
@@ -357,9 +351,23 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
357
351
 
358
352
  [[package]]
359
353
  name = "serde"
360
- version = "1.0.157"
354
+ version = "1.0.217"
361
355
  source = "registry+https://github.com/rust-lang/crates.io-index"
362
- checksum = "707de5fcf5df2b5788fca98dd7eab490bc2fd9b7ef1404defc462833b83f25ca"
356
+ checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
357
+ dependencies = [
358
+ "serde_derive",
359
+ ]
360
+
361
+ [[package]]
362
+ name = "serde_derive"
363
+ version = "1.0.217"
364
+ source = "registry+https://github.com/rust-lang/crates.io-index"
365
+ checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
366
+ dependencies = [
367
+ "proc-macro2",
368
+ "quote",
369
+ "syn",
370
+ ]
363
371
 
364
372
  [[package]]
365
373
  name = "shell-words"
@@ -369,21 +377,21 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
369
377
 
370
378
  [[package]]
371
379
  name = "shlex"
372
- version = "1.1.0"
380
+ version = "1.3.0"
373
381
  source = "registry+https://github.com/rust-lang/crates.io-index"
374
- checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
382
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
375
383
 
376
384
  [[package]]
377
385
  name = "smallvec"
378
- version = "1.10.0"
386
+ version = "1.13.2"
379
387
  source = "registry+https://github.com/rust-lang/crates.io-index"
380
- checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
388
+ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
381
389
 
382
390
  [[package]]
383
391
  name = "syn"
384
- version = "2.0.31"
392
+ version = "2.0.93"
385
393
  source = "registry+https://github.com/rust-lang/crates.io-index"
386
- checksum = "718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398"
394
+ checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058"
387
395
  dependencies = [
388
396
  "proc-macro2",
389
397
  "quote",
@@ -392,8 +400,9 @@ dependencies = [
392
400
 
393
401
  [[package]]
394
402
  name = "tiktoken-rs"
395
- version = "0.3.2"
396
- source = "git+https://github.com/IAPark/tiktoken-rs.git#5231fbf4a91d9221a713522e755445d0dde341fa"
403
+ version = "0.6.0"
404
+ source = "registry+https://github.com/rust-lang/crates.io-index"
405
+ checksum = "44075987ee2486402f0808505dd65692163d243a337fc54363d49afac41087f6"
397
406
  dependencies = [
398
407
  "anyhow",
399
408
  "base64",
@@ -401,6 +410,7 @@ dependencies = [
401
410
  "fancy-regex",
402
411
  "lazy_static",
403
412
  "parking_lot",
413
+ "regex",
404
414
  "rustc-hash",
405
415
  ]
406
416
 
@@ -415,50 +425,20 @@ dependencies = [
415
425
 
416
426
  [[package]]
417
427
  name = "unicode-ident"
418
- version = "1.0.8"
419
- source = "registry+https://github.com/rust-lang/crates.io-index"
420
- checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
421
-
422
- [[package]]
423
- name = "winapi"
424
- version = "0.3.9"
428
+ version = "1.0.14"
425
429
  source = "registry+https://github.com/rust-lang/crates.io-index"
426
- checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
427
- dependencies = [
428
- "winapi-i686-pc-windows-gnu",
429
- "winapi-x86_64-pc-windows-gnu",
430
- ]
431
-
432
- [[package]]
433
- name = "winapi-i686-pc-windows-gnu"
434
- version = "0.4.0"
435
- source = "registry+https://github.com/rust-lang/crates.io-index"
436
- checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
437
-
438
- [[package]]
439
- name = "winapi-x86_64-pc-windows-gnu"
440
- version = "0.4.0"
441
- source = "registry+https://github.com/rust-lang/crates.io-index"
442
- checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
443
-
444
- [[package]]
445
- name = "windows-sys"
446
- version = "0.45.0"
447
- source = "registry+https://github.com/rust-lang/crates.io-index"
448
- checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
449
- dependencies = [
450
- "windows-targets",
451
- ]
430
+ checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
452
431
 
453
432
  [[package]]
454
433
  name = "windows-targets"
455
- version = "0.42.2"
434
+ version = "0.52.6"
456
435
  source = "registry+https://github.com/rust-lang/crates.io-index"
457
- checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
436
+ checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
458
437
  dependencies = [
459
438
  "windows_aarch64_gnullvm",
460
439
  "windows_aarch64_msvc",
461
440
  "windows_i686_gnu",
441
+ "windows_i686_gnullvm",
462
442
  "windows_i686_msvc",
463
443
  "windows_x86_64_gnu",
464
444
  "windows_x86_64_gnullvm",
@@ -467,42 +447,48 @@ dependencies = [
467
447
 
468
448
  [[package]]
469
449
  name = "windows_aarch64_gnullvm"
470
- version = "0.42.2"
450
+ version = "0.52.6"
471
451
  source = "registry+https://github.com/rust-lang/crates.io-index"
472
- checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
452
+ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
473
453
 
474
454
  [[package]]
475
455
  name = "windows_aarch64_msvc"
476
- version = "0.42.2"
456
+ version = "0.52.6"
477
457
  source = "registry+https://github.com/rust-lang/crates.io-index"
478
- checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
458
+ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
479
459
 
480
460
  [[package]]
481
461
  name = "windows_i686_gnu"
482
- version = "0.42.2"
462
+ version = "0.52.6"
463
+ source = "registry+https://github.com/rust-lang/crates.io-index"
464
+ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
465
+
466
+ [[package]]
467
+ name = "windows_i686_gnullvm"
468
+ version = "0.52.6"
483
469
  source = "registry+https://github.com/rust-lang/crates.io-index"
484
- checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
470
+ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
485
471
 
486
472
  [[package]]
487
473
  name = "windows_i686_msvc"
488
- version = "0.42.2"
474
+ version = "0.52.6"
489
475
  source = "registry+https://github.com/rust-lang/crates.io-index"
490
- checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
476
+ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
491
477
 
492
478
  [[package]]
493
479
  name = "windows_x86_64_gnu"
494
- version = "0.42.2"
480
+ version = "0.52.6"
495
481
  source = "registry+https://github.com/rust-lang/crates.io-index"
496
- checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
482
+ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
497
483
 
498
484
  [[package]]
499
485
  name = "windows_x86_64_gnullvm"
500
- version = "0.42.2"
486
+ version = "0.52.6"
501
487
  source = "registry+https://github.com/rust-lang/crates.io-index"
502
- checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
488
+ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
503
489
 
504
490
  [[package]]
505
491
  name = "windows_x86_64_msvc"
506
- version = "0.42.2"
492
+ version = "0.52.6"
507
493
  source = "registry+https://github.com/rust-lang/crates.io-index"
508
- checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
494
+ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
data/Gemfile.lock CHANGED
@@ -1,73 +1,73 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tiktoken_ruby (0.0.8)
5
- rb_sys (>= 0.9.86)
4
+ tiktoken_ruby (0.0.11.1)
5
+ rb_sys (= 0.9.106)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
10
  ast (2.4.2)
11
- diff-lcs (1.5.0)
12
- json (2.7.1)
11
+ diff-lcs (1.5.1)
12
+ json (2.9.1)
13
13
  language_server-protocol (3.17.0.3)
14
14
  lint_roller (1.1.0)
15
15
  minitest (5.21.2)
16
- parallel (1.24.0)
17
- parser (3.3.0.4)
16
+ parallel (1.26.3)
17
+ parser (3.3.6.0)
18
18
  ast (~> 2.4.1)
19
19
  racc
20
- racc (1.7.3)
20
+ racc (1.8.1)
21
21
  rainbow (3.1.1)
22
- rake (13.1.0)
23
- rake-compiler (1.2.5)
22
+ rake (13.2.1)
23
+ rake-compiler (1.2.9)
24
24
  rake
25
- rb_sys (0.9.86)
26
- regexp_parser (2.9.0)
27
- rexml (3.2.6)
28
- rspec (3.12.0)
29
- rspec-core (~> 3.12.0)
30
- rspec-expectations (~> 3.12.0)
31
- rspec-mocks (~> 3.12.0)
32
- rspec-core (3.12.2)
33
- rspec-support (~> 3.12.0)
34
- rspec-expectations (3.12.3)
25
+ rb_sys (0.9.106)
26
+ regexp_parser (2.10.0)
27
+ rspec (3.13.0)
28
+ rspec-core (~> 3.13.0)
29
+ rspec-expectations (~> 3.13.0)
30
+ rspec-mocks (~> 3.13.0)
31
+ rspec-core (3.13.2)
32
+ rspec-support (~> 3.13.0)
33
+ rspec-expectations (3.13.3)
35
34
  diff-lcs (>= 1.2.0, < 2.0)
36
- rspec-support (~> 3.12.0)
37
- rspec-mocks (3.12.6)
35
+ rspec-support (~> 3.13.0)
36
+ rspec-mocks (3.13.2)
38
37
  diff-lcs (>= 1.2.0, < 2.0)
39
- rspec-support (~> 3.12.0)
40
- rspec-support (3.12.1)
41
- rubocop (1.59.0)
38
+ rspec-support (~> 3.13.0)
39
+ rspec-support (3.13.2)
40
+ rubocop (1.69.2)
42
41
  json (~> 2.3)
43
42
  language_server-protocol (>= 3.17.0)
44
43
  parallel (~> 1.10)
45
- parser (>= 3.2.2.4)
44
+ parser (>= 3.3.0.2)
46
45
  rainbow (>= 2.2.2, < 4.0)
47
- regexp_parser (>= 1.8, < 3.0)
48
- rexml (>= 3.2.5, < 4.0)
49
- rubocop-ast (>= 1.30.0, < 2.0)
46
+ regexp_parser (>= 2.9.3, < 3.0)
47
+ rubocop-ast (>= 1.36.2, < 2.0)
50
48
  ruby-progressbar (~> 1.7)
51
- unicode-display_width (>= 2.4.0, < 3.0)
52
- rubocop-ast (1.30.0)
53
- parser (>= 3.2.1.0)
54
- rubocop-performance (1.20.2)
49
+ unicode-display_width (>= 2.4.0, < 4.0)
50
+ rubocop-ast (1.37.0)
51
+ parser (>= 3.3.1.0)
52
+ rubocop-performance (1.23.0)
55
53
  rubocop (>= 1.48.1, < 2.0)
56
- rubocop-ast (>= 1.30.0, < 2.0)
54
+ rubocop-ast (>= 1.31.1, < 2.0)
57
55
  ruby-progressbar (1.13.0)
58
- standard (1.33.0)
56
+ standard (1.43.0)
59
57
  language_server-protocol (~> 3.17.0.2)
60
58
  lint_roller (~> 1.0)
61
- rubocop (~> 1.59.0)
59
+ rubocop (~> 1.69.1)
62
60
  standard-custom (~> 1.0.0)
63
- standard-performance (~> 1.3)
61
+ standard-performance (~> 1.6)
64
62
  standard-custom (1.0.2)
65
63
  lint_roller (~> 1.0)
66
64
  rubocop (~> 1.50)
67
- standard-performance (1.3.1)
65
+ standard-performance (1.6.0)
68
66
  lint_roller (~> 1.1)
69
- rubocop-performance (~> 1.20.2)
70
- unicode-display_width (2.5.0)
67
+ rubocop-performance (~> 1.23.0)
68
+ unicode-display_width (3.1.3)
69
+ unicode-emoji (~> 4.0, >= 4.0.4)
70
+ unicode-emoji (4.0.4)
71
71
  yard (0.9.34)
72
72
  yard-doctest (0.1.17)
73
73
  minitest
@@ -89,4 +89,4 @@ DEPENDENCIES
89
89
  yard-doctest
90
90
 
91
91
  BUNDLED WITH
92
- 2.4.6
92
+ 2.5.18
data/Rakefile CHANGED
@@ -21,3 +21,9 @@ end
21
21
  task build: :compile
22
22
 
23
23
  task default: %i[compile spec standard]
24
+
25
+ # Packaging default (non-precompiled) gem
26
+ require "rubygems/package_task"
27
+ gem_path = Gem::PackageTask.new(GEMSPEC).define
28
+ desc "Package the Ruby gem"
29
+ task "package" => [gem_path]
@@ -10,6 +10,6 @@ publish = false
10
10
  crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
- magnus = { version = "0.6.1" }
14
- rb-sys = { version = "*", features = ["stable-api-compiled-fallback"] }
15
- tiktoken-rs = { git = "https://github.com/IAPark/tiktoken-rs.git" }
13
+ magnus = { version = "0.7.1" }
14
+ rb-sys = { version = "0.9.106", features = ["stable-api-compiled-fallback"] }
15
+ tiktoken-rs = { version = "0.6.0" }
@@ -1,7 +1,8 @@
1
1
  use std::collections::HashSet;
2
2
 
3
- use crate::uncicode_error;
3
+ use tiktoken_rs::Rank;
4
4
 
5
+ use crate::uncicode_error;
5
6
 
6
7
  #[magnus::wrap(class = "Tiktoken::Ext::CoreBPE")]
7
8
  pub struct CoreBPEWrapper {
@@ -13,11 +14,15 @@ impl CoreBPEWrapper {
13
14
  Self { core_bpe }
14
15
  }
15
16
 
16
- pub fn encode_ordinary(&self, text: String) -> Vec<usize> {
17
+ pub fn encode_ordinary(&self, text: String) -> Vec<Rank> {
17
18
  self.core_bpe.encode_ordinary(text.as_str())
18
19
  }
19
20
 
20
- pub fn encode(&self, text: String, allowed_special: magnus::RArray) -> Result<Vec<usize>, magnus::Error> {
21
+ pub fn encode(
22
+ &self,
23
+ text: String,
24
+ allowed_special: magnus::RArray,
25
+ ) -> Result<Vec<Rank>, magnus::Error> {
21
26
  let allowed_special: Vec<String> = allowed_special.to_vec()?;
22
27
  let allowed_special: Vec<&str> = allowed_special.iter().map(|s| s.as_str()).collect();
23
28
  let allowed_special: HashSet<&str> = HashSet::from_iter(allowed_special.iter().cloned());
@@ -25,20 +30,18 @@ impl CoreBPEWrapper {
25
30
  Ok(self.core_bpe.encode(text.as_str(), allowed_special))
26
31
  }
27
32
 
28
- pub fn encode_with_special_tokens(&self, text: String) -> Vec<usize> {
33
+ pub fn encode_with_special_tokens(&self, text: String) -> Vec<Rank> {
29
34
  self.core_bpe.encode_with_special_tokens(text.as_str())
30
35
  }
31
36
 
32
- pub fn decode(&self, ids: Vec<usize>) -> Result<String, magnus::Error> {
33
- self.core_bpe.decode(ids)
34
- .map_err(|e| {
35
- let error = match uncicode_error() {
36
- Ok(error) => error,
37
- Err(e) => return e
38
- };
39
-
40
- magnus::Error::new(error, e.to_string())
41
- })
37
+ pub fn decode(&self, ids: Vec<Rank>) -> Result<String, magnus::Error> {
38
+ self.core_bpe.decode(ids).map_err(|e| {
39
+ let error = match uncicode_error() {
40
+ Ok(error) => error,
41
+ Err(e) => return e,
42
+ };
42
43
 
44
+ magnus::Error::new(error, e.to_string())
45
+ })
43
46
  }
44
47
  }
@@ -20,6 +20,11 @@ fn cl100k_base() -> CoreBPEWrapper {
20
20
  CoreBPEWrapper::new(core_bpe)
21
21
  }
22
22
 
23
+ fn o200k_base() -> CoreBPEWrapper {
24
+ let core_bpe = tiktoken_rs::o200k_base().unwrap();
25
+ CoreBPEWrapper::new(core_bpe)
26
+ }
27
+
23
28
  fn module() -> Result<RModule, magnus::Error> {
24
29
  define_module("Tiktoken")
25
30
  }
@@ -37,6 +42,7 @@ fn init() -> Result<(), Error> {
37
42
  factory_module.define_singleton_method("p50k_base", function!(p50k_base, 0))?;
38
43
  factory_module.define_singleton_method("p50k_edit", function!(p50k_edit, 0))?;
39
44
  factory_module.define_singleton_method("cl100k_base", function!(cl100k_base, 0))?;
45
+ factory_module.define_singleton_method("o200k_base", function!(o200k_base, 0))?;
40
46
 
41
47
  let ext_module = module.define_module("Ext")?;
42
48
  let bpe_class = ext_module.define_class("CoreBPE", class::object())?;
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Tiktoken
4
- VERSION = "0.0.8"
4
+ VERSION = "0.0.11.1"
5
5
  end
data/lib/tiktoken_ruby.rb CHANGED
@@ -64,13 +64,18 @@ module Tiktoken
64
64
  :r50k_base,
65
65
  :p50k_base,
66
66
  :p50k_edit,
67
- :cl100k_base
67
+ :cl100k_base,
68
+ :o200k_base
68
69
  ]
69
70
 
70
71
  # taken from the python library here https://github.com/openai/tiktoken/blob/main/tiktoken/model.py
71
- # that is also MIT licensed but by OpenAI
72
+ # that is also MIT licensed but by OpenAI;
73
+ # https://github.com/Congyuwang/tiktoken-rs/blob/main/tiktoken-rs/src/tokenizer.rs#L50
74
+ # is the source of the mapping for the Rust library
72
75
  MODEL_TO_ENCODING_NAME = {
73
76
  # chat
77
+ "chatgpt-4o-latest": "o200k_base",
78
+ "gpt-4o": "o200k_base",
74
79
  "gpt-4": "cl100k_base",
75
80
  "gpt-3.5-turbo": "cl100k_base",
76
81
  "gpt-35-turbo": "cl100k_base", # Azure deployment name
@@ -120,6 +125,7 @@ module Tiktoken
120
125
 
121
126
  MODEL_PREFIX_TO_ENCODING = {
122
127
  # chat
128
+ "gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13, etc.
123
129
  "gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
124
130
  "gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
125
131
  "gpt-35-turbo-": "cl100k_base", # Azure deployment name
data/script/release ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env bash
2
+
3
+ set -e
4
+
5
+ if [ -z "${TIKTOKEN_PUBLISH_KEY}" ]; then
6
+ echo "Error: TIKTOKEN_PUBLISH_KEY is not set. This is the RubyGems API key to push the gem."
7
+ exit 1
8
+ fi
9
+
10
+ run_id=""
11
+ # Parse arguments
12
+ while [[ "$#" -gt 0 ]]; do
13
+ case $1 in
14
+ --run-id)
15
+ run_id="$2"
16
+ shift 2
17
+ ;;
18
+ *)
19
+ echo "Unknown parameter passed: $1"
20
+ exit 1
21
+ ;;
22
+ esac
23
+ done
24
+
25
+ if [ -z "${run_id}" ]; then
26
+ echo "Error: --run-id is not provided. Please provide the GitHub Action run id for the cross-compile workflow."
27
+ exit 1
28
+ fi
29
+
30
+ version=$(grep VERSION lib/tiktoken_ruby/version.rb | head -n 1 | cut -d'"' -f2)
31
+ echo "Building tiktoken_ruby v$version, using artifacts from run $run_id"
32
+
33
+ rm -rf pkg/cross-compiled
34
+ gh run download "$run_id" -D pkg/cross-compiled
35
+
36
+ for gem in pkg/cross-compiled/cross-gem-*/tiktoken_ruby-"$version"*.gem ; do
37
+ echo "Publishing $gem"
38
+ GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "$gem" --host https://rubygems.org
39
+ done
40
+
41
+ # last but not least, the uncompiled gem
42
+ bundle exec rake package
43
+ GEM_HOST_API_KEY="${TIKTOKEN_PUBLISH_KEY}" gem push "pkg/tiktoken_ruby-$version.gem" --host https://rubygems.org
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tiktoken_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.11.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - IAPark
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-04-04 00:00:00.000000000 Z
11
+ date: 2025-01-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 0.9.86
19
+ version: 0.9.106
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 0.9.86
26
+ version: 0.9.106
27
27
  description: An unofficial Ruby wrapper for Tiktoken, a BPE tokenizer written by and
28
28
  used by OpenAI. It can be used to count the number of tokens in text before sending
29
29
  it to OpenAI APIs.
@@ -36,6 +36,7 @@ extra_rdoc_files: []
36
36
  files:
37
37
  - ".rspec"
38
38
  - ".standard.yml"
39
+ - ".vscode/settings.json"
39
40
  - Cargo.lock
40
41
  - Cargo.toml
41
42
  - Gemfile
@@ -51,8 +52,8 @@ files:
51
52
  - lib/tiktoken_ruby.rb
52
53
  - lib/tiktoken_ruby/encoding.rb
53
54
  - lib/tiktoken_ruby/version.rb
55
+ - script/release
54
56
  - sig/tiktoken_ruby.rbs
55
- - tiktoken_ruby.gemspec
56
57
  homepage: https://github.com/IAPark/tiktoken_ruby
57
58
  licenses:
58
59
  - MIT
@@ -68,14 +69,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
68
69
  requirements:
69
70
  - - ">="
70
71
  - !ruby/object:Gem::Version
71
- version: 2.7.0
72
+ version: 3.1.0
72
73
  required_rubygems_version: !ruby/object:Gem::Requirement
73
74
  requirements:
74
75
  - - ">="
75
76
  - !ruby/object:Gem::Version
76
- version: 3.1.0
77
+ version: 3.4.0
77
78
  requirements: []
78
- rubygems_version: 3.4.6
79
+ rubygems_version: 3.5.22
79
80
  signing_key:
80
81
  specification_version: 4
81
82
  summary: Ruby wrapper for Tiktoken
@@ -1,33 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "lib/tiktoken_ruby/version"
4
-
5
- Gem::Specification.new do |spec|
6
- spec.name = "tiktoken_ruby"
7
- spec.version = Tiktoken::VERSION
8
- spec.authors = ["IAPark"]
9
- spec.email = ["isaac.a.park@gmail.com"]
10
- spec.summary = "Ruby wrapper for Tiktoken"
11
- spec.description = "An unofficial Ruby wrapper for Tiktoken, " \
12
- "a BPE tokenizer written by and used by OpenAI. It can be used to " \
13
- "count the number of tokens in text before sending it to OpenAI APIs."
14
- spec.homepage = "https://github.com/IAPark/tiktoken_ruby"
15
- spec.license = "MIT"
16
- spec.required_ruby_version = ">= 2.7.0"
17
- spec.required_rubygems_version = ">= 3.1.0"
18
- spec.platform = Gem::Platform::RUBY
19
-
20
- spec.metadata["homepage_uri"] = spec.homepage
21
- spec.metadata["source_code_uri"] = "https://github.com/IAPark/tiktoken_ruby"
22
- spec.metadata["documentation_uri"] = "https://rubydoc.info/github/IAPark/tiktoken_ruby/main"
23
- spec.files = Dir.chdir(__dir__) do
24
- `git ls-files -z`.split("\x0").reject do |f|
25
- (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
26
- end
27
- end
28
- spec.bindir = "exe"
29
- spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
30
- spec.require_paths = ["lib"]
31
- spec.extensions = ["ext/tiktoken_ruby/extconf.rb"]
32
- spec.add_dependency "rb_sys", ">= 0.9.86"
33
- end