tokenizers 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 258211e71ca06e96bb4ee01b15e29f6f74d3c70d04af246e95b178e10f093059
4
- data.tar.gz: 6e0b01c577830afdf1c7d677b1377191420d85e0f1f8638893f72cbb7ccef322
3
+ metadata.gz: 4846b5d3dc0fe8f5828ddffe46908b1f3812ebf6a03a939ca0395ad7748533bb
4
+ data.tar.gz: 259795bfa6b13a36f62ab2ffb65e9feabd460e01efe9f59e7d6017c6dcd9b9b0
5
5
  SHA512:
6
- metadata.gz: 4e0ea1f11dbab96b213190397ee8676d6233568f4fe013970a5a2c32105ed20ec06a5c8bc7379065799de315a0fc6d5f47807f9af47bc6f47926e4147c3eabcc
7
- data.tar.gz: ccd00b103577c6cff4dded6a3bc42394eccb3e24b950674a33eedf76df7c08bc89cda8219f076fce4cf20d90580da82c03e001a4e49ceb80e56ae4055b4617cf
6
+ metadata.gz: 90f55feb8ceec81815bb61b7773e6f67924eb6d47869e5da67d579e2ac9df6a48fddee5e97f5a028e3fa1e39941f3bfe6ec6c04cf49fd1b87f18bade54911231
7
+ data.tar.gz: 9c1895b43222494b393f3fbddaa6e78216e025f3d3dddebf0c0311d2d897b282a16b8b0044aacb2466790b0a93c8d01099b25d662750b96d5798d0a4a927267b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.1 (2024-08-13)
2
+
3
+ - Updated Tokenizers to 0.20.0
4
+ - Added precompiled gem for Linux ARM MUSL
5
+
1
6
  ## 0.5.0 (2024-05-21)
2
7
 
3
8
  - Updated Tokenizers to 0.19.1
data/Cargo.lock CHANGED
@@ -4,19 +4,13 @@ version = 3
4
4
 
5
5
  [[package]]
6
6
  name = "aho-corasick"
7
- version = "1.1.1"
7
+ version = "1.1.3"
8
8
  source = "registry+https://github.com/rust-lang/crates.io-index"
9
- checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab"
9
+ checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
10
10
  dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
- [[package]]
15
- name = "autocfg"
16
- version = "1.1.0"
17
- source = "registry+https://github.com/rust-lang/crates.io-index"
18
- checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
19
-
20
14
  [[package]]
21
15
  name = "base64"
22
16
  version = "0.13.1"
@@ -25,16 +19,16 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
25
19
 
26
20
  [[package]]
27
21
  name = "bindgen"
28
- version = "0.69.1"
22
+ version = "0.69.4"
29
23
  source = "registry+https://github.com/rust-lang/crates.io-index"
30
- checksum = "9ffcebc3849946a7170a05992aac39da343a90676ab392c51a4280981d6379c2"
24
+ checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
31
25
  dependencies = [
32
- "bitflags 2.4.1",
26
+ "bitflags 2.6.0",
33
27
  "cexpr",
34
28
  "clang-sys",
29
+ "itertools 0.12.1",
35
30
  "lazy_static",
36
31
  "lazycell",
37
- "peeking_take_while",
38
32
  "proc-macro2",
39
33
  "quote",
40
34
  "regex",
@@ -51,15 +45,21 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
51
45
 
52
46
  [[package]]
53
47
  name = "bitflags"
54
- version = "2.4.1"
48
+ version = "2.6.0"
55
49
  source = "registry+https://github.com/rust-lang/crates.io-index"
56
- checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
50
+ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
51
+
52
+ [[package]]
53
+ name = "byteorder"
54
+ version = "1.5.0"
55
+ source = "registry+https://github.com/rust-lang/crates.io-index"
56
+ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
57
57
 
58
58
  [[package]]
59
59
  name = "cc"
60
- version = "1.0.79"
60
+ version = "1.1.8"
61
61
  source = "registry+https://github.com/rust-lang/crates.io-index"
62
- checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
62
+ checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549"
63
63
 
64
64
  [[package]]
65
65
  name = "cexpr"
@@ -78,9 +78,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
78
78
 
79
79
  [[package]]
80
80
  name = "clang-sys"
81
- version = "1.6.1"
81
+ version = "1.8.1"
82
82
  source = "registry+https://github.com/rust-lang/crates.io-index"
83
- checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
83
+ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
84
84
  dependencies = [
85
85
  "glob",
86
86
  "libc",
@@ -89,9 +89,9 @@ dependencies = [
89
89
 
90
90
  [[package]]
91
91
  name = "console"
92
- version = "0.15.5"
92
+ version = "0.15.8"
93
93
  source = "registry+https://github.com/rust-lang/crates.io-index"
94
- checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60"
94
+ checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
95
95
  dependencies = [
96
96
  "encode_unicode",
97
97
  "lazy_static",
@@ -102,42 +102,34 @@ dependencies = [
102
102
 
103
103
  [[package]]
104
104
  name = "crossbeam-deque"
105
- version = "0.8.3"
105
+ version = "0.8.5"
106
106
  source = "registry+https://github.com/rust-lang/crates.io-index"
107
- checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
107
+ checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
108
108
  dependencies = [
109
- "cfg-if",
110
109
  "crossbeam-epoch",
111
110
  "crossbeam-utils",
112
111
  ]
113
112
 
114
113
  [[package]]
115
114
  name = "crossbeam-epoch"
116
- version = "0.9.14"
115
+ version = "0.9.18"
117
116
  source = "registry+https://github.com/rust-lang/crates.io-index"
118
- checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695"
117
+ checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
119
118
  dependencies = [
120
- "autocfg",
121
- "cfg-if",
122
119
  "crossbeam-utils",
123
- "memoffset",
124
- "scopeguard",
125
120
  ]
126
121
 
127
122
  [[package]]
128
123
  name = "crossbeam-utils"
129
- version = "0.8.15"
124
+ version = "0.8.20"
130
125
  source = "registry+https://github.com/rust-lang/crates.io-index"
131
- checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b"
132
- dependencies = [
133
- "cfg-if",
134
- ]
126
+ checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
135
127
 
136
128
  [[package]]
137
129
  name = "darling"
138
- version = "0.20.8"
130
+ version = "0.20.10"
139
131
  source = "registry+https://github.com/rust-lang/crates.io-index"
140
- checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391"
132
+ checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989"
141
133
  dependencies = [
142
134
  "darling_core",
143
135
  "darling_macro",
@@ -145,9 +137,9 @@ dependencies = [
145
137
 
146
138
  [[package]]
147
139
  name = "darling_core"
148
- version = "0.20.8"
140
+ version = "0.20.10"
149
141
  source = "registry+https://github.com/rust-lang/crates.io-index"
150
- checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f"
142
+ checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5"
151
143
  dependencies = [
152
144
  "fnv",
153
145
  "ident_case",
@@ -159,9 +151,9 @@ dependencies = [
159
151
 
160
152
  [[package]]
161
153
  name = "darling_macro"
162
- version = "0.20.8"
154
+ version = "0.20.10"
163
155
  source = "registry+https://github.com/rust-lang/crates.io-index"
164
- checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f"
156
+ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
165
157
  dependencies = [
166
158
  "darling_core",
167
159
  "quote",
@@ -201,9 +193,9 @@ dependencies = [
201
193
 
202
194
  [[package]]
203
195
  name = "either"
204
- version = "1.8.1"
196
+ version = "1.13.0"
205
197
  source = "registry+https://github.com/rust-lang/crates.io-index"
206
- checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
198
+ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
207
199
 
208
200
  [[package]]
209
201
  name = "encode_unicode"
@@ -228,9 +220,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
228
220
 
229
221
  [[package]]
230
222
  name = "getrandom"
231
- version = "0.2.10"
223
+ version = "0.2.15"
232
224
  source = "registry+https://github.com/rust-lang/crates.io-index"
233
- checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
225
+ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
234
226
  dependencies = [
235
227
  "cfg-if",
236
228
  "libc",
@@ -251,9 +243,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
251
243
 
252
244
  [[package]]
253
245
  name = "indicatif"
254
- version = "0.17.7"
246
+ version = "0.17.8"
255
247
  source = "registry+https://github.com/rust-lang/crates.io-index"
256
- checksum = "fb28741c9db9a713d93deb3bb9515c20788cef5815265bee4980e87bde7e0f25"
248
+ checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
257
249
  dependencies = [
258
250
  "console",
259
251
  "instant",
@@ -264,9 +256,9 @@ dependencies = [
264
256
 
265
257
  [[package]]
266
258
  name = "instant"
267
- version = "0.1.12"
259
+ version = "0.1.13"
268
260
  source = "registry+https://github.com/rust-lang/crates.io-index"
269
- checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
261
+ checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
270
262
  dependencies = [
271
263
  "cfg-if",
272
264
  ]
@@ -291,15 +283,15 @@ dependencies = [
291
283
 
292
284
  [[package]]
293
285
  name = "itoa"
294
- version = "1.0.6"
286
+ version = "1.0.11"
295
287
  source = "registry+https://github.com/rust-lang/crates.io-index"
296
- checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
288
+ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
297
289
 
298
290
  [[package]]
299
291
  name = "lazy_static"
300
- version = "1.4.0"
292
+ version = "1.5.0"
301
293
  source = "registry+https://github.com/rust-lang/crates.io-index"
302
- checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
294
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
303
295
 
304
296
  [[package]]
305
297
  name = "lazycell"
@@ -309,28 +301,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
309
301
 
310
302
  [[package]]
311
303
  name = "libc"
312
- version = "0.2.149"
304
+ version = "0.2.155"
313
305
  source = "registry+https://github.com/rust-lang/crates.io-index"
314
- checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
306
+ checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
315
307
 
316
308
  [[package]]
317
309
  name = "libloading"
318
- version = "0.7.4"
310
+ version = "0.8.5"
319
311
  source = "registry+https://github.com/rust-lang/crates.io-index"
320
- checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
312
+ checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
321
313
  dependencies = [
322
314
  "cfg-if",
323
- "winapi",
315
+ "windows-targets",
324
316
  ]
325
317
 
326
318
  [[package]]
327
319
  name = "log"
328
- version = "0.4.17"
320
+ version = "0.4.22"
329
321
  source = "registry+https://github.com/rust-lang/crates.io-index"
330
- checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
331
- dependencies = [
332
- "cfg-if",
333
- ]
322
+ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
334
323
 
335
324
  [[package]]
336
325
  name = "macro_rules_attribute"
@@ -350,9 +339,9 @@ checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568"
350
339
 
351
340
  [[package]]
352
341
  name = "magnus"
353
- version = "0.6.4"
342
+ version = "0.7.1"
354
343
  source = "registry+https://github.com/rust-lang/crates.io-index"
355
- checksum = "b1597ef40aa8c36be098249e82c9a20cf7199278ac1c1a1a995eeead6a184479"
344
+ checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
356
345
  dependencies = [
357
346
  "magnus-macros",
358
347
  "rb-sys",
@@ -373,18 +362,9 @@ dependencies = [
373
362
 
374
363
  [[package]]
375
364
  name = "memchr"
376
- version = "2.6.3"
365
+ version = "2.7.4"
377
366
  source = "registry+https://github.com/rust-lang/crates.io-index"
378
- checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
379
-
380
- [[package]]
381
- name = "memoffset"
382
- version = "0.8.0"
383
- source = "registry+https://github.com/rust-lang/crates.io-index"
384
- checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
385
- dependencies = [
386
- "autocfg",
387
- ]
367
+ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
388
368
 
389
369
  [[package]]
390
370
  name = "minimal-lexical"
@@ -394,9 +374,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
394
374
 
395
375
  [[package]]
396
376
  name = "monostate"
397
- version = "0.1.12"
377
+ version = "0.1.13"
398
378
  source = "registry+https://github.com/rust-lang/crates.io-index"
399
- checksum = "a20fffcd8ca4c69d31e036a71abc400147b41f90895df4edcb36497a1f8af8bf"
379
+ checksum = "0d208407d7552cd041d8cdb69a1bc3303e029c598738177a3d87082004dc0e1e"
400
380
  dependencies = [
401
381
  "monostate-impl",
402
382
  "serde",
@@ -404,9 +384,9 @@ dependencies = [
404
384
 
405
385
  [[package]]
406
386
  name = "monostate-impl"
407
- version = "0.1.12"
387
+ version = "0.1.13"
408
388
  source = "registry+https://github.com/rust-lang/crates.io-index"
409
- checksum = "bf307cbbbd777a9c10cec88ddafee572b3484caad5cce0c9236523c3803105a6"
389
+ checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0"
410
390
  dependencies = [
411
391
  "proc-macro2",
412
392
  "quote",
@@ -431,9 +411,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
431
411
 
432
412
  [[package]]
433
413
  name = "once_cell"
434
- version = "1.17.1"
414
+ version = "1.19.0"
435
415
  source = "registry+https://github.com/rust-lang/crates.io-index"
436
- checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
416
+ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
437
417
 
438
418
  [[package]]
439
419
  name = "onig"
@@ -459,39 +439,36 @@ dependencies = [
459
439
 
460
440
  [[package]]
461
441
  name = "paste"
462
- version = "1.0.14"
442
+ version = "1.0.15"
463
443
  source = "registry+https://github.com/rust-lang/crates.io-index"
464
- checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
465
-
466
- [[package]]
467
- name = "peeking_take_while"
468
- version = "0.1.2"
469
- source = "registry+https://github.com/rust-lang/crates.io-index"
470
- checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
444
+ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
471
445
 
472
446
  [[package]]
473
447
  name = "pkg-config"
474
- version = "0.3.26"
448
+ version = "0.3.30"
475
449
  source = "registry+https://github.com/rust-lang/crates.io-index"
476
- checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
450
+ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
477
451
 
478
452
  [[package]]
479
453
  name = "portable-atomic"
480
- version = "1.4.3"
454
+ version = "1.7.0"
481
455
  source = "registry+https://github.com/rust-lang/crates.io-index"
482
- checksum = "31114a898e107c51bb1609ffaf55a0e011cf6a4d7f1170d0015a165082c0338b"
456
+ checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265"
483
457
 
484
458
  [[package]]
485
459
  name = "ppv-lite86"
486
- version = "0.2.17"
460
+ version = "0.2.20"
487
461
  source = "registry+https://github.com/rust-lang/crates.io-index"
488
- checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
462
+ checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
463
+ dependencies = [
464
+ "zerocopy",
465
+ ]
489
466
 
490
467
  [[package]]
491
468
  name = "proc-macro2"
492
- version = "1.0.81"
469
+ version = "1.0.86"
493
470
  source = "registry+https://github.com/rust-lang/crates.io-index"
494
- checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba"
471
+ checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
495
472
  dependencies = [
496
473
  "unicode-ident",
497
474
  ]
@@ -568,18 +545,18 @@ dependencies = [
568
545
 
569
546
  [[package]]
570
547
  name = "rb-sys"
571
- version = "0.9.97"
548
+ version = "0.9.100"
572
549
  source = "registry+https://github.com/rust-lang/crates.io-index"
573
- checksum = "47d30bcad206b51f2f66121190ca678dce1fdf3a2eae0ac5d838d1818b19bdf5"
550
+ checksum = "87f2ba20be84b32fad6b0ce397764bcdd0f2dca4431cf7035f6a6721e5747565"
574
551
  dependencies = [
575
552
  "rb-sys-build",
576
553
  ]
577
554
 
578
555
  [[package]]
579
556
  name = "rb-sys-build"
580
- version = "0.9.97"
557
+ version = "0.9.100"
581
558
  source = "registry+https://github.com/rust-lang/crates.io-index"
582
- checksum = "3cbd92f281615f3c2dcb9dcb0f0576624752afbf9a7f99173b37c4b55b62dd8a"
559
+ checksum = "7ecae2bdcb118ee721d9a3929f89e8578237fade298dfcf8c928609aa88abc48"
583
560
  dependencies = [
584
561
  "bindgen",
585
562
  "lazy_static",
@@ -598,9 +575,9 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
598
575
 
599
576
  [[package]]
600
577
  name = "regex"
601
- version = "1.10.4"
578
+ version = "1.10.6"
602
579
  source = "registry+https://github.com/rust-lang/crates.io-index"
603
- checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
580
+ checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
604
581
  dependencies = [
605
582
  "aho-corasick",
606
583
  "memchr",
@@ -610,9 +587,9 @@ dependencies = [
610
587
 
611
588
  [[package]]
612
589
  name = "regex-automata"
613
- version = "0.4.6"
590
+ version = "0.4.7"
614
591
  source = "registry+https://github.com/rust-lang/crates.io-index"
615
- checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
592
+ checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
616
593
  dependencies = [
617
594
  "aho-corasick",
618
595
  "memchr",
@@ -621,9 +598,9 @@ dependencies = [
621
598
 
622
599
  [[package]]
623
600
  name = "regex-syntax"
624
- version = "0.8.2"
601
+ version = "0.8.4"
625
602
  source = "registry+https://github.com/rust-lang/crates.io-index"
626
- checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
603
+ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
627
604
 
628
605
  [[package]]
629
606
  name = "rustc-hash"
@@ -633,15 +610,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
633
610
 
634
611
  [[package]]
635
612
  name = "ryu"
636
- version = "1.0.13"
613
+ version = "1.0.18"
637
614
  source = "registry+https://github.com/rust-lang/crates.io-index"
638
- checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
639
-
640
- [[package]]
641
- name = "scopeguard"
642
- version = "1.1.0"
643
- source = "registry+https://github.com/rust-lang/crates.io-index"
644
- checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
615
+ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
645
616
 
646
617
  [[package]]
647
618
  name = "seq-macro"
@@ -651,18 +622,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
651
622
 
652
623
  [[package]]
653
624
  name = "serde"
654
- version = "1.0.188"
625
+ version = "1.0.205"
655
626
  source = "registry+https://github.com/rust-lang/crates.io-index"
656
- checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
627
+ checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150"
657
628
  dependencies = [
658
629
  "serde_derive",
659
630
  ]
660
631
 
661
632
  [[package]]
662
633
  name = "serde_derive"
663
- version = "1.0.188"
634
+ version = "1.0.205"
664
635
  source = "registry+https://github.com/rust-lang/crates.io-index"
665
- checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
636
+ checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1"
666
637
  dependencies = [
667
638
  "proc-macro2",
668
639
  "quote",
@@ -671,11 +642,12 @@ dependencies = [
671
642
 
672
643
  [[package]]
673
644
  name = "serde_json"
674
- version = "1.0.95"
645
+ version = "1.0.122"
675
646
  source = "registry+https://github.com/rust-lang/crates.io-index"
676
- checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744"
647
+ checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da"
677
648
  dependencies = [
678
649
  "itoa",
650
+ "memchr",
679
651
  "ryu",
680
652
  "serde",
681
653
  ]
@@ -688,15 +660,15 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
688
660
 
689
661
  [[package]]
690
662
  name = "shlex"
691
- version = "1.1.0"
663
+ version = "1.3.0"
692
664
  source = "registry+https://github.com/rust-lang/crates.io-index"
693
- checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
665
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
694
666
 
695
667
  [[package]]
696
668
  name = "smallvec"
697
- version = "1.10.0"
669
+ version = "1.13.2"
698
670
  source = "registry+https://github.com/rust-lang/crates.io-index"
699
- checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
671
+ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
700
672
 
701
673
  [[package]]
702
674
  name = "spm_precompiled"
@@ -712,15 +684,15 @@ dependencies = [
712
684
 
713
685
  [[package]]
714
686
  name = "strsim"
715
- version = "0.10.0"
687
+ version = "0.11.1"
716
688
  source = "registry+https://github.com/rust-lang/crates.io-index"
717
- checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
689
+ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
718
690
 
719
691
  [[package]]
720
692
  name = "syn"
721
- version = "2.0.59"
693
+ version = "2.0.72"
722
694
  source = "registry+https://github.com/rust-lang/crates.io-index"
723
- checksum = "4a6531ffc7b071655e4ce2e04bd464c4830bb585a61cabb96cf808f05172615a"
695
+ checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
724
696
  dependencies = [
725
697
  "proc-macro2",
726
698
  "quote",
@@ -729,18 +701,18 @@ dependencies = [
729
701
 
730
702
  [[package]]
731
703
  name = "thiserror"
732
- version = "1.0.49"
704
+ version = "1.0.63"
733
705
  source = "registry+https://github.com/rust-lang/crates.io-index"
734
- checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4"
706
+ checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
735
707
  dependencies = [
736
708
  "thiserror-impl",
737
709
  ]
738
710
 
739
711
  [[package]]
740
712
  name = "thiserror-impl"
741
- version = "1.0.49"
713
+ version = "1.0.63"
742
714
  source = "registry+https://github.com/rust-lang/crates.io-index"
743
- checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
715
+ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
744
716
  dependencies = [
745
717
  "proc-macro2",
746
718
  "quote",
@@ -749,19 +721,19 @@ dependencies = [
749
721
 
750
722
  [[package]]
751
723
  name = "tokenizers"
752
- version = "0.5.0"
724
+ version = "0.5.1"
753
725
  dependencies = [
754
726
  "magnus",
755
727
  "onig",
756
728
  "serde",
757
- "tokenizers 0.19.1",
729
+ "tokenizers 0.20.0",
758
730
  ]
759
731
 
760
732
  [[package]]
761
733
  name = "tokenizers"
762
- version = "0.19.1"
734
+ version = "0.20.0"
763
735
  source = "registry+https://github.com/rust-lang/crates.io-index"
764
- checksum = "e500fad1dd3af3d626327e6a3fe5050e664a6eaa4708b8ca92f1794aaf73e6fd"
736
+ checksum = "c8a24d7f7d6be5b9d1377418b893ab1808af0074f5d1bb2c64784452ddd2aa70"
765
737
  dependencies = [
766
738
  "aho-corasick",
767
739
  "derive_builder",
@@ -791,9 +763,9 @@ dependencies = [
791
763
 
792
764
  [[package]]
793
765
  name = "unicode-ident"
794
- version = "1.0.8"
766
+ version = "1.0.12"
795
767
  source = "registry+https://github.com/rust-lang/crates.io-index"
796
- checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
768
+ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
797
769
 
798
770
  [[package]]
799
771
  name = "unicode-normalization-alignments"
@@ -812,9 +784,9 @@ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
812
784
 
813
785
  [[package]]
814
786
  name = "unicode-width"
815
- version = "0.1.10"
787
+ version = "0.1.13"
816
788
  source = "registry+https://github.com/rust-lang/crates.io-index"
817
- checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
789
+ checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d"
818
790
 
819
791
  [[package]]
820
792
  name = "unicode_categories"
@@ -829,36 +801,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
829
801
  checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
830
802
 
831
803
  [[package]]
832
- name = "winapi"
833
- version = "0.3.9"
804
+ name = "windows-sys"
805
+ version = "0.52.0"
834
806
  source = "registry+https://github.com/rust-lang/crates.io-index"
835
- checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
807
+ checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
836
808
  dependencies = [
837
- "winapi-i686-pc-windows-gnu",
838
- "winapi-x86_64-pc-windows-gnu",
809
+ "windows-targets",
839
810
  ]
840
811
 
841
812
  [[package]]
842
- name = "winapi-i686-pc-windows-gnu"
843
- version = "0.4.0"
813
+ name = "windows-targets"
814
+ version = "0.52.6"
844
815
  source = "registry+https://github.com/rust-lang/crates.io-index"
845
- checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
846
-
847
- [[package]]
848
- name = "winapi-x86_64-pc-windows-gnu"
849
- version = "0.4.0"
850
- source = "registry+https://github.com/rust-lang/crates.io-index"
851
- checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
852
-
853
- [[package]]
854
- name = "windows-sys"
855
- version = "0.42.0"
856
- source = "registry+https://github.com/rust-lang/crates.io-index"
857
- checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
816
+ checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
858
817
  dependencies = [
859
818
  "windows_aarch64_gnullvm",
860
819
  "windows_aarch64_msvc",
861
820
  "windows_i686_gnu",
821
+ "windows_i686_gnullvm",
862
822
  "windows_i686_msvc",
863
823
  "windows_x86_64_gnu",
864
824
  "windows_x86_64_gnullvm",
@@ -867,42 +827,69 @@ dependencies = [
867
827
 
868
828
  [[package]]
869
829
  name = "windows_aarch64_gnullvm"
870
- version = "0.42.2"
830
+ version = "0.52.6"
871
831
  source = "registry+https://github.com/rust-lang/crates.io-index"
872
- checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
832
+ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
873
833
 
874
834
  [[package]]
875
835
  name = "windows_aarch64_msvc"
876
- version = "0.42.2"
836
+ version = "0.52.6"
877
837
  source = "registry+https://github.com/rust-lang/crates.io-index"
878
- checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
838
+ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
879
839
 
880
840
  [[package]]
881
841
  name = "windows_i686_gnu"
882
- version = "0.42.2"
842
+ version = "0.52.6"
883
843
  source = "registry+https://github.com/rust-lang/crates.io-index"
884
- checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
844
+ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
845
+
846
+ [[package]]
847
+ name = "windows_i686_gnullvm"
848
+ version = "0.52.6"
849
+ source = "registry+https://github.com/rust-lang/crates.io-index"
850
+ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
885
851
 
886
852
  [[package]]
887
853
  name = "windows_i686_msvc"
888
- version = "0.42.2"
854
+ version = "0.52.6"
889
855
  source = "registry+https://github.com/rust-lang/crates.io-index"
890
- checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
856
+ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
891
857
 
892
858
  [[package]]
893
859
  name = "windows_x86_64_gnu"
894
- version = "0.42.2"
860
+ version = "0.52.6"
895
861
  source = "registry+https://github.com/rust-lang/crates.io-index"
896
- checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
862
+ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
897
863
 
898
864
  [[package]]
899
865
  name = "windows_x86_64_gnullvm"
900
- version = "0.42.2"
866
+ version = "0.52.6"
901
867
  source = "registry+https://github.com/rust-lang/crates.io-index"
902
- checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
868
+ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
903
869
 
904
870
  [[package]]
905
871
  name = "windows_x86_64_msvc"
906
- version = "0.42.2"
872
+ version = "0.52.6"
907
873
  source = "registry+https://github.com/rust-lang/crates.io-index"
908
- checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
874
+ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
875
+
876
+ [[package]]
877
+ name = "zerocopy"
878
+ version = "0.7.35"
879
+ source = "registry+https://github.com/rust-lang/crates.io-index"
880
+ checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
881
+ dependencies = [
882
+ "byteorder",
883
+ "zerocopy-derive",
884
+ ]
885
+
886
+ [[package]]
887
+ name = "zerocopy-derive"
888
+ version = "0.7.35"
889
+ source = "registry+https://github.com/rust-lang/crates.io-index"
890
+ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
891
+ dependencies = [
892
+ "proc-macro2",
893
+ "quote",
894
+ "syn",
895
+ ]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "tokenizers"
3
- version = "0.5.0"
3
+ version = "0.5.1"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,11 +11,11 @@ publish = false
11
11
  crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
- magnus = "0.6"
14
+ magnus = "0.7"
15
15
  onig = { version = "6", default-features = false }
16
16
  serde = { version = "1", features = ["rc", "derive"] }
17
17
 
18
18
  [dependencies.tokenizers]
19
- version = "=0.19.1" # also update in from_pretrained.rb
19
+ version = "=0.20.0" # also update in from_pretrained.rb
20
20
  default-features = false
21
21
  features = ["progressbar", "onig", "esaxx_fast"]
@@ -222,8 +222,8 @@ pub struct RbSequence {}
222
222
  impl RbSequence {
223
223
  fn new(normalizers: RArray) -> RbResult<RbNormalizer> {
224
224
  let mut sequence = Vec::with_capacity(normalizers.len());
225
- for n in normalizers.each() {
226
- let normalizer: &RbNormalizer = TryConvert::try_convert(n?)?;
225
+ for n in normalizers.into_iter() {
226
+ let normalizer: &RbNormalizer = TryConvert::try_convert(n)?;
227
227
  match &normalizer.normalizer {
228
228
  RbNormalizerTypeWrapper::Sequence(inner) => sequence.extend(inner.iter().cloned()),
229
229
  RbNormalizerTypeWrapper::Single(inner) => sequence.push(inner.clone()),
@@ -258,8 +258,8 @@ pub struct RbSequence {}
258
258
  impl RbSequence {
259
259
  fn new(pre_tokenizers: RArray) -> RbResult<RbPreTokenizer> {
260
260
  let mut sequence = Vec::with_capacity(pre_tokenizers.len());
261
- for n in pre_tokenizers.each() {
262
- let pretokenizer: &RbPreTokenizer = TryConvert::try_convert(n?)?;
261
+ for n in pre_tokenizers.into_iter() {
262
+ let pretokenizer: &RbPreTokenizer = TryConvert::try_convert(n)?;
263
263
  match &pretokenizer.pretok {
264
264
  RbPreTokenizerTypeWrapper::Sequence(inner) => {
265
265
  sequence.extend(inner.iter().cloned())
@@ -282,12 +282,12 @@ impl RbTokenizer {
282
282
  add_special_tokens: bool,
283
283
  ) -> RbResult<RArray> {
284
284
  let input: Vec<tk::EncodeInput> = input
285
- .each()
285
+ .into_iter()
286
286
  .map(|o| {
287
287
  let input: tk::EncodeInput = if is_pretokenized {
288
- PreTokenizedEncodeInput::try_convert(o?)?.into()
288
+ PreTokenizedEncodeInput::try_convert(o)?.into()
289
289
  } else {
290
- TextEncodeInput::try_convert(o?)?.into()
290
+ TextEncodeInput::try_convert(o)?.into()
291
291
  };
292
292
  Ok(input)
293
293
  })
@@ -319,26 +319,26 @@ impl RbTokenizer {
319
319
  .map_err(RbError::from)
320
320
  }
321
321
 
322
- pub fn set_decoder(&self, decoder: &RbDecoder) {
323
- self.tokenizer.borrow_mut().with_decoder(decoder.clone());
322
+ pub fn set_decoder(&self, decoder: Option<&RbDecoder>) {
323
+ self.tokenizer.borrow_mut().with_decoder(decoder.cloned());
324
324
  }
325
325
 
326
- pub fn set_pre_tokenizer(&self, pretok: &RbPreTokenizer) {
326
+ pub fn set_pre_tokenizer(&self, pretok: Option<&RbPreTokenizer>) {
327
327
  self.tokenizer
328
328
  .borrow_mut()
329
- .with_pre_tokenizer(pretok.clone());
329
+ .with_pre_tokenizer(pretok.cloned());
330
330
  }
331
331
 
332
- pub fn set_post_processor(&self, processor: &RbPostProcessor) {
332
+ pub fn set_post_processor(&self, processor: Option<&RbPostProcessor>) {
333
333
  self.tokenizer
334
334
  .borrow_mut()
335
- .with_post_processor(processor.clone());
335
+ .with_post_processor(processor.cloned());
336
336
  }
337
337
 
338
- pub fn set_normalizer(&self, normalizer: &RbNormalizer) {
338
+ pub fn set_normalizer(&self, normalizer: Option<&RbNormalizer>) {
339
339
  self.tokenizer
340
340
  .borrow_mut()
341
- .with_normalizer(normalizer.clone());
341
+ .with_normalizer(normalizer.cloned());
342
342
  }
343
343
 
344
344
  pub fn token_to_id(&self, token: String) -> Option<u32> {
@@ -110,9 +110,9 @@ impl RbTrainer {
110
110
  BpeTrainer,
111
111
  special_tokens,
112
112
  special_tokens
113
- .each()
113
+ .into_iter()
114
114
  .map(|token| {
115
- if let Ok(content) = String::try_convert(token?) {
115
+ if let Ok(content) = String::try_convert(token) {
116
116
  Ok(RbAddedToken::from(content, Some(true)).get_token())
117
117
  } else {
118
118
  todo!()
@@ -197,9 +197,9 @@ impl RbTrainer {
197
197
  UnigramTrainer,
198
198
  special_tokens,
199
199
  special_tokens
200
- .each()
200
+ .into_iter()
201
201
  .map(|token| {
202
- if let Ok(content) = String::try_convert(token?) {
202
+ if let Ok(content) = String::try_convert(token) {
203
203
  Ok(RbAddedToken::from(content, Some(true)).get_token())
204
204
  } else {
205
205
  todo!()
@@ -268,9 +268,9 @@ impl RbTrainer {
268
268
  WordLevelTrainer,
269
269
  special_tokens,
270
270
  special_tokens
271
- .each()
271
+ .into_iter()
272
272
  .map(|token| {
273
- if let Ok(content) = String::try_convert(token?) {
273
+ if let Ok(content) = String::try_convert(token) {
274
274
  Ok(RbAddedToken::from(content, Some(true)).get_token())
275
275
  } else {
276
276
  todo!()
@@ -322,9 +322,9 @@ impl RbTrainer {
322
322
  WordPieceTrainer,
323
323
  @set_special_tokens,
324
324
  special_tokens
325
- .each()
325
+ .into_iter()
326
326
  .map(|token| {
327
- if let Ok(content) = String::try_convert(token?) {
327
+ if let Ok(content) = String::try_convert(token) {
328
328
  Ok(RbAddedToken::from(content, Some(true)).get_token())
329
329
  } else {
330
330
  todo!()
@@ -398,9 +398,9 @@ impl RbBpeTrainer {
398
398
  if !value.is_nil() {
399
399
  builder = builder.special_tokens(
400
400
  RArray::try_convert(value)?
401
- .each()
401
+ .into_iter()
402
402
  .map(|token| {
403
- if let Ok(content) = String::try_convert(token?) {
403
+ if let Ok(content) = String::try_convert(token) {
404
404
  Ok(RbAddedToken::from(content, Some(true)).get_token())
405
405
  } else {
406
406
  todo!()
@@ -466,9 +466,9 @@ impl RbUnigramTrainer {
466
466
  if !value.is_nil() {
467
467
  builder.special_tokens(
468
468
  RArray::try_convert(value)?
469
- .each()
469
+ .into_iter()
470
470
  .map(|token| {
471
- if let Ok(content) = String::try_convert(token?) {
471
+ if let Ok(content) = String::try_convert(token) {
472
472
  Ok(RbAddedToken::from(content, Some(true)).get_token())
473
473
  } else {
474
474
  todo!()
@@ -540,9 +540,9 @@ impl RbWordLevelTrainer {
540
540
  if !value.is_nil() {
541
541
  builder.special_tokens(
542
542
  RArray::try_convert(value)?
543
- .each()
543
+ .into_iter()
544
544
  .map(|token| {
545
- if let Ok(content) = String::try_convert(token?) {
545
+ if let Ok(content) = String::try_convert(token) {
546
546
  Ok(RbAddedToken::from(content, Some(true)).get_token())
547
547
  } else {
548
548
  todo!()
@@ -581,9 +581,9 @@ impl RbWordPieceTrainer {
581
581
  if !value.is_nil() {
582
582
  builder = builder.special_tokens(
583
583
  RArray::try_convert(value)?
584
- .each()
584
+ .into_iter()
585
585
  .map(|token| {
586
- if let Ok(content) = String::try_convert(token?) {
586
+ if let Ok(content) = String::try_convert(token) {
587
587
  Ok(RbAddedToken::from(content, Some(true)).get_token())
588
588
  } else {
589
589
  todo!()
@@ -1,7 +1,7 @@
1
1
  module Tokenizers
2
2
  module FromPretrained
3
3
  # for user agent
4
- TOKENIZERS_VERSION = "0.19.1"
4
+ TOKENIZERS_VERSION = "0.20.0"
5
5
 
6
6
  # use Ruby for downloads
7
7
  # this avoids the need to vendor OpenSSL on Linux
@@ -67,7 +67,7 @@ module Tokenizers
67
67
  end
68
68
  end
69
69
 
70
- options[:content_length_proc] = -> (_) { puts "Downloading..." }
70
+ options[:content_length_proc] = ->(_) { puts "Downloading..." }
71
71
 
72
72
  # string options are headers
73
73
  tempfile = URI.parse(url).open(headers.merge(options))
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.5.0"
2
+ VERSION = "0.5.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-05-21 00:00:00.000000000 Z
11
+ date: 2024-08-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -100,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
100
100
  - !ruby/object:Gem::Version
101
101
  version: '0'
102
102
  requirements: []
103
- rubygems_version: 3.5.9
103
+ rubygems_version: 3.5.11
104
104
  signing_key:
105
105
  specification_version: 4
106
106
  summary: Fast state-of-the-art tokenizers for Ruby