tokenizers 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 258211e71ca06e96bb4ee01b15e29f6f74d3c70d04af246e95b178e10f093059
4
- data.tar.gz: 6e0b01c577830afdf1c7d677b1377191420d85e0f1f8638893f72cbb7ccef322
3
+ metadata.gz: 4846b5d3dc0fe8f5828ddffe46908b1f3812ebf6a03a939ca0395ad7748533bb
4
+ data.tar.gz: 259795bfa6b13a36f62ab2ffb65e9feabd460e01efe9f59e7d6017c6dcd9b9b0
5
5
  SHA512:
6
- metadata.gz: 4e0ea1f11dbab96b213190397ee8676d6233568f4fe013970a5a2c32105ed20ec06a5c8bc7379065799de315a0fc6d5f47807f9af47bc6f47926e4147c3eabcc
7
- data.tar.gz: ccd00b103577c6cff4dded6a3bc42394eccb3e24b950674a33eedf76df7c08bc89cda8219f076fce4cf20d90580da82c03e001a4e49ceb80e56ae4055b4617cf
6
+ metadata.gz: 90f55feb8ceec81815bb61b7773e6f67924eb6d47869e5da67d579e2ac9df6a48fddee5e97f5a028e3fa1e39941f3bfe6ec6c04cf49fd1b87f18bade54911231
7
+ data.tar.gz: 9c1895b43222494b393f3fbddaa6e78216e025f3d3dddebf0c0311d2d897b282a16b8b0044aacb2466790b0a93c8d01099b25d662750b96d5798d0a4a927267b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.1 (2024-08-13)
2
+
3
+ - Updated Tokenizers to 0.20.0
4
+ - Added precompiled gem for Linux ARM MUSL
5
+
1
6
  ## 0.5.0 (2024-05-21)
2
7
 
3
8
  - Updated Tokenizers to 0.19.1
data/Cargo.lock CHANGED
@@ -4,19 +4,13 @@ version = 3
4
4
 
5
5
  [[package]]
6
6
  name = "aho-corasick"
7
- version = "1.1.1"
7
+ version = "1.1.3"
8
8
  source = "registry+https://github.com/rust-lang/crates.io-index"
9
- checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab"
9
+ checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
10
10
  dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
- [[package]]
15
- name = "autocfg"
16
- version = "1.1.0"
17
- source = "registry+https://github.com/rust-lang/crates.io-index"
18
- checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
19
-
20
14
  [[package]]
21
15
  name = "base64"
22
16
  version = "0.13.1"
@@ -25,16 +19,16 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
25
19
 
26
20
  [[package]]
27
21
  name = "bindgen"
28
- version = "0.69.1"
22
+ version = "0.69.4"
29
23
  source = "registry+https://github.com/rust-lang/crates.io-index"
30
- checksum = "9ffcebc3849946a7170a05992aac39da343a90676ab392c51a4280981d6379c2"
24
+ checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
31
25
  dependencies = [
32
- "bitflags 2.4.1",
26
+ "bitflags 2.6.0",
33
27
  "cexpr",
34
28
  "clang-sys",
29
+ "itertools 0.12.1",
35
30
  "lazy_static",
36
31
  "lazycell",
37
- "peeking_take_while",
38
32
  "proc-macro2",
39
33
  "quote",
40
34
  "regex",
@@ -51,15 +45,21 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
51
45
 
52
46
  [[package]]
53
47
  name = "bitflags"
54
- version = "2.4.1"
48
+ version = "2.6.0"
55
49
  source = "registry+https://github.com/rust-lang/crates.io-index"
56
- checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
50
+ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
51
+
52
+ [[package]]
53
+ name = "byteorder"
54
+ version = "1.5.0"
55
+ source = "registry+https://github.com/rust-lang/crates.io-index"
56
+ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
57
57
 
58
58
  [[package]]
59
59
  name = "cc"
60
- version = "1.0.79"
60
+ version = "1.1.8"
61
61
  source = "registry+https://github.com/rust-lang/crates.io-index"
62
- checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
62
+ checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549"
63
63
 
64
64
  [[package]]
65
65
  name = "cexpr"
@@ -78,9 +78,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
78
78
 
79
79
  [[package]]
80
80
  name = "clang-sys"
81
- version = "1.6.1"
81
+ version = "1.8.1"
82
82
  source = "registry+https://github.com/rust-lang/crates.io-index"
83
- checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
83
+ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
84
84
  dependencies = [
85
85
  "glob",
86
86
  "libc",
@@ -89,9 +89,9 @@ dependencies = [
89
89
 
90
90
  [[package]]
91
91
  name = "console"
92
- version = "0.15.5"
92
+ version = "0.15.8"
93
93
  source = "registry+https://github.com/rust-lang/crates.io-index"
94
- checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60"
94
+ checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
95
95
  dependencies = [
96
96
  "encode_unicode",
97
97
  "lazy_static",
@@ -102,42 +102,34 @@ dependencies = [
102
102
 
103
103
  [[package]]
104
104
  name = "crossbeam-deque"
105
- version = "0.8.3"
105
+ version = "0.8.5"
106
106
  source = "registry+https://github.com/rust-lang/crates.io-index"
107
- checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
107
+ checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
108
108
  dependencies = [
109
- "cfg-if",
110
109
  "crossbeam-epoch",
111
110
  "crossbeam-utils",
112
111
  ]
113
112
 
114
113
  [[package]]
115
114
  name = "crossbeam-epoch"
116
- version = "0.9.14"
115
+ version = "0.9.18"
117
116
  source = "registry+https://github.com/rust-lang/crates.io-index"
118
- checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695"
117
+ checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
119
118
  dependencies = [
120
- "autocfg",
121
- "cfg-if",
122
119
  "crossbeam-utils",
123
- "memoffset",
124
- "scopeguard",
125
120
  ]
126
121
 
127
122
  [[package]]
128
123
  name = "crossbeam-utils"
129
- version = "0.8.15"
124
+ version = "0.8.20"
130
125
  source = "registry+https://github.com/rust-lang/crates.io-index"
131
- checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b"
132
- dependencies = [
133
- "cfg-if",
134
- ]
126
+ checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
135
127
 
136
128
  [[package]]
137
129
  name = "darling"
138
- version = "0.20.8"
130
+ version = "0.20.10"
139
131
  source = "registry+https://github.com/rust-lang/crates.io-index"
140
- checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391"
132
+ checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989"
141
133
  dependencies = [
142
134
  "darling_core",
143
135
  "darling_macro",
@@ -145,9 +137,9 @@ dependencies = [
145
137
 
146
138
  [[package]]
147
139
  name = "darling_core"
148
- version = "0.20.8"
140
+ version = "0.20.10"
149
141
  source = "registry+https://github.com/rust-lang/crates.io-index"
150
- checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f"
142
+ checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5"
151
143
  dependencies = [
152
144
  "fnv",
153
145
  "ident_case",
@@ -159,9 +151,9 @@ dependencies = [
159
151
 
160
152
  [[package]]
161
153
  name = "darling_macro"
162
- version = "0.20.8"
154
+ version = "0.20.10"
163
155
  source = "registry+https://github.com/rust-lang/crates.io-index"
164
- checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f"
156
+ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
165
157
  dependencies = [
166
158
  "darling_core",
167
159
  "quote",
@@ -201,9 +193,9 @@ dependencies = [
201
193
 
202
194
  [[package]]
203
195
  name = "either"
204
- version = "1.8.1"
196
+ version = "1.13.0"
205
197
  source = "registry+https://github.com/rust-lang/crates.io-index"
206
- checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
198
+ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
207
199
 
208
200
  [[package]]
209
201
  name = "encode_unicode"
@@ -228,9 +220,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
228
220
 
229
221
  [[package]]
230
222
  name = "getrandom"
231
- version = "0.2.10"
223
+ version = "0.2.15"
232
224
  source = "registry+https://github.com/rust-lang/crates.io-index"
233
- checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
225
+ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
234
226
  dependencies = [
235
227
  "cfg-if",
236
228
  "libc",
@@ -251,9 +243,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
251
243
 
252
244
  [[package]]
253
245
  name = "indicatif"
254
- version = "0.17.7"
246
+ version = "0.17.8"
255
247
  source = "registry+https://github.com/rust-lang/crates.io-index"
256
- checksum = "fb28741c9db9a713d93deb3bb9515c20788cef5815265bee4980e87bde7e0f25"
248
+ checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
257
249
  dependencies = [
258
250
  "console",
259
251
  "instant",
@@ -264,9 +256,9 @@ dependencies = [
264
256
 
265
257
  [[package]]
266
258
  name = "instant"
267
- version = "0.1.12"
259
+ version = "0.1.13"
268
260
  source = "registry+https://github.com/rust-lang/crates.io-index"
269
- checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
261
+ checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
270
262
  dependencies = [
271
263
  "cfg-if",
272
264
  ]
@@ -291,15 +283,15 @@ dependencies = [
291
283
 
292
284
  [[package]]
293
285
  name = "itoa"
294
- version = "1.0.6"
286
+ version = "1.0.11"
295
287
  source = "registry+https://github.com/rust-lang/crates.io-index"
296
- checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
288
+ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
297
289
 
298
290
  [[package]]
299
291
  name = "lazy_static"
300
- version = "1.4.0"
292
+ version = "1.5.0"
301
293
  source = "registry+https://github.com/rust-lang/crates.io-index"
302
- checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
294
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
303
295
 
304
296
  [[package]]
305
297
  name = "lazycell"
@@ -309,28 +301,25 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
309
301
 
310
302
  [[package]]
311
303
  name = "libc"
312
- version = "0.2.149"
304
+ version = "0.2.155"
313
305
  source = "registry+https://github.com/rust-lang/crates.io-index"
314
- checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
306
+ checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
315
307
 
316
308
  [[package]]
317
309
  name = "libloading"
318
- version = "0.7.4"
310
+ version = "0.8.5"
319
311
  source = "registry+https://github.com/rust-lang/crates.io-index"
320
- checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
312
+ checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
321
313
  dependencies = [
322
314
  "cfg-if",
323
- "winapi",
315
+ "windows-targets",
324
316
  ]
325
317
 
326
318
  [[package]]
327
319
  name = "log"
328
- version = "0.4.17"
320
+ version = "0.4.22"
329
321
  source = "registry+https://github.com/rust-lang/crates.io-index"
330
- checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
331
- dependencies = [
332
- "cfg-if",
333
- ]
322
+ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
334
323
 
335
324
  [[package]]
336
325
  name = "macro_rules_attribute"
@@ -350,9 +339,9 @@ checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568"
350
339
 
351
340
  [[package]]
352
341
  name = "magnus"
353
- version = "0.6.4"
342
+ version = "0.7.1"
354
343
  source = "registry+https://github.com/rust-lang/crates.io-index"
355
- checksum = "b1597ef40aa8c36be098249e82c9a20cf7199278ac1c1a1a995eeead6a184479"
344
+ checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
356
345
  dependencies = [
357
346
  "magnus-macros",
358
347
  "rb-sys",
@@ -373,18 +362,9 @@ dependencies = [
373
362
 
374
363
  [[package]]
375
364
  name = "memchr"
376
- version = "2.6.3"
365
+ version = "2.7.4"
377
366
  source = "registry+https://github.com/rust-lang/crates.io-index"
378
- checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
379
-
380
- [[package]]
381
- name = "memoffset"
382
- version = "0.8.0"
383
- source = "registry+https://github.com/rust-lang/crates.io-index"
384
- checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
385
- dependencies = [
386
- "autocfg",
387
- ]
367
+ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
388
368
 
389
369
  [[package]]
390
370
  name = "minimal-lexical"
@@ -394,9 +374,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
394
374
 
395
375
  [[package]]
396
376
  name = "monostate"
397
- version = "0.1.12"
377
+ version = "0.1.13"
398
378
  source = "registry+https://github.com/rust-lang/crates.io-index"
399
- checksum = "a20fffcd8ca4c69d31e036a71abc400147b41f90895df4edcb36497a1f8af8bf"
379
+ checksum = "0d208407d7552cd041d8cdb69a1bc3303e029c598738177a3d87082004dc0e1e"
400
380
  dependencies = [
401
381
  "monostate-impl",
402
382
  "serde",
@@ -404,9 +384,9 @@ dependencies = [
404
384
 
405
385
  [[package]]
406
386
  name = "monostate-impl"
407
- version = "0.1.12"
387
+ version = "0.1.13"
408
388
  source = "registry+https://github.com/rust-lang/crates.io-index"
409
- checksum = "bf307cbbbd777a9c10cec88ddafee572b3484caad5cce0c9236523c3803105a6"
389
+ checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0"
410
390
  dependencies = [
411
391
  "proc-macro2",
412
392
  "quote",
@@ -431,9 +411,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
431
411
 
432
412
  [[package]]
433
413
  name = "once_cell"
434
- version = "1.17.1"
414
+ version = "1.19.0"
435
415
  source = "registry+https://github.com/rust-lang/crates.io-index"
436
- checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
416
+ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
437
417
 
438
418
  [[package]]
439
419
  name = "onig"
@@ -459,39 +439,36 @@ dependencies = [
459
439
 
460
440
  [[package]]
461
441
  name = "paste"
462
- version = "1.0.14"
442
+ version = "1.0.15"
463
443
  source = "registry+https://github.com/rust-lang/crates.io-index"
464
- checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
465
-
466
- [[package]]
467
- name = "peeking_take_while"
468
- version = "0.1.2"
469
- source = "registry+https://github.com/rust-lang/crates.io-index"
470
- checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
444
+ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
471
445
 
472
446
  [[package]]
473
447
  name = "pkg-config"
474
- version = "0.3.26"
448
+ version = "0.3.30"
475
449
  source = "registry+https://github.com/rust-lang/crates.io-index"
476
- checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
450
+ checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
477
451
 
478
452
  [[package]]
479
453
  name = "portable-atomic"
480
- version = "1.4.3"
454
+ version = "1.7.0"
481
455
  source = "registry+https://github.com/rust-lang/crates.io-index"
482
- checksum = "31114a898e107c51bb1609ffaf55a0e011cf6a4d7f1170d0015a165082c0338b"
456
+ checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265"
483
457
 
484
458
  [[package]]
485
459
  name = "ppv-lite86"
486
- version = "0.2.17"
460
+ version = "0.2.20"
487
461
  source = "registry+https://github.com/rust-lang/crates.io-index"
488
- checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
462
+ checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
463
+ dependencies = [
464
+ "zerocopy",
465
+ ]
489
466
 
490
467
  [[package]]
491
468
  name = "proc-macro2"
492
- version = "1.0.81"
469
+ version = "1.0.86"
493
470
  source = "registry+https://github.com/rust-lang/crates.io-index"
494
- checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba"
471
+ checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
495
472
  dependencies = [
496
473
  "unicode-ident",
497
474
  ]
@@ -568,18 +545,18 @@ dependencies = [
568
545
 
569
546
  [[package]]
570
547
  name = "rb-sys"
571
- version = "0.9.97"
548
+ version = "0.9.100"
572
549
  source = "registry+https://github.com/rust-lang/crates.io-index"
573
- checksum = "47d30bcad206b51f2f66121190ca678dce1fdf3a2eae0ac5d838d1818b19bdf5"
550
+ checksum = "87f2ba20be84b32fad6b0ce397764bcdd0f2dca4431cf7035f6a6721e5747565"
574
551
  dependencies = [
575
552
  "rb-sys-build",
576
553
  ]
577
554
 
578
555
  [[package]]
579
556
  name = "rb-sys-build"
580
- version = "0.9.97"
557
+ version = "0.9.100"
581
558
  source = "registry+https://github.com/rust-lang/crates.io-index"
582
- checksum = "3cbd92f281615f3c2dcb9dcb0f0576624752afbf9a7f99173b37c4b55b62dd8a"
559
+ checksum = "7ecae2bdcb118ee721d9a3929f89e8578237fade298dfcf8c928609aa88abc48"
583
560
  dependencies = [
584
561
  "bindgen",
585
562
  "lazy_static",
@@ -598,9 +575,9 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
598
575
 
599
576
  [[package]]
600
577
  name = "regex"
601
- version = "1.10.4"
578
+ version = "1.10.6"
602
579
  source = "registry+https://github.com/rust-lang/crates.io-index"
603
- checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
580
+ checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
604
581
  dependencies = [
605
582
  "aho-corasick",
606
583
  "memchr",
@@ -610,9 +587,9 @@ dependencies = [
610
587
 
611
588
  [[package]]
612
589
  name = "regex-automata"
613
- version = "0.4.6"
590
+ version = "0.4.7"
614
591
  source = "registry+https://github.com/rust-lang/crates.io-index"
615
- checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
592
+ checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
616
593
  dependencies = [
617
594
  "aho-corasick",
618
595
  "memchr",
@@ -621,9 +598,9 @@ dependencies = [
621
598
 
622
599
  [[package]]
623
600
  name = "regex-syntax"
624
- version = "0.8.2"
601
+ version = "0.8.4"
625
602
  source = "registry+https://github.com/rust-lang/crates.io-index"
626
- checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
603
+ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
627
604
 
628
605
  [[package]]
629
606
  name = "rustc-hash"
@@ -633,15 +610,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
633
610
 
634
611
  [[package]]
635
612
  name = "ryu"
636
- version = "1.0.13"
613
+ version = "1.0.18"
637
614
  source = "registry+https://github.com/rust-lang/crates.io-index"
638
- checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
639
-
640
- [[package]]
641
- name = "scopeguard"
642
- version = "1.1.0"
643
- source = "registry+https://github.com/rust-lang/crates.io-index"
644
- checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
615
+ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
645
616
 
646
617
  [[package]]
647
618
  name = "seq-macro"
@@ -651,18 +622,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
651
622
 
652
623
  [[package]]
653
624
  name = "serde"
654
- version = "1.0.188"
625
+ version = "1.0.205"
655
626
  source = "registry+https://github.com/rust-lang/crates.io-index"
656
- checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
627
+ checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150"
657
628
  dependencies = [
658
629
  "serde_derive",
659
630
  ]
660
631
 
661
632
  [[package]]
662
633
  name = "serde_derive"
663
- version = "1.0.188"
634
+ version = "1.0.205"
664
635
  source = "registry+https://github.com/rust-lang/crates.io-index"
665
- checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
636
+ checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1"
666
637
  dependencies = [
667
638
  "proc-macro2",
668
639
  "quote",
@@ -671,11 +642,12 @@ dependencies = [
671
642
 
672
643
  [[package]]
673
644
  name = "serde_json"
674
- version = "1.0.95"
645
+ version = "1.0.122"
675
646
  source = "registry+https://github.com/rust-lang/crates.io-index"
676
- checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744"
647
+ checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da"
677
648
  dependencies = [
678
649
  "itoa",
650
+ "memchr",
679
651
  "ryu",
680
652
  "serde",
681
653
  ]
@@ -688,15 +660,15 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
688
660
 
689
661
  [[package]]
690
662
  name = "shlex"
691
- version = "1.1.0"
663
+ version = "1.3.0"
692
664
  source = "registry+https://github.com/rust-lang/crates.io-index"
693
- checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
665
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
694
666
 
695
667
  [[package]]
696
668
  name = "smallvec"
697
- version = "1.10.0"
669
+ version = "1.13.2"
698
670
  source = "registry+https://github.com/rust-lang/crates.io-index"
699
- checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
671
+ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
700
672
 
701
673
  [[package]]
702
674
  name = "spm_precompiled"
@@ -712,15 +684,15 @@ dependencies = [
712
684
 
713
685
  [[package]]
714
686
  name = "strsim"
715
- version = "0.10.0"
687
+ version = "0.11.1"
716
688
  source = "registry+https://github.com/rust-lang/crates.io-index"
717
- checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
689
+ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
718
690
 
719
691
  [[package]]
720
692
  name = "syn"
721
- version = "2.0.59"
693
+ version = "2.0.72"
722
694
  source = "registry+https://github.com/rust-lang/crates.io-index"
723
- checksum = "4a6531ffc7b071655e4ce2e04bd464c4830bb585a61cabb96cf808f05172615a"
695
+ checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
724
696
  dependencies = [
725
697
  "proc-macro2",
726
698
  "quote",
@@ -729,18 +701,18 @@ dependencies = [
729
701
 
730
702
  [[package]]
731
703
  name = "thiserror"
732
- version = "1.0.49"
704
+ version = "1.0.63"
733
705
  source = "registry+https://github.com/rust-lang/crates.io-index"
734
- checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4"
706
+ checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
735
707
  dependencies = [
736
708
  "thiserror-impl",
737
709
  ]
738
710
 
739
711
  [[package]]
740
712
  name = "thiserror-impl"
741
- version = "1.0.49"
713
+ version = "1.0.63"
742
714
  source = "registry+https://github.com/rust-lang/crates.io-index"
743
- checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
715
+ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
744
716
  dependencies = [
745
717
  "proc-macro2",
746
718
  "quote",
@@ -749,19 +721,19 @@ dependencies = [
749
721
 
750
722
  [[package]]
751
723
  name = "tokenizers"
752
- version = "0.5.0"
724
+ version = "0.5.1"
753
725
  dependencies = [
754
726
  "magnus",
755
727
  "onig",
756
728
  "serde",
757
- "tokenizers 0.19.1",
729
+ "tokenizers 0.20.0",
758
730
  ]
759
731
 
760
732
  [[package]]
761
733
  name = "tokenizers"
762
- version = "0.19.1"
734
+ version = "0.20.0"
763
735
  source = "registry+https://github.com/rust-lang/crates.io-index"
764
- checksum = "e500fad1dd3af3d626327e6a3fe5050e664a6eaa4708b8ca92f1794aaf73e6fd"
736
+ checksum = "c8a24d7f7d6be5b9d1377418b893ab1808af0074f5d1bb2c64784452ddd2aa70"
765
737
  dependencies = [
766
738
  "aho-corasick",
767
739
  "derive_builder",
@@ -791,9 +763,9 @@ dependencies = [
791
763
 
792
764
  [[package]]
793
765
  name = "unicode-ident"
794
- version = "1.0.8"
766
+ version = "1.0.12"
795
767
  source = "registry+https://github.com/rust-lang/crates.io-index"
796
- checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
768
+ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
797
769
 
798
770
  [[package]]
799
771
  name = "unicode-normalization-alignments"
@@ -812,9 +784,9 @@ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
812
784
 
813
785
  [[package]]
814
786
  name = "unicode-width"
815
- version = "0.1.10"
787
+ version = "0.1.13"
816
788
  source = "registry+https://github.com/rust-lang/crates.io-index"
817
- checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
789
+ checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d"
818
790
 
819
791
  [[package]]
820
792
  name = "unicode_categories"
@@ -829,36 +801,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
829
801
  checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
830
802
 
831
803
  [[package]]
832
- name = "winapi"
833
- version = "0.3.9"
804
+ name = "windows-sys"
805
+ version = "0.52.0"
834
806
  source = "registry+https://github.com/rust-lang/crates.io-index"
835
- checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
807
+ checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
836
808
  dependencies = [
837
- "winapi-i686-pc-windows-gnu",
838
- "winapi-x86_64-pc-windows-gnu",
809
+ "windows-targets",
839
810
  ]
840
811
 
841
812
  [[package]]
842
- name = "winapi-i686-pc-windows-gnu"
843
- version = "0.4.0"
813
+ name = "windows-targets"
814
+ version = "0.52.6"
844
815
  source = "registry+https://github.com/rust-lang/crates.io-index"
845
- checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
846
-
847
- [[package]]
848
- name = "winapi-x86_64-pc-windows-gnu"
849
- version = "0.4.0"
850
- source = "registry+https://github.com/rust-lang/crates.io-index"
851
- checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
852
-
853
- [[package]]
854
- name = "windows-sys"
855
- version = "0.42.0"
856
- source = "registry+https://github.com/rust-lang/crates.io-index"
857
- checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
816
+ checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
858
817
  dependencies = [
859
818
  "windows_aarch64_gnullvm",
860
819
  "windows_aarch64_msvc",
861
820
  "windows_i686_gnu",
821
+ "windows_i686_gnullvm",
862
822
  "windows_i686_msvc",
863
823
  "windows_x86_64_gnu",
864
824
  "windows_x86_64_gnullvm",
@@ -867,42 +827,69 @@ dependencies = [
867
827
 
868
828
  [[package]]
869
829
  name = "windows_aarch64_gnullvm"
870
- version = "0.42.2"
830
+ version = "0.52.6"
871
831
  source = "registry+https://github.com/rust-lang/crates.io-index"
872
- checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
832
+ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
873
833
 
874
834
  [[package]]
875
835
  name = "windows_aarch64_msvc"
876
- version = "0.42.2"
836
+ version = "0.52.6"
877
837
  source = "registry+https://github.com/rust-lang/crates.io-index"
878
- checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
838
+ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
879
839
 
880
840
  [[package]]
881
841
  name = "windows_i686_gnu"
882
- version = "0.42.2"
842
+ version = "0.52.6"
883
843
  source = "registry+https://github.com/rust-lang/crates.io-index"
884
- checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
844
+ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
845
+
846
+ [[package]]
847
+ name = "windows_i686_gnullvm"
848
+ version = "0.52.6"
849
+ source = "registry+https://github.com/rust-lang/crates.io-index"
850
+ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
885
851
 
886
852
  [[package]]
887
853
  name = "windows_i686_msvc"
888
- version = "0.42.2"
854
+ version = "0.52.6"
889
855
  source = "registry+https://github.com/rust-lang/crates.io-index"
890
- checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
856
+ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
891
857
 
892
858
  [[package]]
893
859
  name = "windows_x86_64_gnu"
894
- version = "0.42.2"
860
+ version = "0.52.6"
895
861
  source = "registry+https://github.com/rust-lang/crates.io-index"
896
- checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
862
+ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
897
863
 
898
864
  [[package]]
899
865
  name = "windows_x86_64_gnullvm"
900
- version = "0.42.2"
866
+ version = "0.52.6"
901
867
  source = "registry+https://github.com/rust-lang/crates.io-index"
902
- checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
868
+ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
903
869
 
904
870
  [[package]]
905
871
  name = "windows_x86_64_msvc"
906
- version = "0.42.2"
872
+ version = "0.52.6"
907
873
  source = "registry+https://github.com/rust-lang/crates.io-index"
908
- checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
874
+ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
875
+
876
+ [[package]]
877
+ name = "zerocopy"
878
+ version = "0.7.35"
879
+ source = "registry+https://github.com/rust-lang/crates.io-index"
880
+ checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
881
+ dependencies = [
882
+ "byteorder",
883
+ "zerocopy-derive",
884
+ ]
885
+
886
+ [[package]]
887
+ name = "zerocopy-derive"
888
+ version = "0.7.35"
889
+ source = "registry+https://github.com/rust-lang/crates.io-index"
890
+ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
891
+ dependencies = [
892
+ "proc-macro2",
893
+ "quote",
894
+ "syn",
895
+ ]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "tokenizers"
3
- version = "0.5.0"
3
+ version = "0.5.1"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,11 +11,11 @@ publish = false
11
11
  crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
- magnus = "0.6"
14
+ magnus = "0.7"
15
15
  onig = { version = "6", default-features = false }
16
16
  serde = { version = "1", features = ["rc", "derive"] }
17
17
 
18
18
  [dependencies.tokenizers]
19
- version = "=0.19.1" # also update in from_pretrained.rb
19
+ version = "=0.20.0" # also update in from_pretrained.rb
20
20
  default-features = false
21
21
  features = ["progressbar", "onig", "esaxx_fast"]
@@ -222,8 +222,8 @@ pub struct RbSequence {}
222
222
  impl RbSequence {
223
223
  fn new(normalizers: RArray) -> RbResult<RbNormalizer> {
224
224
  let mut sequence = Vec::with_capacity(normalizers.len());
225
- for n in normalizers.each() {
226
- let normalizer: &RbNormalizer = TryConvert::try_convert(n?)?;
225
+ for n in normalizers.into_iter() {
226
+ let normalizer: &RbNormalizer = TryConvert::try_convert(n)?;
227
227
  match &normalizer.normalizer {
228
228
  RbNormalizerTypeWrapper::Sequence(inner) => sequence.extend(inner.iter().cloned()),
229
229
  RbNormalizerTypeWrapper::Single(inner) => sequence.push(inner.clone()),
@@ -258,8 +258,8 @@ pub struct RbSequence {}
258
258
  impl RbSequence {
259
259
  fn new(pre_tokenizers: RArray) -> RbResult<RbPreTokenizer> {
260
260
  let mut sequence = Vec::with_capacity(pre_tokenizers.len());
261
- for n in pre_tokenizers.each() {
262
- let pretokenizer: &RbPreTokenizer = TryConvert::try_convert(n?)?;
261
+ for n in pre_tokenizers.into_iter() {
262
+ let pretokenizer: &RbPreTokenizer = TryConvert::try_convert(n)?;
263
263
  match &pretokenizer.pretok {
264
264
  RbPreTokenizerTypeWrapper::Sequence(inner) => {
265
265
  sequence.extend(inner.iter().cloned())
@@ -282,12 +282,12 @@ impl RbTokenizer {
282
282
  add_special_tokens: bool,
283
283
  ) -> RbResult<RArray> {
284
284
  let input: Vec<tk::EncodeInput> = input
285
- .each()
285
+ .into_iter()
286
286
  .map(|o| {
287
287
  let input: tk::EncodeInput = if is_pretokenized {
288
- PreTokenizedEncodeInput::try_convert(o?)?.into()
288
+ PreTokenizedEncodeInput::try_convert(o)?.into()
289
289
  } else {
290
- TextEncodeInput::try_convert(o?)?.into()
290
+ TextEncodeInput::try_convert(o)?.into()
291
291
  };
292
292
  Ok(input)
293
293
  })
@@ -319,26 +319,26 @@ impl RbTokenizer {
319
319
  .map_err(RbError::from)
320
320
  }
321
321
 
322
- pub fn set_decoder(&self, decoder: &RbDecoder) {
323
- self.tokenizer.borrow_mut().with_decoder(decoder.clone());
322
+ pub fn set_decoder(&self, decoder: Option<&RbDecoder>) {
323
+ self.tokenizer.borrow_mut().with_decoder(decoder.cloned());
324
324
  }
325
325
 
326
- pub fn set_pre_tokenizer(&self, pretok: &RbPreTokenizer) {
326
+ pub fn set_pre_tokenizer(&self, pretok: Option<&RbPreTokenizer>) {
327
327
  self.tokenizer
328
328
  .borrow_mut()
329
- .with_pre_tokenizer(pretok.clone());
329
+ .with_pre_tokenizer(pretok.cloned());
330
330
  }
331
331
 
332
- pub fn set_post_processor(&self, processor: &RbPostProcessor) {
332
+ pub fn set_post_processor(&self, processor: Option<&RbPostProcessor>) {
333
333
  self.tokenizer
334
334
  .borrow_mut()
335
- .with_post_processor(processor.clone());
335
+ .with_post_processor(processor.cloned());
336
336
  }
337
337
 
338
- pub fn set_normalizer(&self, normalizer: &RbNormalizer) {
338
+ pub fn set_normalizer(&self, normalizer: Option<&RbNormalizer>) {
339
339
  self.tokenizer
340
340
  .borrow_mut()
341
- .with_normalizer(normalizer.clone());
341
+ .with_normalizer(normalizer.cloned());
342
342
  }
343
343
 
344
344
  pub fn token_to_id(&self, token: String) -> Option<u32> {
@@ -110,9 +110,9 @@ impl RbTrainer {
110
110
  BpeTrainer,
111
111
  special_tokens,
112
112
  special_tokens
113
- .each()
113
+ .into_iter()
114
114
  .map(|token| {
115
- if let Ok(content) = String::try_convert(token?) {
115
+ if let Ok(content) = String::try_convert(token) {
116
116
  Ok(RbAddedToken::from(content, Some(true)).get_token())
117
117
  } else {
118
118
  todo!()
@@ -197,9 +197,9 @@ impl RbTrainer {
197
197
  UnigramTrainer,
198
198
  special_tokens,
199
199
  special_tokens
200
- .each()
200
+ .into_iter()
201
201
  .map(|token| {
202
- if let Ok(content) = String::try_convert(token?) {
202
+ if let Ok(content) = String::try_convert(token) {
203
203
  Ok(RbAddedToken::from(content, Some(true)).get_token())
204
204
  } else {
205
205
  todo!()
@@ -268,9 +268,9 @@ impl RbTrainer {
268
268
  WordLevelTrainer,
269
269
  special_tokens,
270
270
  special_tokens
271
- .each()
271
+ .into_iter()
272
272
  .map(|token| {
273
- if let Ok(content) = String::try_convert(token?) {
273
+ if let Ok(content) = String::try_convert(token) {
274
274
  Ok(RbAddedToken::from(content, Some(true)).get_token())
275
275
  } else {
276
276
  todo!()
@@ -322,9 +322,9 @@ impl RbTrainer {
322
322
  WordPieceTrainer,
323
323
  @set_special_tokens,
324
324
  special_tokens
325
- .each()
325
+ .into_iter()
326
326
  .map(|token| {
327
- if let Ok(content) = String::try_convert(token?) {
327
+ if let Ok(content) = String::try_convert(token) {
328
328
  Ok(RbAddedToken::from(content, Some(true)).get_token())
329
329
  } else {
330
330
  todo!()
@@ -398,9 +398,9 @@ impl RbBpeTrainer {
398
398
  if !value.is_nil() {
399
399
  builder = builder.special_tokens(
400
400
  RArray::try_convert(value)?
401
- .each()
401
+ .into_iter()
402
402
  .map(|token| {
403
- if let Ok(content) = String::try_convert(token?) {
403
+ if let Ok(content) = String::try_convert(token) {
404
404
  Ok(RbAddedToken::from(content, Some(true)).get_token())
405
405
  } else {
406
406
  todo!()
@@ -466,9 +466,9 @@ impl RbUnigramTrainer {
466
466
  if !value.is_nil() {
467
467
  builder.special_tokens(
468
468
  RArray::try_convert(value)?
469
- .each()
469
+ .into_iter()
470
470
  .map(|token| {
471
- if let Ok(content) = String::try_convert(token?) {
471
+ if let Ok(content) = String::try_convert(token) {
472
472
  Ok(RbAddedToken::from(content, Some(true)).get_token())
473
473
  } else {
474
474
  todo!()
@@ -540,9 +540,9 @@ impl RbWordLevelTrainer {
540
540
  if !value.is_nil() {
541
541
  builder.special_tokens(
542
542
  RArray::try_convert(value)?
543
- .each()
543
+ .into_iter()
544
544
  .map(|token| {
545
- if let Ok(content) = String::try_convert(token?) {
545
+ if let Ok(content) = String::try_convert(token) {
546
546
  Ok(RbAddedToken::from(content, Some(true)).get_token())
547
547
  } else {
548
548
  todo!()
@@ -581,9 +581,9 @@ impl RbWordPieceTrainer {
581
581
  if !value.is_nil() {
582
582
  builder = builder.special_tokens(
583
583
  RArray::try_convert(value)?
584
- .each()
584
+ .into_iter()
585
585
  .map(|token| {
586
- if let Ok(content) = String::try_convert(token?) {
586
+ if let Ok(content) = String::try_convert(token) {
587
587
  Ok(RbAddedToken::from(content, Some(true)).get_token())
588
588
  } else {
589
589
  todo!()
@@ -1,7 +1,7 @@
1
1
  module Tokenizers
2
2
  module FromPretrained
3
3
  # for user agent
4
- TOKENIZERS_VERSION = "0.19.1"
4
+ TOKENIZERS_VERSION = "0.20.0"
5
5
 
6
6
  # use Ruby for downloads
7
7
  # this avoids the need to vendor OpenSSL on Linux
@@ -67,7 +67,7 @@ module Tokenizers
67
67
  end
68
68
  end
69
69
 
70
- options[:content_length_proc] = -> (_) { puts "Downloading..." }
70
+ options[:content_length_proc] = ->(_) { puts "Downloading..." }
71
71
 
72
72
  # string options are headers
73
73
  tempfile = URI.parse(url).open(headers.merge(options))
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.5.0"
2
+ VERSION = "0.5.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-05-21 00:00:00.000000000 Z
11
+ date: 2024-08-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -100,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
100
100
  - !ruby/object:Gem::Version
101
101
  version: '0'
102
102
  requirements: []
103
- rubygems_version: 3.5.9
103
+ rubygems_version: 3.5.11
104
104
  signing_key:
105
105
  specification_version: 4
106
106
  summary: Fast state-of-the-art tokenizers for Ruby