tokenizers 0.5.3 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 30c4558340092b3fe4b60adbfed6b042810e27df9a62bd8fe828c3a2c9b5cf7a
4
- data.tar.gz: bc97136598b82cdb47b0d50de4ead4b5afd8500dc52b487496f3179dd48ecee8
3
+ metadata.gz: 8394d394a6ebaa502c53d08508586044c1f6e0ea8cd8c6629e6a7c2bed38518e
4
+ data.tar.gz: 1b54fa285fb6799c2cbc411c21c0f951db1fdacc3291b1da1971f420ada07820
5
5
  SHA512:
6
- metadata.gz: '0184d588343d823b0a2942828c0a496e131b5dfbae475d46ed7ebb2f3e89e5fd6d420705e88b31293331b247920c209653d0590b3aad618aab583a6a9ff49c8a'
7
- data.tar.gz: a7c590677a968516ae075fb46a5153e301b93e2bd13cf372d5cf020c4bd0c9c0cde7a7118e708e853a61c42a8957fcec73afa32e8a2eebd517943254905d0621
6
+ metadata.gz: f75af568151b9aa3fb9b57c2020464e5d1270174a25adff113f9058b8a92c11288f4df22c1f382af47c9312c1cca83f90214b1875a37334e232bd35bbfd4785e
7
+ data.tar.gz: a5b9d665dd2f985f03ea1056887169318c679a4fd30bc768f00109bd2d225958233f57f254a7c1e505c0b3b3fdb94b8a2fcd4027c55405088c6c1e88e3fd24be
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.4 (2024-12-28)
2
+
3
+ - Updated Tokenizers to 0.21.0
4
+ - Added support for Ruby 3.4
5
+
1
6
  ## 0.5.3 (2024-09-17)
2
7
 
3
8
  - Added `AddedToken` class
data/Cargo.lock CHANGED
@@ -19,9 +19,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
19
19
 
20
20
  [[package]]
21
21
  name = "bindgen"
22
- version = "0.69.4"
22
+ version = "0.69.5"
23
23
  source = "registry+https://github.com/rust-lang/crates.io-index"
24
- checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
24
+ checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
25
25
  dependencies = [
26
26
  "bitflags 2.6.0",
27
27
  "cexpr",
@@ -49,6 +49,12 @@ version = "2.6.0"
49
49
  source = "registry+https://github.com/rust-lang/crates.io-index"
50
50
  checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
51
51
 
52
+ [[package]]
53
+ name = "bumpalo"
54
+ version = "3.16.0"
55
+ source = "registry+https://github.com/rust-lang/crates.io-index"
56
+ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
57
+
52
58
  [[package]]
53
59
  name = "byteorder"
54
60
  version = "1.5.0"
@@ -57,9 +63,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
57
63
 
58
64
  [[package]]
59
65
  name = "cc"
60
- version = "1.1.15"
66
+ version = "1.2.6"
61
67
  source = "registry+https://github.com/rust-lang/crates.io-index"
62
- checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6"
68
+ checksum = "8d6dbb628b8f8555f86d0323c2eb39e3ec81901f4b83e091db8a6a76d316a333"
63
69
  dependencies = [
64
70
  "shlex",
65
71
  ]
@@ -92,22 +98,22 @@ dependencies = [
92
98
 
93
99
  [[package]]
94
100
  name = "console"
95
- version = "0.15.8"
101
+ version = "0.15.10"
96
102
  source = "registry+https://github.com/rust-lang/crates.io-index"
97
- checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
103
+ checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b"
98
104
  dependencies = [
99
105
  "encode_unicode",
100
- "lazy_static",
101
106
  "libc",
107
+ "once_cell",
102
108
  "unicode-width",
103
109
  "windows-sys",
104
110
  ]
105
111
 
106
112
  [[package]]
107
113
  name = "crossbeam-deque"
108
- version = "0.8.5"
114
+ version = "0.8.6"
109
115
  source = "registry+https://github.com/rust-lang/crates.io-index"
110
- checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
116
+ checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
111
117
  dependencies = [
112
118
  "crossbeam-epoch",
113
119
  "crossbeam-utils",
@@ -124,9 +130,9 @@ dependencies = [
124
130
 
125
131
  [[package]]
126
132
  name = "crossbeam-utils"
127
- version = "0.8.20"
133
+ version = "0.8.21"
128
134
  source = "registry+https://github.com/rust-lang/crates.io-index"
129
- checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
135
+ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
130
136
 
131
137
  [[package]]
132
138
  name = "darling"
@@ -165,18 +171,18 @@ dependencies = [
165
171
 
166
172
  [[package]]
167
173
  name = "derive_builder"
168
- version = "0.20.0"
174
+ version = "0.20.2"
169
175
  source = "registry+https://github.com/rust-lang/crates.io-index"
170
- checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
176
+ checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
171
177
  dependencies = [
172
178
  "derive_builder_macro",
173
179
  ]
174
180
 
175
181
  [[package]]
176
182
  name = "derive_builder_core"
177
- version = "0.20.0"
183
+ version = "0.20.2"
178
184
  source = "registry+https://github.com/rust-lang/crates.io-index"
179
- checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
185
+ checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
180
186
  dependencies = [
181
187
  "darling",
182
188
  "proc-macro2",
@@ -186,9 +192,9 @@ dependencies = [
186
192
 
187
193
  [[package]]
188
194
  name = "derive_builder_macro"
189
- version = "0.20.0"
195
+ version = "0.20.2"
190
196
  source = "registry+https://github.com/rust-lang/crates.io-index"
191
- checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
197
+ checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
192
198
  dependencies = [
193
199
  "derive_builder_core",
194
200
  "syn",
@@ -202,9 +208,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
202
208
 
203
209
  [[package]]
204
210
  name = "encode_unicode"
205
- version = "0.3.6"
211
+ version = "1.0.0"
206
212
  source = "registry+https://github.com/rust-lang/crates.io-index"
207
- checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
213
+ checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
208
214
 
209
215
  [[package]]
210
216
  name = "esaxx-rs"
@@ -234,9 +240,9 @@ dependencies = [
234
240
 
235
241
  [[package]]
236
242
  name = "glob"
237
- version = "0.3.1"
243
+ version = "0.3.2"
238
244
  source = "registry+https://github.com/rust-lang/crates.io-index"
239
- checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
245
+ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
240
246
 
241
247
  [[package]]
242
248
  name = "ident_case"
@@ -246,24 +252,15 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
246
252
 
247
253
  [[package]]
248
254
  name = "indicatif"
249
- version = "0.17.8"
255
+ version = "0.17.9"
250
256
  source = "registry+https://github.com/rust-lang/crates.io-index"
251
- checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
257
+ checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281"
252
258
  dependencies = [
253
259
  "console",
254
- "instant",
255
260
  "number_prefix",
256
261
  "portable-atomic",
257
262
  "unicode-width",
258
- ]
259
-
260
- [[package]]
261
- name = "instant"
262
- version = "0.1.13"
263
- source = "registry+https://github.com/rust-lang/crates.io-index"
264
- checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
265
- dependencies = [
266
- "cfg-if",
263
+ "web-time",
267
264
  ]
268
265
 
269
266
  [[package]]
@@ -286,9 +283,19 @@ dependencies = [
286
283
 
287
284
  [[package]]
288
285
  name = "itoa"
289
- version = "1.0.11"
286
+ version = "1.0.14"
290
287
  source = "registry+https://github.com/rust-lang/crates.io-index"
291
- checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
288
+ checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
289
+
290
+ [[package]]
291
+ name = "js-sys"
292
+ version = "0.3.76"
293
+ source = "registry+https://github.com/rust-lang/crates.io-index"
294
+ checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7"
295
+ dependencies = [
296
+ "once_cell",
297
+ "wasm-bindgen",
298
+ ]
292
299
 
293
300
  [[package]]
294
301
  name = "lazy_static"
@@ -304,15 +311,15 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
304
311
 
305
312
  [[package]]
306
313
  name = "libc"
307
- version = "0.2.158"
314
+ version = "0.2.169"
308
315
  source = "registry+https://github.com/rust-lang/crates.io-index"
309
- checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
316
+ checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
310
317
 
311
318
  [[package]]
312
319
  name = "libloading"
313
- version = "0.8.5"
320
+ version = "0.8.6"
314
321
  source = "registry+https://github.com/rust-lang/crates.io-index"
315
- checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
322
+ checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
316
323
  dependencies = [
317
324
  "cfg-if",
318
325
  "windows-targets",
@@ -414,9 +421,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
414
421
 
415
422
  [[package]]
416
423
  name = "once_cell"
417
- version = "1.19.0"
424
+ version = "1.20.2"
418
425
  source = "registry+https://github.com/rust-lang/crates.io-index"
419
- checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
426
+ checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
420
427
 
421
428
  [[package]]
422
429
  name = "onig"
@@ -448,15 +455,15 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
448
455
 
449
456
  [[package]]
450
457
  name = "pkg-config"
451
- version = "0.3.30"
458
+ version = "0.3.31"
452
459
  source = "registry+https://github.com/rust-lang/crates.io-index"
453
- checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
460
+ checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
454
461
 
455
462
  [[package]]
456
463
  name = "portable-atomic"
457
- version = "1.7.0"
464
+ version = "1.10.0"
458
465
  source = "registry+https://github.com/rust-lang/crates.io-index"
459
- checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265"
466
+ checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
460
467
 
461
468
  [[package]]
462
469
  name = "ppv-lite86"
@@ -469,18 +476,18 @@ dependencies = [
469
476
 
470
477
  [[package]]
471
478
  name = "proc-macro2"
472
- version = "1.0.86"
479
+ version = "1.0.92"
473
480
  source = "registry+https://github.com/rust-lang/crates.io-index"
474
- checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
481
+ checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
475
482
  dependencies = [
476
483
  "unicode-ident",
477
484
  ]
478
485
 
479
486
  [[package]]
480
487
  name = "quote"
481
- version = "1.0.37"
488
+ version = "1.0.38"
482
489
  source = "registry+https://github.com/rust-lang/crates.io-index"
483
- checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
490
+ checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
484
491
  dependencies = [
485
492
  "proc-macro2",
486
493
  ]
@@ -548,18 +555,18 @@ dependencies = [
548
555
 
549
556
  [[package]]
550
557
  name = "rb-sys"
551
- version = "0.9.102"
558
+ version = "0.9.105"
552
559
  source = "registry+https://github.com/rust-lang/crates.io-index"
553
- checksum = "df4dec4b1d304c3b308a2cd86b1216ea45dd4361f4e9fa056f108332d0a450c1"
560
+ checksum = "4b3a1f3ce8e7c36d777d52fe7a99039fe4fea7c8ec355a4c4f3a17f92a14029f"
554
561
  dependencies = [
555
562
  "rb-sys-build",
556
563
  ]
557
564
 
558
565
  [[package]]
559
566
  name = "rb-sys-build"
560
- version = "0.9.102"
567
+ version = "0.9.105"
561
568
  source = "registry+https://github.com/rust-lang/crates.io-index"
562
- checksum = "1d71de3e29d174b8fb17b5d4470f27d7aa2605f8a9d05fda0d3aeff30e05a570"
569
+ checksum = "3e6b246c29c0809e1cbe60a1ba9e093da72a4676d02adc68469297d1e589bbf0"
563
570
  dependencies = [
564
571
  "bindgen",
565
572
  "lazy_static",
@@ -578,9 +585,9 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
578
585
 
579
586
  [[package]]
580
587
  name = "regex"
581
- version = "1.10.6"
588
+ version = "1.11.1"
582
589
  source = "registry+https://github.com/rust-lang/crates.io-index"
583
- checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
590
+ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
584
591
  dependencies = [
585
592
  "aho-corasick",
586
593
  "memchr",
@@ -590,9 +597,9 @@ dependencies = [
590
597
 
591
598
  [[package]]
592
599
  name = "regex-automata"
593
- version = "0.4.7"
600
+ version = "0.4.9"
594
601
  source = "registry+https://github.com/rust-lang/crates.io-index"
595
- checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
602
+ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
596
603
  dependencies = [
597
604
  "aho-corasick",
598
605
  "memchr",
@@ -601,9 +608,9 @@ dependencies = [
601
608
 
602
609
  [[package]]
603
610
  name = "regex-syntax"
604
- version = "0.8.4"
611
+ version = "0.8.5"
605
612
  source = "registry+https://github.com/rust-lang/crates.io-index"
606
- checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
613
+ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
607
614
 
608
615
  [[package]]
609
616
  name = "rustc-hash"
@@ -625,18 +632,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
625
632
 
626
633
  [[package]]
627
634
  name = "serde"
628
- version = "1.0.209"
635
+ version = "1.0.217"
629
636
  source = "registry+https://github.com/rust-lang/crates.io-index"
630
- checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
637
+ checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
631
638
  dependencies = [
632
639
  "serde_derive",
633
640
  ]
634
641
 
635
642
  [[package]]
636
643
  name = "serde_derive"
637
- version = "1.0.209"
644
+ version = "1.0.217"
638
645
  source = "registry+https://github.com/rust-lang/crates.io-index"
639
- checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
646
+ checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
640
647
  dependencies = [
641
648
  "proc-macro2",
642
649
  "quote",
@@ -645,9 +652,9 @@ dependencies = [
645
652
 
646
653
  [[package]]
647
654
  name = "serde_json"
648
- version = "1.0.127"
655
+ version = "1.0.134"
649
656
  source = "registry+https://github.com/rust-lang/crates.io-index"
650
- checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad"
657
+ checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d"
651
658
  dependencies = [
652
659
  "itoa",
653
660
  "memchr",
@@ -693,9 +700,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
693
700
 
694
701
  [[package]]
695
702
  name = "syn"
696
- version = "2.0.76"
703
+ version = "2.0.93"
697
704
  source = "registry+https://github.com/rust-lang/crates.io-index"
698
- checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525"
705
+ checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058"
699
706
  dependencies = [
700
707
  "proc-macro2",
701
708
  "quote",
@@ -704,18 +711,18 @@ dependencies = [
704
711
 
705
712
  [[package]]
706
713
  name = "thiserror"
707
- version = "1.0.63"
714
+ version = "1.0.69"
708
715
  source = "registry+https://github.com/rust-lang/crates.io-index"
709
- checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
716
+ checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
710
717
  dependencies = [
711
718
  "thiserror-impl",
712
719
  ]
713
720
 
714
721
  [[package]]
715
722
  name = "thiserror-impl"
716
- version = "1.0.63"
723
+ version = "1.0.69"
717
724
  source = "registry+https://github.com/rust-lang/crates.io-index"
718
- checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
725
+ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
719
726
  dependencies = [
720
727
  "proc-macro2",
721
728
  "quote",
@@ -724,19 +731,19 @@ dependencies = [
724
731
 
725
732
  [[package]]
726
733
  name = "tokenizers"
727
- version = "0.5.3"
734
+ version = "0.5.4"
728
735
  dependencies = [
729
736
  "magnus",
730
737
  "onig",
731
738
  "serde",
732
- "tokenizers 0.20.0",
739
+ "tokenizers 0.21.0",
733
740
  ]
734
741
 
735
742
  [[package]]
736
743
  name = "tokenizers"
737
- version = "0.20.0"
744
+ version = "0.21.0"
738
745
  source = "registry+https://github.com/rust-lang/crates.io-index"
739
- checksum = "c8a24d7f7d6be5b9d1377418b893ab1808af0074f5d1bb2c64784452ddd2aa70"
746
+ checksum = "9ecededfed68a69bc657e486510089e255e53c3d38cc7d4d59c8742668ca2cae"
740
747
  dependencies = [
741
748
  "aho-corasick",
742
749
  "derive_builder",
@@ -766,9 +773,9 @@ dependencies = [
766
773
 
767
774
  [[package]]
768
775
  name = "unicode-ident"
769
- version = "1.0.12"
776
+ version = "1.0.14"
770
777
  source = "registry+https://github.com/rust-lang/crates.io-index"
771
- checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
778
+ checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
772
779
 
773
780
  [[package]]
774
781
  name = "unicode-normalization-alignments"
@@ -781,15 +788,15 @@ dependencies = [
781
788
 
782
789
  [[package]]
783
790
  name = "unicode-segmentation"
784
- version = "1.11.0"
791
+ version = "1.12.0"
785
792
  source = "registry+https://github.com/rust-lang/crates.io-index"
786
- checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
793
+ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
787
794
 
788
795
  [[package]]
789
796
  name = "unicode-width"
790
- version = "0.1.13"
797
+ version = "0.2.0"
791
798
  source = "registry+https://github.com/rust-lang/crates.io-index"
792
- checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d"
799
+ checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
793
800
 
794
801
  [[package]]
795
802
  name = "unicode_categories"
@@ -803,11 +810,75 @@ version = "0.11.0+wasi-snapshot-preview1"
803
810
  source = "registry+https://github.com/rust-lang/crates.io-index"
804
811
  checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
805
812
 
813
+ [[package]]
814
+ name = "wasm-bindgen"
815
+ version = "0.2.99"
816
+ source = "registry+https://github.com/rust-lang/crates.io-index"
817
+ checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396"
818
+ dependencies = [
819
+ "cfg-if",
820
+ "once_cell",
821
+ "wasm-bindgen-macro",
822
+ ]
823
+
824
+ [[package]]
825
+ name = "wasm-bindgen-backend"
826
+ version = "0.2.99"
827
+ source = "registry+https://github.com/rust-lang/crates.io-index"
828
+ checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79"
829
+ dependencies = [
830
+ "bumpalo",
831
+ "log",
832
+ "proc-macro2",
833
+ "quote",
834
+ "syn",
835
+ "wasm-bindgen-shared",
836
+ ]
837
+
838
+ [[package]]
839
+ name = "wasm-bindgen-macro"
840
+ version = "0.2.99"
841
+ source = "registry+https://github.com/rust-lang/crates.io-index"
842
+ checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe"
843
+ dependencies = [
844
+ "quote",
845
+ "wasm-bindgen-macro-support",
846
+ ]
847
+
848
+ [[package]]
849
+ name = "wasm-bindgen-macro-support"
850
+ version = "0.2.99"
851
+ source = "registry+https://github.com/rust-lang/crates.io-index"
852
+ checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2"
853
+ dependencies = [
854
+ "proc-macro2",
855
+ "quote",
856
+ "syn",
857
+ "wasm-bindgen-backend",
858
+ "wasm-bindgen-shared",
859
+ ]
860
+
861
+ [[package]]
862
+ name = "wasm-bindgen-shared"
863
+ version = "0.2.99"
864
+ source = "registry+https://github.com/rust-lang/crates.io-index"
865
+ checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6"
866
+
867
+ [[package]]
868
+ name = "web-time"
869
+ version = "1.1.0"
870
+ source = "registry+https://github.com/rust-lang/crates.io-index"
871
+ checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
872
+ dependencies = [
873
+ "js-sys",
874
+ "wasm-bindgen",
875
+ ]
876
+
806
877
  [[package]]
807
878
  name = "windows-sys"
808
- version = "0.52.0"
879
+ version = "0.59.0"
809
880
  source = "registry+https://github.com/rust-lang/crates.io-index"
810
- checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
881
+ checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
811
882
  dependencies = [
812
883
  "windows-targets",
813
884
  ]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "tokenizers"
3
- version = "0.5.3"
3
+ version = "0.5.4"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -16,6 +16,6 @@ onig = { version = "6", default-features = false }
16
16
  serde = { version = "1", features = ["rc", "derive"] }
17
17
 
18
18
  [dependencies.tokenizers]
19
- version = "=0.20.0" # also update in from_pretrained.rb
19
+ version = "=0.21.0" # also update in from_pretrained.rb
20
20
  default-features = false
21
21
  features = ["progressbar", "onig", "esaxx_fast"]
@@ -3,8 +3,8 @@ use std::sync::{Arc, RwLock};
3
3
  use crate::pre_tokenizers::from_string;
4
4
  use magnus::value::Lazy;
5
5
  use magnus::{
6
- data_type_builder, function, method, Class, DataType, DataTypeFunctions, Module, Object, RClass, RModule,
7
- Ruby, TypedData,
6
+ data_type_builder, function, method, Class, DataType, DataTypeFunctions, Module, Object,
7
+ RClass, RModule, Ruby, TypedData,
8
8
  };
9
9
  use serde::{Deserialize, Serialize};
10
10
  use tk::decoders::bpe::BPEDecoder;
@@ -16,11 +16,11 @@ use tk::decoders::metaspace::{Metaspace, PrependScheme};
16
16
  use tk::decoders::strip::Strip;
17
17
  use tk::decoders::wordpiece::WordPiece;
18
18
  use tk::decoders::DecoderWrapper;
19
- use tk::Decoder;
20
19
  use tk::normalizers::replace::Replace;
20
+ use tk::Decoder;
21
21
 
22
22
  use super::utils::*;
23
- use super::{DECODERS, RbError, RbResult};
23
+ use super::{RbError, RbResult, DECODERS};
24
24
 
25
25
  #[derive(DataTypeFunctions, Clone, Deserialize, Serialize)]
26
26
  pub struct RbDecoder {
@@ -106,7 +106,7 @@ impl RbDecoder {
106
106
  }
107
107
 
108
108
  fn strip_set_content(&self, content: char) {
109
- setter!(self, Strip, content, content)
109
+ setter!(self, Strip, content, content);
110
110
  }
111
111
 
112
112
  fn strip_start(&self) -> usize {
@@ -114,7 +114,7 @@ impl RbDecoder {
114
114
  }
115
115
 
116
116
  fn strip_set_start(&self, start: usize) {
117
- setter!(self, Strip, start, start)
117
+ setter!(self, Strip, start, start);
118
118
  }
119
119
 
120
120
  fn strip_stop(&self) -> usize {
@@ -122,7 +122,7 @@ impl RbDecoder {
122
122
  }
123
123
 
124
124
  fn strip_set_stop(&self, stop: usize) {
125
- setter!(self, Strip, stop, stop)
125
+ setter!(self, Strip, stop, stop);
126
126
  }
127
127
 
128
128
  pub fn metaspace_replacement(&self) -> char {
@@ -228,7 +228,9 @@ pub struct RbReplaceDecoder {}
228
228
 
229
229
  impl RbReplaceDecoder {
230
230
  pub fn new(pattern: RbPattern, content: String) -> RbResult<RbDecoder> {
231
- Replace::new(pattern, content).map(|v| v.into()).map_err(RbError::from)
231
+ Replace::new(pattern, content)
232
+ .map(|v| v.into())
233
+ .map_err(RbError::from)
232
234
  }
233
235
  }
234
236
 
@@ -295,7 +297,8 @@ unsafe impl TypedData for RbDecoder {
295
297
  }
296
298
 
297
299
  fn data_type() -> &'static DataType {
298
- static DATA_TYPE: DataType = data_type_builder!(RbDecoder, "Tokenizers::Decoders::Decoder").build();
300
+ static DATA_TYPE: DataType =
301
+ data_type_builder!(RbDecoder, "Tokenizers::Decoders::Decoder").build();
299
302
  &DATA_TYPE
300
303
  }
301
304
 
@@ -383,18 +386,33 @@ pub fn init_decoders(ruby: &Ruby, module: &RModule) -> RbResult<()> {
383
386
  class.define_method("cleanup=", method!(RbDecoder::ctc_set_cleanup, 1))?;
384
387
  class.define_method("pad_token", method!(RbDecoder::ctc_pad_token, 0))?;
385
388
  class.define_method("pad_token=", method!(RbDecoder::ctc_set_pad_token, 1))?;
386
- class.define_method("word_delimiter_token", method!(RbDecoder::ctc_word_delimiter_token, 0))?;
387
- class.define_method("word_delimiter_token=", method!(RbDecoder::ctc_set_word_delimiter_token, 1))?;
389
+ class.define_method(
390
+ "word_delimiter_token",
391
+ method!(RbDecoder::ctc_word_delimiter_token, 0),
392
+ )?;
393
+ class.define_method(
394
+ "word_delimiter_token=",
395
+ method!(RbDecoder::ctc_set_word_delimiter_token, 1),
396
+ )?;
388
397
 
389
398
  let class = module.define_class("Fuse", decoder)?;
390
399
  class.define_singleton_method("new", function!(RbFuse::new, 0))?;
391
400
 
392
401
  let class = module.define_class("Metaspace", decoder)?;
393
402
  class.define_singleton_method("_new", function!(RbMetaspaceDecoder::new, 3))?;
394
- class.define_method("prepend_scheme", method!(RbDecoder::metaspace_prepend_scheme, 0))?;
395
- class.define_method("prepend_scheme=", method!(RbDecoder::metaspace_set_prepend_scheme, 1))?;
403
+ class.define_method(
404
+ "prepend_scheme",
405
+ method!(RbDecoder::metaspace_prepend_scheme, 0),
406
+ )?;
407
+ class.define_method(
408
+ "prepend_scheme=",
409
+ method!(RbDecoder::metaspace_set_prepend_scheme, 1),
410
+ )?;
396
411
  class.define_method("replacement", method!(RbDecoder::metaspace_replacement, 0))?;
397
- class.define_method("replacement=", method!(RbDecoder::metaspace_set_replacement, 1))?;
412
+ class.define_method(
413
+ "replacement=",
414
+ method!(RbDecoder::metaspace_set_replacement, 1),
415
+ )?;
398
416
  class.define_method("split", method!(RbDecoder::metaspace_split, 0))?;
399
417
  class.define_method("split=", method!(RbDecoder::metaspace_set_split, 1))?;
400
418
 
@@ -9,9 +9,14 @@ impl RbError {
9
9
  pub fn from(e: Box<dyn std::error::Error + Send + Sync>) -> Error {
10
10
  Error::new(error(), e.to_string())
11
11
  }
12
+
13
+ pub fn new_err(s: String) -> Error {
14
+ Error::new(error(), s)
15
+ }
12
16
  }
13
17
 
14
- static ERROR: Lazy<ExceptionClass> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Error").unwrap());
18
+ static ERROR: Lazy<ExceptionClass> =
19
+ Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Error").unwrap());
15
20
 
16
21
  fn error() -> ExceptionClass {
17
22
  Ruby::get().unwrap().get_inner(&ERROR)