tokenizers 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 30c4558340092b3fe4b60adbfed6b042810e27df9a62bd8fe828c3a2c9b5cf7a
4
- data.tar.gz: bc97136598b82cdb47b0d50de4ead4b5afd8500dc52b487496f3179dd48ecee8
3
+ metadata.gz: 8394d394a6ebaa502c53d08508586044c1f6e0ea8cd8c6629e6a7c2bed38518e
4
+ data.tar.gz: 1b54fa285fb6799c2cbc411c21c0f951db1fdacc3291b1da1971f420ada07820
5
5
  SHA512:
6
- metadata.gz: '0184d588343d823b0a2942828c0a496e131b5dfbae475d46ed7ebb2f3e89e5fd6d420705e88b31293331b247920c209653d0590b3aad618aab583a6a9ff49c8a'
7
- data.tar.gz: a7c590677a968516ae075fb46a5153e301b93e2bd13cf372d5cf020c4bd0c9c0cde7a7118e708e853a61c42a8957fcec73afa32e8a2eebd517943254905d0621
6
+ metadata.gz: f75af568151b9aa3fb9b57c2020464e5d1270174a25adff113f9058b8a92c11288f4df22c1f382af47c9312c1cca83f90214b1875a37334e232bd35bbfd4785e
7
+ data.tar.gz: a5b9d665dd2f985f03ea1056887169318c679a4fd30bc768f00109bd2d225958233f57f254a7c1e505c0b3b3fdb94b8a2fcd4027c55405088c6c1e88e3fd24be
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.4 (2024-12-28)
2
+
3
+ - Updated Tokenizers to 0.21.0
4
+ - Added support for Ruby 3.4
5
+
1
6
  ## 0.5.3 (2024-09-17)
2
7
 
3
8
  - Added `AddedToken` class
data/Cargo.lock CHANGED
@@ -19,9 +19,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
19
19
 
20
20
  [[package]]
21
21
  name = "bindgen"
22
- version = "0.69.4"
22
+ version = "0.69.5"
23
23
  source = "registry+https://github.com/rust-lang/crates.io-index"
24
- checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
24
+ checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
25
25
  dependencies = [
26
26
  "bitflags 2.6.0",
27
27
  "cexpr",
@@ -49,6 +49,12 @@ version = "2.6.0"
49
49
  source = "registry+https://github.com/rust-lang/crates.io-index"
50
50
  checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
51
51
 
52
+ [[package]]
53
+ name = "bumpalo"
54
+ version = "3.16.0"
55
+ source = "registry+https://github.com/rust-lang/crates.io-index"
56
+ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
57
+
52
58
  [[package]]
53
59
  name = "byteorder"
54
60
  version = "1.5.0"
@@ -57,9 +63,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
57
63
 
58
64
  [[package]]
59
65
  name = "cc"
60
- version = "1.1.15"
66
+ version = "1.2.6"
61
67
  source = "registry+https://github.com/rust-lang/crates.io-index"
62
- checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6"
68
+ checksum = "8d6dbb628b8f8555f86d0323c2eb39e3ec81901f4b83e091db8a6a76d316a333"
63
69
  dependencies = [
64
70
  "shlex",
65
71
  ]
@@ -92,22 +98,22 @@ dependencies = [
92
98
 
93
99
  [[package]]
94
100
  name = "console"
95
- version = "0.15.8"
101
+ version = "0.15.10"
96
102
  source = "registry+https://github.com/rust-lang/crates.io-index"
97
- checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
103
+ checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b"
98
104
  dependencies = [
99
105
  "encode_unicode",
100
- "lazy_static",
101
106
  "libc",
107
+ "once_cell",
102
108
  "unicode-width",
103
109
  "windows-sys",
104
110
  ]
105
111
 
106
112
  [[package]]
107
113
  name = "crossbeam-deque"
108
- version = "0.8.5"
114
+ version = "0.8.6"
109
115
  source = "registry+https://github.com/rust-lang/crates.io-index"
110
- checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
116
+ checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
111
117
  dependencies = [
112
118
  "crossbeam-epoch",
113
119
  "crossbeam-utils",
@@ -124,9 +130,9 @@ dependencies = [
124
130
 
125
131
  [[package]]
126
132
  name = "crossbeam-utils"
127
- version = "0.8.20"
133
+ version = "0.8.21"
128
134
  source = "registry+https://github.com/rust-lang/crates.io-index"
129
- checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
135
+ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
130
136
 
131
137
  [[package]]
132
138
  name = "darling"
@@ -165,18 +171,18 @@ dependencies = [
165
171
 
166
172
  [[package]]
167
173
  name = "derive_builder"
168
- version = "0.20.0"
174
+ version = "0.20.2"
169
175
  source = "registry+https://github.com/rust-lang/crates.io-index"
170
- checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
176
+ checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
171
177
  dependencies = [
172
178
  "derive_builder_macro",
173
179
  ]
174
180
 
175
181
  [[package]]
176
182
  name = "derive_builder_core"
177
- version = "0.20.0"
183
+ version = "0.20.2"
178
184
  source = "registry+https://github.com/rust-lang/crates.io-index"
179
- checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
185
+ checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
180
186
  dependencies = [
181
187
  "darling",
182
188
  "proc-macro2",
@@ -186,9 +192,9 @@ dependencies = [
186
192
 
187
193
  [[package]]
188
194
  name = "derive_builder_macro"
189
- version = "0.20.0"
195
+ version = "0.20.2"
190
196
  source = "registry+https://github.com/rust-lang/crates.io-index"
191
- checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
197
+ checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
192
198
  dependencies = [
193
199
  "derive_builder_core",
194
200
  "syn",
@@ -202,9 +208,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
202
208
 
203
209
  [[package]]
204
210
  name = "encode_unicode"
205
- version = "0.3.6"
211
+ version = "1.0.0"
206
212
  source = "registry+https://github.com/rust-lang/crates.io-index"
207
- checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
213
+ checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
208
214
 
209
215
  [[package]]
210
216
  name = "esaxx-rs"
@@ -234,9 +240,9 @@ dependencies = [
234
240
 
235
241
  [[package]]
236
242
  name = "glob"
237
- version = "0.3.1"
243
+ version = "0.3.2"
238
244
  source = "registry+https://github.com/rust-lang/crates.io-index"
239
- checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
245
+ checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
240
246
 
241
247
  [[package]]
242
248
  name = "ident_case"
@@ -246,24 +252,15 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
246
252
 
247
253
  [[package]]
248
254
  name = "indicatif"
249
- version = "0.17.8"
255
+ version = "0.17.9"
250
256
  source = "registry+https://github.com/rust-lang/crates.io-index"
251
- checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
257
+ checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281"
252
258
  dependencies = [
253
259
  "console",
254
- "instant",
255
260
  "number_prefix",
256
261
  "portable-atomic",
257
262
  "unicode-width",
258
- ]
259
-
260
- [[package]]
261
- name = "instant"
262
- version = "0.1.13"
263
- source = "registry+https://github.com/rust-lang/crates.io-index"
264
- checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
265
- dependencies = [
266
- "cfg-if",
263
+ "web-time",
267
264
  ]
268
265
 
269
266
  [[package]]
@@ -286,9 +283,19 @@ dependencies = [
286
283
 
287
284
  [[package]]
288
285
  name = "itoa"
289
- version = "1.0.11"
286
+ version = "1.0.14"
290
287
  source = "registry+https://github.com/rust-lang/crates.io-index"
291
- checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
288
+ checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
289
+
290
+ [[package]]
291
+ name = "js-sys"
292
+ version = "0.3.76"
293
+ source = "registry+https://github.com/rust-lang/crates.io-index"
294
+ checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7"
295
+ dependencies = [
296
+ "once_cell",
297
+ "wasm-bindgen",
298
+ ]
292
299
 
293
300
  [[package]]
294
301
  name = "lazy_static"
@@ -304,15 +311,15 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
304
311
 
305
312
  [[package]]
306
313
  name = "libc"
307
- version = "0.2.158"
314
+ version = "0.2.169"
308
315
  source = "registry+https://github.com/rust-lang/crates.io-index"
309
- checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
316
+ checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
310
317
 
311
318
  [[package]]
312
319
  name = "libloading"
313
- version = "0.8.5"
320
+ version = "0.8.6"
314
321
  source = "registry+https://github.com/rust-lang/crates.io-index"
315
- checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
322
+ checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
316
323
  dependencies = [
317
324
  "cfg-if",
318
325
  "windows-targets",
@@ -414,9 +421,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
414
421
 
415
422
  [[package]]
416
423
  name = "once_cell"
417
- version = "1.19.0"
424
+ version = "1.20.2"
418
425
  source = "registry+https://github.com/rust-lang/crates.io-index"
419
- checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
426
+ checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
420
427
 
421
428
  [[package]]
422
429
  name = "onig"
@@ -448,15 +455,15 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
448
455
 
449
456
  [[package]]
450
457
  name = "pkg-config"
451
- version = "0.3.30"
458
+ version = "0.3.31"
452
459
  source = "registry+https://github.com/rust-lang/crates.io-index"
453
- checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
460
+ checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
454
461
 
455
462
  [[package]]
456
463
  name = "portable-atomic"
457
- version = "1.7.0"
464
+ version = "1.10.0"
458
465
  source = "registry+https://github.com/rust-lang/crates.io-index"
459
- checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265"
466
+ checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
460
467
 
461
468
  [[package]]
462
469
  name = "ppv-lite86"
@@ -469,18 +476,18 @@ dependencies = [
469
476
 
470
477
  [[package]]
471
478
  name = "proc-macro2"
472
- version = "1.0.86"
479
+ version = "1.0.92"
473
480
  source = "registry+https://github.com/rust-lang/crates.io-index"
474
- checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
481
+ checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
475
482
  dependencies = [
476
483
  "unicode-ident",
477
484
  ]
478
485
 
479
486
  [[package]]
480
487
  name = "quote"
481
- version = "1.0.37"
488
+ version = "1.0.38"
482
489
  source = "registry+https://github.com/rust-lang/crates.io-index"
483
- checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
490
+ checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
484
491
  dependencies = [
485
492
  "proc-macro2",
486
493
  ]
@@ -548,18 +555,18 @@ dependencies = [
548
555
 
549
556
  [[package]]
550
557
  name = "rb-sys"
551
- version = "0.9.102"
558
+ version = "0.9.105"
552
559
  source = "registry+https://github.com/rust-lang/crates.io-index"
553
- checksum = "df4dec4b1d304c3b308a2cd86b1216ea45dd4361f4e9fa056f108332d0a450c1"
560
+ checksum = "4b3a1f3ce8e7c36d777d52fe7a99039fe4fea7c8ec355a4c4f3a17f92a14029f"
554
561
  dependencies = [
555
562
  "rb-sys-build",
556
563
  ]
557
564
 
558
565
  [[package]]
559
566
  name = "rb-sys-build"
560
- version = "0.9.102"
567
+ version = "0.9.105"
561
568
  source = "registry+https://github.com/rust-lang/crates.io-index"
562
- checksum = "1d71de3e29d174b8fb17b5d4470f27d7aa2605f8a9d05fda0d3aeff30e05a570"
569
+ checksum = "3e6b246c29c0809e1cbe60a1ba9e093da72a4676d02adc68469297d1e589bbf0"
563
570
  dependencies = [
564
571
  "bindgen",
565
572
  "lazy_static",
@@ -578,9 +585,9 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
578
585
 
579
586
  [[package]]
580
587
  name = "regex"
581
- version = "1.10.6"
588
+ version = "1.11.1"
582
589
  source = "registry+https://github.com/rust-lang/crates.io-index"
583
- checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619"
590
+ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
584
591
  dependencies = [
585
592
  "aho-corasick",
586
593
  "memchr",
@@ -590,9 +597,9 @@ dependencies = [
590
597
 
591
598
  [[package]]
592
599
  name = "regex-automata"
593
- version = "0.4.7"
600
+ version = "0.4.9"
594
601
  source = "registry+https://github.com/rust-lang/crates.io-index"
595
- checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
602
+ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
596
603
  dependencies = [
597
604
  "aho-corasick",
598
605
  "memchr",
@@ -601,9 +608,9 @@ dependencies = [
601
608
 
602
609
  [[package]]
603
610
  name = "regex-syntax"
604
- version = "0.8.4"
611
+ version = "0.8.5"
605
612
  source = "registry+https://github.com/rust-lang/crates.io-index"
606
- checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
613
+ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
607
614
 
608
615
  [[package]]
609
616
  name = "rustc-hash"
@@ -625,18 +632,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
625
632
 
626
633
  [[package]]
627
634
  name = "serde"
628
- version = "1.0.209"
635
+ version = "1.0.217"
629
636
  source = "registry+https://github.com/rust-lang/crates.io-index"
630
- checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
637
+ checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
631
638
  dependencies = [
632
639
  "serde_derive",
633
640
  ]
634
641
 
635
642
  [[package]]
636
643
  name = "serde_derive"
637
- version = "1.0.209"
644
+ version = "1.0.217"
638
645
  source = "registry+https://github.com/rust-lang/crates.io-index"
639
- checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
646
+ checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
640
647
  dependencies = [
641
648
  "proc-macro2",
642
649
  "quote",
@@ -645,9 +652,9 @@ dependencies = [
645
652
 
646
653
  [[package]]
647
654
  name = "serde_json"
648
- version = "1.0.127"
655
+ version = "1.0.134"
649
656
  source = "registry+https://github.com/rust-lang/crates.io-index"
650
- checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad"
657
+ checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d"
651
658
  dependencies = [
652
659
  "itoa",
653
660
  "memchr",
@@ -693,9 +700,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
693
700
 
694
701
  [[package]]
695
702
  name = "syn"
696
- version = "2.0.76"
703
+ version = "2.0.93"
697
704
  source = "registry+https://github.com/rust-lang/crates.io-index"
698
- checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525"
705
+ checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058"
699
706
  dependencies = [
700
707
  "proc-macro2",
701
708
  "quote",
@@ -704,18 +711,18 @@ dependencies = [
704
711
 
705
712
  [[package]]
706
713
  name = "thiserror"
707
- version = "1.0.63"
714
+ version = "1.0.69"
708
715
  source = "registry+https://github.com/rust-lang/crates.io-index"
709
- checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
716
+ checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
710
717
  dependencies = [
711
718
  "thiserror-impl",
712
719
  ]
713
720
 
714
721
  [[package]]
715
722
  name = "thiserror-impl"
716
- version = "1.0.63"
723
+ version = "1.0.69"
717
724
  source = "registry+https://github.com/rust-lang/crates.io-index"
718
- checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
725
+ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
719
726
  dependencies = [
720
727
  "proc-macro2",
721
728
  "quote",
@@ -724,19 +731,19 @@ dependencies = [
724
731
 
725
732
  [[package]]
726
733
  name = "tokenizers"
727
- version = "0.5.3"
734
+ version = "0.5.4"
728
735
  dependencies = [
729
736
  "magnus",
730
737
  "onig",
731
738
  "serde",
732
- "tokenizers 0.20.0",
739
+ "tokenizers 0.21.0",
733
740
  ]
734
741
 
735
742
  [[package]]
736
743
  name = "tokenizers"
737
- version = "0.20.0"
744
+ version = "0.21.0"
738
745
  source = "registry+https://github.com/rust-lang/crates.io-index"
739
- checksum = "c8a24d7f7d6be5b9d1377418b893ab1808af0074f5d1bb2c64784452ddd2aa70"
746
+ checksum = "9ecededfed68a69bc657e486510089e255e53c3d38cc7d4d59c8742668ca2cae"
740
747
  dependencies = [
741
748
  "aho-corasick",
742
749
  "derive_builder",
@@ -766,9 +773,9 @@ dependencies = [
766
773
 
767
774
  [[package]]
768
775
  name = "unicode-ident"
769
- version = "1.0.12"
776
+ version = "1.0.14"
770
777
  source = "registry+https://github.com/rust-lang/crates.io-index"
771
- checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
778
+ checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
772
779
 
773
780
  [[package]]
774
781
  name = "unicode-normalization-alignments"
@@ -781,15 +788,15 @@ dependencies = [
781
788
 
782
789
  [[package]]
783
790
  name = "unicode-segmentation"
784
- version = "1.11.0"
791
+ version = "1.12.0"
785
792
  source = "registry+https://github.com/rust-lang/crates.io-index"
786
- checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
793
+ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
787
794
 
788
795
  [[package]]
789
796
  name = "unicode-width"
790
- version = "0.1.13"
797
+ version = "0.2.0"
791
798
  source = "registry+https://github.com/rust-lang/crates.io-index"
792
- checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d"
799
+ checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
793
800
 
794
801
  [[package]]
795
802
  name = "unicode_categories"
@@ -803,11 +810,75 @@ version = "0.11.0+wasi-snapshot-preview1"
803
810
  source = "registry+https://github.com/rust-lang/crates.io-index"
804
811
  checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
805
812
 
813
+ [[package]]
814
+ name = "wasm-bindgen"
815
+ version = "0.2.99"
816
+ source = "registry+https://github.com/rust-lang/crates.io-index"
817
+ checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396"
818
+ dependencies = [
819
+ "cfg-if",
820
+ "once_cell",
821
+ "wasm-bindgen-macro",
822
+ ]
823
+
824
+ [[package]]
825
+ name = "wasm-bindgen-backend"
826
+ version = "0.2.99"
827
+ source = "registry+https://github.com/rust-lang/crates.io-index"
828
+ checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79"
829
+ dependencies = [
830
+ "bumpalo",
831
+ "log",
832
+ "proc-macro2",
833
+ "quote",
834
+ "syn",
835
+ "wasm-bindgen-shared",
836
+ ]
837
+
838
+ [[package]]
839
+ name = "wasm-bindgen-macro"
840
+ version = "0.2.99"
841
+ source = "registry+https://github.com/rust-lang/crates.io-index"
842
+ checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe"
843
+ dependencies = [
844
+ "quote",
845
+ "wasm-bindgen-macro-support",
846
+ ]
847
+
848
+ [[package]]
849
+ name = "wasm-bindgen-macro-support"
850
+ version = "0.2.99"
851
+ source = "registry+https://github.com/rust-lang/crates.io-index"
852
+ checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2"
853
+ dependencies = [
854
+ "proc-macro2",
855
+ "quote",
856
+ "syn",
857
+ "wasm-bindgen-backend",
858
+ "wasm-bindgen-shared",
859
+ ]
860
+
861
+ [[package]]
862
+ name = "wasm-bindgen-shared"
863
+ version = "0.2.99"
864
+ source = "registry+https://github.com/rust-lang/crates.io-index"
865
+ checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6"
866
+
867
+ [[package]]
868
+ name = "web-time"
869
+ version = "1.1.0"
870
+ source = "registry+https://github.com/rust-lang/crates.io-index"
871
+ checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
872
+ dependencies = [
873
+ "js-sys",
874
+ "wasm-bindgen",
875
+ ]
876
+
806
877
  [[package]]
807
878
  name = "windows-sys"
808
- version = "0.52.0"
879
+ version = "0.59.0"
809
880
  source = "registry+https://github.com/rust-lang/crates.io-index"
810
- checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
881
+ checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
811
882
  dependencies = [
812
883
  "windows-targets",
813
884
  ]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "tokenizers"
3
- version = "0.5.3"
3
+ version = "0.5.4"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -16,6 +16,6 @@ onig = { version = "6", default-features = false }
16
16
  serde = { version = "1", features = ["rc", "derive"] }
17
17
 
18
18
  [dependencies.tokenizers]
19
- version = "=0.20.0" # also update in from_pretrained.rb
19
+ version = "=0.21.0" # also update in from_pretrained.rb
20
20
  default-features = false
21
21
  features = ["progressbar", "onig", "esaxx_fast"]
@@ -3,8 +3,8 @@ use std::sync::{Arc, RwLock};
3
3
  use crate::pre_tokenizers::from_string;
4
4
  use magnus::value::Lazy;
5
5
  use magnus::{
6
- data_type_builder, function, method, Class, DataType, DataTypeFunctions, Module, Object, RClass, RModule,
7
- Ruby, TypedData,
6
+ data_type_builder, function, method, Class, DataType, DataTypeFunctions, Module, Object,
7
+ RClass, RModule, Ruby, TypedData,
8
8
  };
9
9
  use serde::{Deserialize, Serialize};
10
10
  use tk::decoders::bpe::BPEDecoder;
@@ -16,11 +16,11 @@ use tk::decoders::metaspace::{Metaspace, PrependScheme};
16
16
  use tk::decoders::strip::Strip;
17
17
  use tk::decoders::wordpiece::WordPiece;
18
18
  use tk::decoders::DecoderWrapper;
19
- use tk::Decoder;
20
19
  use tk::normalizers::replace::Replace;
20
+ use tk::Decoder;
21
21
 
22
22
  use super::utils::*;
23
- use super::{DECODERS, RbError, RbResult};
23
+ use super::{RbError, RbResult, DECODERS};
24
24
 
25
25
  #[derive(DataTypeFunctions, Clone, Deserialize, Serialize)]
26
26
  pub struct RbDecoder {
@@ -106,7 +106,7 @@ impl RbDecoder {
106
106
  }
107
107
 
108
108
  fn strip_set_content(&self, content: char) {
109
- setter!(self, Strip, content, content)
109
+ setter!(self, Strip, content, content);
110
110
  }
111
111
 
112
112
  fn strip_start(&self) -> usize {
@@ -114,7 +114,7 @@ impl RbDecoder {
114
114
  }
115
115
 
116
116
  fn strip_set_start(&self, start: usize) {
117
- setter!(self, Strip, start, start)
117
+ setter!(self, Strip, start, start);
118
118
  }
119
119
 
120
120
  fn strip_stop(&self) -> usize {
@@ -122,7 +122,7 @@ impl RbDecoder {
122
122
  }
123
123
 
124
124
  fn strip_set_stop(&self, stop: usize) {
125
- setter!(self, Strip, stop, stop)
125
+ setter!(self, Strip, stop, stop);
126
126
  }
127
127
 
128
128
  pub fn metaspace_replacement(&self) -> char {
@@ -228,7 +228,9 @@ pub struct RbReplaceDecoder {}
228
228
 
229
229
  impl RbReplaceDecoder {
230
230
  pub fn new(pattern: RbPattern, content: String) -> RbResult<RbDecoder> {
231
- Replace::new(pattern, content).map(|v| v.into()).map_err(RbError::from)
231
+ Replace::new(pattern, content)
232
+ .map(|v| v.into())
233
+ .map_err(RbError::from)
232
234
  }
233
235
  }
234
236
 
@@ -295,7 +297,8 @@ unsafe impl TypedData for RbDecoder {
295
297
  }
296
298
 
297
299
  fn data_type() -> &'static DataType {
298
- static DATA_TYPE: DataType = data_type_builder!(RbDecoder, "Tokenizers::Decoders::Decoder").build();
300
+ static DATA_TYPE: DataType =
301
+ data_type_builder!(RbDecoder, "Tokenizers::Decoders::Decoder").build();
299
302
  &DATA_TYPE
300
303
  }
301
304
 
@@ -383,18 +386,33 @@ pub fn init_decoders(ruby: &Ruby, module: &RModule) -> RbResult<()> {
383
386
  class.define_method("cleanup=", method!(RbDecoder::ctc_set_cleanup, 1))?;
384
387
  class.define_method("pad_token", method!(RbDecoder::ctc_pad_token, 0))?;
385
388
  class.define_method("pad_token=", method!(RbDecoder::ctc_set_pad_token, 1))?;
386
- class.define_method("word_delimiter_token", method!(RbDecoder::ctc_word_delimiter_token, 0))?;
387
- class.define_method("word_delimiter_token=", method!(RbDecoder::ctc_set_word_delimiter_token, 1))?;
389
+ class.define_method(
390
+ "word_delimiter_token",
391
+ method!(RbDecoder::ctc_word_delimiter_token, 0),
392
+ )?;
393
+ class.define_method(
394
+ "word_delimiter_token=",
395
+ method!(RbDecoder::ctc_set_word_delimiter_token, 1),
396
+ )?;
388
397
 
389
398
  let class = module.define_class("Fuse", decoder)?;
390
399
  class.define_singleton_method("new", function!(RbFuse::new, 0))?;
391
400
 
392
401
  let class = module.define_class("Metaspace", decoder)?;
393
402
  class.define_singleton_method("_new", function!(RbMetaspaceDecoder::new, 3))?;
394
- class.define_method("prepend_scheme", method!(RbDecoder::metaspace_prepend_scheme, 0))?;
395
- class.define_method("prepend_scheme=", method!(RbDecoder::metaspace_set_prepend_scheme, 1))?;
403
+ class.define_method(
404
+ "prepend_scheme",
405
+ method!(RbDecoder::metaspace_prepend_scheme, 0),
406
+ )?;
407
+ class.define_method(
408
+ "prepend_scheme=",
409
+ method!(RbDecoder::metaspace_set_prepend_scheme, 1),
410
+ )?;
396
411
  class.define_method("replacement", method!(RbDecoder::metaspace_replacement, 0))?;
397
- class.define_method("replacement=", method!(RbDecoder::metaspace_set_replacement, 1))?;
412
+ class.define_method(
413
+ "replacement=",
414
+ method!(RbDecoder::metaspace_set_replacement, 1),
415
+ )?;
398
416
  class.define_method("split", method!(RbDecoder::metaspace_split, 0))?;
399
417
  class.define_method("split=", method!(RbDecoder::metaspace_set_split, 1))?;
400
418
 
@@ -9,9 +9,14 @@ impl RbError {
9
9
  pub fn from(e: Box<dyn std::error::Error + Send + Sync>) -> Error {
10
10
  Error::new(error(), e.to_string())
11
11
  }
12
+
13
+ pub fn new_err(s: String) -> Error {
14
+ Error::new(error(), s)
15
+ }
12
16
  }
13
17
 
14
- static ERROR: Lazy<ExceptionClass> = Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Error").unwrap());
18
+ static ERROR: Lazy<ExceptionClass> =
19
+ Lazy::new(|ruby| ruby.get_inner(&TOKENIZERS).const_get("Error").unwrap());
15
20
 
16
21
  fn error() -> ExceptionClass {
17
22
  Ruby::get().unwrap().get_inner(&ERROR)