tokenizers 0.5.3-x64-mingw-ucrt → 0.5.4-x64-mingw-ucrt
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +154 -83
- data/LICENSE-THIRD-PARTY.txt +74 -81
- data/lib/tokenizers/3.1/tokenizers.so +0 -0
- data/lib/tokenizers/3.2/tokenizers.so +0 -0
- data/lib/tokenizers/3.3/tokenizers.so +0 -0
- data/lib/tokenizers/3.4/tokenizers.so +0 -0
- data/lib/tokenizers/from_pretrained.rb +1 -1
- data/lib/tokenizers/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c6bb67a2361bc8e34a9561f9b5d6afa684f9315e2a24a21519ede0e0143d4545
|
4
|
+
data.tar.gz: 3835ac6ca9ac2a2fc50b351c97663a870344a995b1bf68920a2c289f236c5568
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de40170d02d6359ba3f53b8e8a53c552af7618654045f4888c6eb44e06c28b6ce7df99c0b31acb3c8da4bea566e4515973d43fb5fd01cde3a3c68c05f675e3b2
|
7
|
+
data.tar.gz: '082ae45456ce3543afbb3f551d21df793b3fdf23f60705c12bf9a0a1be71a8ef28d903cdebb41e1d66e5453c919f41db21cb757a4e8f2940ebff555d8623a1f2'
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -19,9 +19,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
|
19
19
|
|
20
20
|
[[package]]
|
21
21
|
name = "bindgen"
|
22
|
-
version = "0.69.
|
22
|
+
version = "0.69.5"
|
23
23
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
24
|
-
checksum = "
|
24
|
+
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
|
25
25
|
dependencies = [
|
26
26
|
"bitflags 2.6.0",
|
27
27
|
"cexpr",
|
@@ -49,6 +49,12 @@ version = "2.6.0"
|
|
49
49
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
50
50
|
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
51
51
|
|
52
|
+
[[package]]
|
53
|
+
name = "bumpalo"
|
54
|
+
version = "3.16.0"
|
55
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
56
|
+
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
|
57
|
+
|
52
58
|
[[package]]
|
53
59
|
name = "byteorder"
|
54
60
|
version = "1.5.0"
|
@@ -57,9 +63,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
|
57
63
|
|
58
64
|
[[package]]
|
59
65
|
name = "cc"
|
60
|
-
version = "1.
|
66
|
+
version = "1.2.6"
|
61
67
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
62
|
-
checksum = "
|
68
|
+
checksum = "8d6dbb628b8f8555f86d0323c2eb39e3ec81901f4b83e091db8a6a76d316a333"
|
63
69
|
dependencies = [
|
64
70
|
"shlex",
|
65
71
|
]
|
@@ -92,22 +98,22 @@ dependencies = [
|
|
92
98
|
|
93
99
|
[[package]]
|
94
100
|
name = "console"
|
95
|
-
version = "0.15.
|
101
|
+
version = "0.15.10"
|
96
102
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
97
|
-
checksum = "
|
103
|
+
checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b"
|
98
104
|
dependencies = [
|
99
105
|
"encode_unicode",
|
100
|
-
"lazy_static",
|
101
106
|
"libc",
|
107
|
+
"once_cell",
|
102
108
|
"unicode-width",
|
103
109
|
"windows-sys",
|
104
110
|
]
|
105
111
|
|
106
112
|
[[package]]
|
107
113
|
name = "crossbeam-deque"
|
108
|
-
version = "0.8.
|
114
|
+
version = "0.8.6"
|
109
115
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
110
|
-
checksum = "
|
116
|
+
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
111
117
|
dependencies = [
|
112
118
|
"crossbeam-epoch",
|
113
119
|
"crossbeam-utils",
|
@@ -124,9 +130,9 @@ dependencies = [
|
|
124
130
|
|
125
131
|
[[package]]
|
126
132
|
name = "crossbeam-utils"
|
127
|
-
version = "0.8.
|
133
|
+
version = "0.8.21"
|
128
134
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
129
|
-
checksum = "
|
135
|
+
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
130
136
|
|
131
137
|
[[package]]
|
132
138
|
name = "darling"
|
@@ -165,18 +171,18 @@ dependencies = [
|
|
165
171
|
|
166
172
|
[[package]]
|
167
173
|
name = "derive_builder"
|
168
|
-
version = "0.20.
|
174
|
+
version = "0.20.2"
|
169
175
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
170
|
-
checksum = "
|
176
|
+
checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
|
171
177
|
dependencies = [
|
172
178
|
"derive_builder_macro",
|
173
179
|
]
|
174
180
|
|
175
181
|
[[package]]
|
176
182
|
name = "derive_builder_core"
|
177
|
-
version = "0.20.
|
183
|
+
version = "0.20.2"
|
178
184
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
179
|
-
checksum = "
|
185
|
+
checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
|
180
186
|
dependencies = [
|
181
187
|
"darling",
|
182
188
|
"proc-macro2",
|
@@ -186,9 +192,9 @@ dependencies = [
|
|
186
192
|
|
187
193
|
[[package]]
|
188
194
|
name = "derive_builder_macro"
|
189
|
-
version = "0.20.
|
195
|
+
version = "0.20.2"
|
190
196
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
191
|
-
checksum = "
|
197
|
+
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
|
192
198
|
dependencies = [
|
193
199
|
"derive_builder_core",
|
194
200
|
"syn",
|
@@ -202,9 +208,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
|
202
208
|
|
203
209
|
[[package]]
|
204
210
|
name = "encode_unicode"
|
205
|
-
version = "0.
|
211
|
+
version = "1.0.0"
|
206
212
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
207
|
-
checksum = "
|
213
|
+
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
|
208
214
|
|
209
215
|
[[package]]
|
210
216
|
name = "esaxx-rs"
|
@@ -234,9 +240,9 @@ dependencies = [
|
|
234
240
|
|
235
241
|
[[package]]
|
236
242
|
name = "glob"
|
237
|
-
version = "0.3.
|
243
|
+
version = "0.3.2"
|
238
244
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
239
|
-
checksum = "
|
245
|
+
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
240
246
|
|
241
247
|
[[package]]
|
242
248
|
name = "ident_case"
|
@@ -246,24 +252,15 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
|
246
252
|
|
247
253
|
[[package]]
|
248
254
|
name = "indicatif"
|
249
|
-
version = "0.17.
|
255
|
+
version = "0.17.9"
|
250
256
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
251
|
-
checksum = "
|
257
|
+
checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281"
|
252
258
|
dependencies = [
|
253
259
|
"console",
|
254
|
-
"instant",
|
255
260
|
"number_prefix",
|
256
261
|
"portable-atomic",
|
257
262
|
"unicode-width",
|
258
|
-
|
259
|
-
|
260
|
-
[[package]]
|
261
|
-
name = "instant"
|
262
|
-
version = "0.1.13"
|
263
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
264
|
-
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
|
265
|
-
dependencies = [
|
266
|
-
"cfg-if",
|
263
|
+
"web-time",
|
267
264
|
]
|
268
265
|
|
269
266
|
[[package]]
|
@@ -286,9 +283,19 @@ dependencies = [
|
|
286
283
|
|
287
284
|
[[package]]
|
288
285
|
name = "itoa"
|
289
|
-
version = "1.0.
|
286
|
+
version = "1.0.14"
|
290
287
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
291
|
-
checksum = "
|
288
|
+
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
|
289
|
+
|
290
|
+
[[package]]
|
291
|
+
name = "js-sys"
|
292
|
+
version = "0.3.76"
|
293
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
294
|
+
checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7"
|
295
|
+
dependencies = [
|
296
|
+
"once_cell",
|
297
|
+
"wasm-bindgen",
|
298
|
+
]
|
292
299
|
|
293
300
|
[[package]]
|
294
301
|
name = "lazy_static"
|
@@ -304,15 +311,15 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
304
311
|
|
305
312
|
[[package]]
|
306
313
|
name = "libc"
|
307
|
-
version = "0.2.
|
314
|
+
version = "0.2.169"
|
308
315
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
309
|
-
checksum = "
|
316
|
+
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
310
317
|
|
311
318
|
[[package]]
|
312
319
|
name = "libloading"
|
313
|
-
version = "0.8.
|
320
|
+
version = "0.8.6"
|
314
321
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
315
|
-
checksum = "
|
322
|
+
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
|
316
323
|
dependencies = [
|
317
324
|
"cfg-if",
|
318
325
|
"windows-targets",
|
@@ -414,9 +421,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
|
414
421
|
|
415
422
|
[[package]]
|
416
423
|
name = "once_cell"
|
417
|
-
version = "1.
|
424
|
+
version = "1.20.2"
|
418
425
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
419
|
-
checksum = "
|
426
|
+
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
|
420
427
|
|
421
428
|
[[package]]
|
422
429
|
name = "onig"
|
@@ -448,15 +455,15 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
|
|
448
455
|
|
449
456
|
[[package]]
|
450
457
|
name = "pkg-config"
|
451
|
-
version = "0.3.
|
458
|
+
version = "0.3.31"
|
452
459
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
453
|
-
checksum = "
|
460
|
+
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
|
454
461
|
|
455
462
|
[[package]]
|
456
463
|
name = "portable-atomic"
|
457
|
-
version = "1.
|
464
|
+
version = "1.10.0"
|
458
465
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
459
|
-
checksum = "
|
466
|
+
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
|
460
467
|
|
461
468
|
[[package]]
|
462
469
|
name = "ppv-lite86"
|
@@ -469,18 +476,18 @@ dependencies = [
|
|
469
476
|
|
470
477
|
[[package]]
|
471
478
|
name = "proc-macro2"
|
472
|
-
version = "1.0.
|
479
|
+
version = "1.0.92"
|
473
480
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
474
|
-
checksum = "
|
481
|
+
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
|
475
482
|
dependencies = [
|
476
483
|
"unicode-ident",
|
477
484
|
]
|
478
485
|
|
479
486
|
[[package]]
|
480
487
|
name = "quote"
|
481
|
-
version = "1.0.
|
488
|
+
version = "1.0.38"
|
482
489
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
483
|
-
checksum = "
|
490
|
+
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
|
484
491
|
dependencies = [
|
485
492
|
"proc-macro2",
|
486
493
|
]
|
@@ -548,18 +555,18 @@ dependencies = [
|
|
548
555
|
|
549
556
|
[[package]]
|
550
557
|
name = "rb-sys"
|
551
|
-
version = "0.9.
|
558
|
+
version = "0.9.105"
|
552
559
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
553
|
-
checksum = "
|
560
|
+
checksum = "4b3a1f3ce8e7c36d777d52fe7a99039fe4fea7c8ec355a4c4f3a17f92a14029f"
|
554
561
|
dependencies = [
|
555
562
|
"rb-sys-build",
|
556
563
|
]
|
557
564
|
|
558
565
|
[[package]]
|
559
566
|
name = "rb-sys-build"
|
560
|
-
version = "0.9.
|
567
|
+
version = "0.9.105"
|
561
568
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
562
|
-
checksum = "
|
569
|
+
checksum = "3e6b246c29c0809e1cbe60a1ba9e093da72a4676d02adc68469297d1e589bbf0"
|
563
570
|
dependencies = [
|
564
571
|
"bindgen",
|
565
572
|
"lazy_static",
|
@@ -578,9 +585,9 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
578
585
|
|
579
586
|
[[package]]
|
580
587
|
name = "regex"
|
581
|
-
version = "1.
|
588
|
+
version = "1.11.1"
|
582
589
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
583
|
-
checksum = "
|
590
|
+
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
584
591
|
dependencies = [
|
585
592
|
"aho-corasick",
|
586
593
|
"memchr",
|
@@ -590,9 +597,9 @@ dependencies = [
|
|
590
597
|
|
591
598
|
[[package]]
|
592
599
|
name = "regex-automata"
|
593
|
-
version = "0.4.
|
600
|
+
version = "0.4.9"
|
594
601
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
595
|
-
checksum = "
|
602
|
+
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
596
603
|
dependencies = [
|
597
604
|
"aho-corasick",
|
598
605
|
"memchr",
|
@@ -601,9 +608,9 @@ dependencies = [
|
|
601
608
|
|
602
609
|
[[package]]
|
603
610
|
name = "regex-syntax"
|
604
|
-
version = "0.8.
|
611
|
+
version = "0.8.5"
|
605
612
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
606
|
-
checksum = "
|
613
|
+
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
607
614
|
|
608
615
|
[[package]]
|
609
616
|
name = "rustc-hash"
|
@@ -625,18 +632,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
|
625
632
|
|
626
633
|
[[package]]
|
627
634
|
name = "serde"
|
628
|
-
version = "1.0.
|
635
|
+
version = "1.0.217"
|
629
636
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
630
|
-
checksum = "
|
637
|
+
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
|
631
638
|
dependencies = [
|
632
639
|
"serde_derive",
|
633
640
|
]
|
634
641
|
|
635
642
|
[[package]]
|
636
643
|
name = "serde_derive"
|
637
|
-
version = "1.0.
|
644
|
+
version = "1.0.217"
|
638
645
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
639
|
-
checksum = "
|
646
|
+
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
|
640
647
|
dependencies = [
|
641
648
|
"proc-macro2",
|
642
649
|
"quote",
|
@@ -645,9 +652,9 @@ dependencies = [
|
|
645
652
|
|
646
653
|
[[package]]
|
647
654
|
name = "serde_json"
|
648
|
-
version = "1.0.
|
655
|
+
version = "1.0.134"
|
649
656
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
650
|
-
checksum = "
|
657
|
+
checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d"
|
651
658
|
dependencies = [
|
652
659
|
"itoa",
|
653
660
|
"memchr",
|
@@ -693,9 +700,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
|
693
700
|
|
694
701
|
[[package]]
|
695
702
|
name = "syn"
|
696
|
-
version = "2.0.
|
703
|
+
version = "2.0.93"
|
697
704
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
698
|
-
checksum = "
|
705
|
+
checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058"
|
699
706
|
dependencies = [
|
700
707
|
"proc-macro2",
|
701
708
|
"quote",
|
@@ -704,18 +711,18 @@ dependencies = [
|
|
704
711
|
|
705
712
|
[[package]]
|
706
713
|
name = "thiserror"
|
707
|
-
version = "1.0.
|
714
|
+
version = "1.0.69"
|
708
715
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
709
|
-
checksum = "
|
716
|
+
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
710
717
|
dependencies = [
|
711
718
|
"thiserror-impl",
|
712
719
|
]
|
713
720
|
|
714
721
|
[[package]]
|
715
722
|
name = "thiserror-impl"
|
716
|
-
version = "1.0.
|
723
|
+
version = "1.0.69"
|
717
724
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
718
|
-
checksum = "
|
725
|
+
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
|
719
726
|
dependencies = [
|
720
727
|
"proc-macro2",
|
721
728
|
"quote",
|
@@ -724,19 +731,19 @@ dependencies = [
|
|
724
731
|
|
725
732
|
[[package]]
|
726
733
|
name = "tokenizers"
|
727
|
-
version = "0.5.
|
734
|
+
version = "0.5.4"
|
728
735
|
dependencies = [
|
729
736
|
"magnus",
|
730
737
|
"onig",
|
731
738
|
"serde",
|
732
|
-
"tokenizers 0.
|
739
|
+
"tokenizers 0.21.0",
|
733
740
|
]
|
734
741
|
|
735
742
|
[[package]]
|
736
743
|
name = "tokenizers"
|
737
|
-
version = "0.
|
744
|
+
version = "0.21.0"
|
738
745
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
739
|
-
checksum = "
|
746
|
+
checksum = "9ecededfed68a69bc657e486510089e255e53c3d38cc7d4d59c8742668ca2cae"
|
740
747
|
dependencies = [
|
741
748
|
"aho-corasick",
|
742
749
|
"derive_builder",
|
@@ -766,9 +773,9 @@ dependencies = [
|
|
766
773
|
|
767
774
|
[[package]]
|
768
775
|
name = "unicode-ident"
|
769
|
-
version = "1.0.
|
776
|
+
version = "1.0.14"
|
770
777
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
771
|
-
checksum = "
|
778
|
+
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
|
772
779
|
|
773
780
|
[[package]]
|
774
781
|
name = "unicode-normalization-alignments"
|
@@ -781,15 +788,15 @@ dependencies = [
|
|
781
788
|
|
782
789
|
[[package]]
|
783
790
|
name = "unicode-segmentation"
|
784
|
-
version = "1.
|
791
|
+
version = "1.12.0"
|
785
792
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
786
|
-
checksum = "
|
793
|
+
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
|
787
794
|
|
788
795
|
[[package]]
|
789
796
|
name = "unicode-width"
|
790
|
-
version = "0.
|
797
|
+
version = "0.2.0"
|
791
798
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
792
|
-
checksum = "
|
799
|
+
checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
|
793
800
|
|
794
801
|
[[package]]
|
795
802
|
name = "unicode_categories"
|
@@ -803,11 +810,75 @@ version = "0.11.0+wasi-snapshot-preview1"
|
|
803
810
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
804
811
|
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
805
812
|
|
813
|
+
[[package]]
|
814
|
+
name = "wasm-bindgen"
|
815
|
+
version = "0.2.99"
|
816
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
817
|
+
checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396"
|
818
|
+
dependencies = [
|
819
|
+
"cfg-if",
|
820
|
+
"once_cell",
|
821
|
+
"wasm-bindgen-macro",
|
822
|
+
]
|
823
|
+
|
824
|
+
[[package]]
|
825
|
+
name = "wasm-bindgen-backend"
|
826
|
+
version = "0.2.99"
|
827
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
828
|
+
checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79"
|
829
|
+
dependencies = [
|
830
|
+
"bumpalo",
|
831
|
+
"log",
|
832
|
+
"proc-macro2",
|
833
|
+
"quote",
|
834
|
+
"syn",
|
835
|
+
"wasm-bindgen-shared",
|
836
|
+
]
|
837
|
+
|
838
|
+
[[package]]
|
839
|
+
name = "wasm-bindgen-macro"
|
840
|
+
version = "0.2.99"
|
841
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
842
|
+
checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe"
|
843
|
+
dependencies = [
|
844
|
+
"quote",
|
845
|
+
"wasm-bindgen-macro-support",
|
846
|
+
]
|
847
|
+
|
848
|
+
[[package]]
|
849
|
+
name = "wasm-bindgen-macro-support"
|
850
|
+
version = "0.2.99"
|
851
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
852
|
+
checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2"
|
853
|
+
dependencies = [
|
854
|
+
"proc-macro2",
|
855
|
+
"quote",
|
856
|
+
"syn",
|
857
|
+
"wasm-bindgen-backend",
|
858
|
+
"wasm-bindgen-shared",
|
859
|
+
]
|
860
|
+
|
861
|
+
[[package]]
|
862
|
+
name = "wasm-bindgen-shared"
|
863
|
+
version = "0.2.99"
|
864
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
865
|
+
checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6"
|
866
|
+
|
867
|
+
[[package]]
|
868
|
+
name = "web-time"
|
869
|
+
version = "1.1.0"
|
870
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
871
|
+
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
|
872
|
+
dependencies = [
|
873
|
+
"js-sys",
|
874
|
+
"wasm-bindgen",
|
875
|
+
]
|
876
|
+
|
806
877
|
[[package]]
|
807
878
|
name = "windows-sys"
|
808
|
-
version = "0.
|
879
|
+
version = "0.59.0"
|
809
880
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
810
|
-
checksum = "
|
881
|
+
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
811
882
|
dependencies = [
|
812
883
|
"windows-targets",
|
813
884
|
]
|
data/LICENSE-THIRD-PARTY.txt
CHANGED
@@ -10,7 +10,7 @@ base64 v0.13.1
|
|
10
10
|
https://github.com/marshallpierce/rust-base64
|
11
11
|
MIT/Apache-2.0
|
12
12
|
|
13
|
-
bindgen v0.69.
|
13
|
+
bindgen v0.69.5
|
14
14
|
https://rust-lang.github.io/rust-bindgen/
|
15
15
|
BSD-3-Clause
|
16
16
|
|
@@ -26,7 +26,7 @@ byteorder v1.5.0
|
|
26
26
|
https://github.com/BurntSushi/byteorder
|
27
27
|
Unlicense OR MIT
|
28
28
|
|
29
|
-
cc v1.
|
29
|
+
cc v1.2.6
|
30
30
|
https://github.com/rust-lang/cc-rs
|
31
31
|
MIT OR Apache-2.0
|
32
32
|
|
@@ -42,11 +42,11 @@ clang-sys v1.8.1
|
|
42
42
|
https://github.com/KyleMayes/clang-sys
|
43
43
|
Apache-2.0
|
44
44
|
|
45
|
-
console v0.15.
|
45
|
+
console v0.15.10
|
46
46
|
https://github.com/console-rs/console
|
47
47
|
MIT
|
48
48
|
|
49
|
-
crossbeam-deque v0.8.
|
49
|
+
crossbeam-deque v0.8.6
|
50
50
|
https://github.com/crossbeam-rs/crossbeam/tree/master/crossbeam-deque
|
51
51
|
MIT OR Apache-2.0
|
52
52
|
|
@@ -54,7 +54,7 @@ crossbeam-epoch v0.9.18
|
|
54
54
|
https://github.com/crossbeam-rs/crossbeam/tree/master/crossbeam-epoch
|
55
55
|
MIT OR Apache-2.0
|
56
56
|
|
57
|
-
crossbeam-utils v0.8.
|
57
|
+
crossbeam-utils v0.8.21
|
58
58
|
https://github.com/crossbeam-rs/crossbeam/tree/master/crossbeam-utils
|
59
59
|
MIT OR Apache-2.0
|
60
60
|
|
@@ -70,15 +70,15 @@ darling_macro v0.20.10
|
|
70
70
|
https://github.com/TedDriggs/darling
|
71
71
|
MIT
|
72
72
|
|
73
|
-
derive_builder v0.20.
|
73
|
+
derive_builder v0.20.2
|
74
74
|
https://github.com/colin-kiegel/rust-derive-builder
|
75
75
|
MIT OR Apache-2.0
|
76
76
|
|
77
|
-
derive_builder_core v0.20.
|
77
|
+
derive_builder_core v0.20.2
|
78
78
|
https://github.com/colin-kiegel/rust-derive-builder
|
79
79
|
MIT OR Apache-2.0
|
80
80
|
|
81
|
-
derive_builder_macro v0.20.
|
81
|
+
derive_builder_macro v0.20.2
|
82
82
|
https://github.com/colin-kiegel/rust-derive-builder
|
83
83
|
MIT OR Apache-2.0
|
84
84
|
|
@@ -86,9 +86,9 @@ either v1.13.0
|
|
86
86
|
https://github.com/rayon-rs/either
|
87
87
|
MIT OR Apache-2.0
|
88
88
|
|
89
|
-
encode_unicode
|
89
|
+
encode_unicode v1.0.0
|
90
90
|
https://github.com/tormol/encode_unicode
|
91
|
-
|
91
|
+
Apache-2.0 OR MIT
|
92
92
|
|
93
93
|
esaxx-rs v0.1.10
|
94
94
|
https://github.com/Narsil/esaxx-rs
|
@@ -102,7 +102,7 @@ getrandom v0.2.15
|
|
102
102
|
https://github.com/rust-random/getrandom
|
103
103
|
MIT OR Apache-2.0
|
104
104
|
|
105
|
-
glob v0.3.
|
105
|
+
glob v0.3.2
|
106
106
|
https://github.com/rust-lang/glob
|
107
107
|
MIT OR Apache-2.0
|
108
108
|
|
@@ -110,7 +110,7 @@ ident_case v1.0.1
|
|
110
110
|
https://github.com/TedDriggs/ident_case
|
111
111
|
MIT/Apache-2.0
|
112
112
|
|
113
|
-
indicatif v0.17.
|
113
|
+
indicatif v0.17.9
|
114
114
|
https://github.com/console-rs/indicatif
|
115
115
|
MIT
|
116
116
|
|
@@ -122,7 +122,7 @@ itertools v0.12.1
|
|
122
122
|
https://github.com/rust-itertools/itertools
|
123
123
|
MIT OR Apache-2.0
|
124
124
|
|
125
|
-
itoa v1.0.
|
125
|
+
itoa v1.0.14
|
126
126
|
https://github.com/dtolnay/itoa
|
127
127
|
MIT OR Apache-2.0
|
128
128
|
|
@@ -134,11 +134,11 @@ lazycell v1.3.0
|
|
134
134
|
https://github.com/indiv0/lazycell
|
135
135
|
MIT/Apache-2.0
|
136
136
|
|
137
|
-
libc v0.2.
|
137
|
+
libc v0.2.169
|
138
138
|
https://github.com/rust-lang/libc
|
139
139
|
MIT OR Apache-2.0
|
140
140
|
|
141
|
-
libloading v0.8.
|
141
|
+
libloading v0.8.6
|
142
142
|
https://github.com/nagisa/rust_libloading/
|
143
143
|
ISC
|
144
144
|
|
@@ -186,7 +186,7 @@ number_prefix v0.4.0
|
|
186
186
|
https://github.com/ogham/rust-number-prefix
|
187
187
|
MIT
|
188
188
|
|
189
|
-
once_cell v1.
|
189
|
+
once_cell v1.20.2
|
190
190
|
https://github.com/matklad/once_cell
|
191
191
|
MIT OR Apache-2.0
|
192
192
|
|
@@ -202,11 +202,11 @@ paste v1.0.15
|
|
202
202
|
https://github.com/dtolnay/paste
|
203
203
|
MIT OR Apache-2.0
|
204
204
|
|
205
|
-
pkg-config v0.3.
|
205
|
+
pkg-config v0.3.31
|
206
206
|
https://github.com/rust-lang/pkg-config-rs
|
207
207
|
MIT OR Apache-2.0
|
208
208
|
|
209
|
-
portable-atomic v1.
|
209
|
+
portable-atomic v1.10.0
|
210
210
|
https://github.com/taiki-e/portable-atomic
|
211
211
|
Apache-2.0 OR MIT
|
212
212
|
|
@@ -214,11 +214,11 @@ ppv-lite86 v0.2.20
|
|
214
214
|
https://github.com/cryptocorrosion/cryptocorrosion
|
215
215
|
MIT/Apache-2.0
|
216
216
|
|
217
|
-
proc-macro2 v1.0.
|
217
|
+
proc-macro2 v1.0.92
|
218
218
|
https://github.com/dtolnay/proc-macro2
|
219
219
|
MIT OR Apache-2.0
|
220
220
|
|
221
|
-
quote v1.0.
|
221
|
+
quote v1.0.38
|
222
222
|
https://github.com/dtolnay/quote
|
223
223
|
MIT OR Apache-2.0
|
224
224
|
|
@@ -246,11 +246,11 @@ rayon-core v1.12.1
|
|
246
246
|
https://github.com/rayon-rs/rayon
|
247
247
|
MIT OR Apache-2.0
|
248
248
|
|
249
|
-
rb-sys v0.9.
|
249
|
+
rb-sys v0.9.105
|
250
250
|
https://github.com/oxidize-rb/rb-sys
|
251
251
|
MIT OR Apache-2.0
|
252
252
|
|
253
|
-
rb-sys-build v0.9.
|
253
|
+
rb-sys-build v0.9.105
|
254
254
|
https://github.com/oxidize-rb/rb-sys
|
255
255
|
MIT OR Apache-2.0
|
256
256
|
|
@@ -258,15 +258,15 @@ rb-sys-env v0.1.2
|
|
258
258
|
https://github.com/oxidize-rb/rb-sys
|
259
259
|
MIT OR Apache-2.0
|
260
260
|
|
261
|
-
regex v1.
|
261
|
+
regex v1.11.1
|
262
262
|
https://github.com/rust-lang/regex
|
263
263
|
MIT OR Apache-2.0
|
264
264
|
|
265
|
-
regex-automata v0.4.
|
265
|
+
regex-automata v0.4.9
|
266
266
|
https://github.com/rust-lang/regex/tree/master/regex-automata
|
267
267
|
MIT OR Apache-2.0
|
268
268
|
|
269
|
-
regex-syntax v0.8.
|
269
|
+
regex-syntax v0.8.5
|
270
270
|
https://github.com/rust-lang/regex/tree/master/regex-syntax
|
271
271
|
MIT OR Apache-2.0
|
272
272
|
|
@@ -282,15 +282,15 @@ seq-macro v0.3.5
|
|
282
282
|
https://github.com/dtolnay/seq-macro
|
283
283
|
MIT OR Apache-2.0
|
284
284
|
|
285
|
-
serde v1.0.
|
285
|
+
serde v1.0.217
|
286
286
|
https://serde.rs
|
287
287
|
MIT OR Apache-2.0
|
288
288
|
|
289
|
-
serde_derive v1.0.
|
289
|
+
serde_derive v1.0.217
|
290
290
|
https://serde.rs
|
291
291
|
MIT OR Apache-2.0
|
292
292
|
|
293
|
-
serde_json v1.0.
|
293
|
+
serde_json v1.0.134
|
294
294
|
https://github.com/serde-rs/json
|
295
295
|
MIT OR Apache-2.0
|
296
296
|
|
@@ -314,35 +314,35 @@ strsim v0.11.1
|
|
314
314
|
https://github.com/rapidfuzz/strsim-rs
|
315
315
|
MIT
|
316
316
|
|
317
|
-
syn v2.0.
|
317
|
+
syn v2.0.93
|
318
318
|
https://github.com/dtolnay/syn
|
319
319
|
MIT OR Apache-2.0
|
320
320
|
|
321
|
-
thiserror v1.0.
|
321
|
+
thiserror v1.0.69
|
322
322
|
https://github.com/dtolnay/thiserror
|
323
323
|
MIT OR Apache-2.0
|
324
324
|
|
325
|
-
thiserror-impl v1.0.
|
325
|
+
thiserror-impl v1.0.69
|
326
326
|
https://github.com/dtolnay/thiserror
|
327
327
|
MIT OR Apache-2.0
|
328
328
|
|
329
|
-
tokenizers v0.
|
329
|
+
tokenizers v0.21.0
|
330
330
|
https://github.com/huggingface/tokenizers
|
331
331
|
Apache-2.0
|
332
332
|
|
333
|
-
unicode-ident v1.0.
|
333
|
+
unicode-ident v1.0.14
|
334
334
|
https://github.com/dtolnay/unicode-ident
|
335
|
-
(MIT OR Apache-2.0) AND Unicode-
|
335
|
+
(MIT OR Apache-2.0) AND Unicode-3.0
|
336
336
|
|
337
337
|
unicode-normalization-alignments v0.1.12
|
338
338
|
https://github.com/n1t0/unicode-normalization
|
339
339
|
MIT/Apache-2.0
|
340
340
|
|
341
|
-
unicode-segmentation v1.
|
341
|
+
unicode-segmentation v1.12.0
|
342
342
|
https://github.com/unicode-rs/unicode-segmentation
|
343
|
-
MIT
|
343
|
+
MIT OR Apache-2.0
|
344
344
|
|
345
|
-
unicode-width v0.
|
345
|
+
unicode-width v0.2.0
|
346
346
|
https://github.com/unicode-rs/unicode-width
|
347
347
|
MIT OR Apache-2.0
|
348
348
|
|
@@ -350,7 +350,7 @@ unicode_categories v0.1.1
|
|
350
350
|
https://github.com/swgillespie/unicode-categories
|
351
351
|
MIT OR Apache-2.0
|
352
352
|
|
353
|
-
windows-sys v0.
|
353
|
+
windows-sys v0.59.0
|
354
354
|
https://github.com/microsoft/windows-rs
|
355
355
|
MIT OR Apache-2.0
|
356
356
|
|
@@ -15619,52 +15619,45 @@ DEALINGS IN THE SOFTWARE.
|
|
15619
15619
|
unicode-ident LICENSE-UNICODE
|
15620
15620
|
================================================================================
|
15621
15621
|
|
15622
|
-
UNICODE
|
15623
|
-
|
15624
|
-
See Terms of Use <https://www.unicode.org/copyright.html>
|
15625
|
-
for definitions of Unicode Inc.’s Data Files and Software.
|
15626
|
-
|
15627
|
-
NOTICE TO USER: Carefully read the following legal agreement.
|
15628
|
-
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
|
15629
|
-
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
|
15630
|
-
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
15631
|
-
TERMS AND CONDITIONS OF THIS AGREEMENT.
|
15632
|
-
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
|
15633
|
-
THE DATA FILES OR SOFTWARE.
|
15622
|
+
UNICODE LICENSE V3
|
15634
15623
|
|
15635
15624
|
COPYRIGHT AND PERMISSION NOTICE
|
15636
15625
|
|
15637
|
-
Copyright © 1991-
|
15638
|
-
|
15639
|
-
|
15640
|
-
|
15641
|
-
|
15642
|
-
|
15643
|
-
|
15644
|
-
|
15645
|
-
|
15646
|
-
|
15647
|
-
|
15648
|
-
|
15649
|
-
|
15650
|
-
|
15651
|
-
|
15652
|
-
|
15653
|
-
|
15654
|
-
|
15655
|
-
|
15656
|
-
|
15657
|
-
|
15658
|
-
|
15659
|
-
|
15660
|
-
|
15661
|
-
|
15662
|
-
|
15663
|
-
|
15664
|
-
|
15665
|
-
|
15666
|
-
|
15667
|
-
|
15626
|
+
Copyright © 1991-2023 Unicode, Inc.
|
15627
|
+
|
15628
|
+
NOTICE TO USER: Carefully read the following legal agreement. BY
|
15629
|
+
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
|
15630
|
+
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
15631
|
+
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
|
15632
|
+
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
|
15633
|
+
|
15634
|
+
Permission is hereby granted, free of charge, to any person obtaining a
|
15635
|
+
copy of data files and any associated documentation (the "Data Files") or
|
15636
|
+
software and any associated documentation (the "Software") to deal in the
|
15637
|
+
Data Files or Software without restriction, including without limitation
|
15638
|
+
the rights to use, copy, modify, merge, publish, distribute, and/or sell
|
15639
|
+
copies of the Data Files or Software, and to permit persons to whom the
|
15640
|
+
Data Files or Software are furnished to do so, provided that either (a)
|
15641
|
+
this copyright and permission notice appear with all copies of the Data
|
15642
|
+
Files or Software, or (b) this copyright and permission notice appear in
|
15643
|
+
associated Documentation.
|
15644
|
+
|
15645
|
+
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
15646
|
+
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
15647
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
|
15648
|
+
THIRD PARTY RIGHTS.
|
15649
|
+
|
15650
|
+
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
|
15651
|
+
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
|
15652
|
+
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
15653
|
+
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
15654
|
+
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
|
15655
|
+
FILES OR SOFTWARE.
|
15656
|
+
|
15657
|
+
Except as contained in this notice, the name of a copyright holder shall
|
15658
|
+
not be used in advertising or otherwise to promote the sale, use or other
|
15659
|
+
dealings in these Data Files or Software without prior written
|
15660
|
+
authorization of the copyright holder.
|
15668
15661
|
|
15669
15662
|
================================================================================
|
15670
15663
|
unicode-normalization-alignments LICENSE-APACHE
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/tokenizers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokenizers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.4
|
5
5
|
platform: x64-mingw-ucrt
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -26,6 +26,7 @@ files:
|
|
26
26
|
- lib/tokenizers/3.1/tokenizers.so
|
27
27
|
- lib/tokenizers/3.2/tokenizers.so
|
28
28
|
- lib/tokenizers/3.3/tokenizers.so
|
29
|
+
- lib/tokenizers/3.4/tokenizers.so
|
29
30
|
- lib/tokenizers/added_token.rb
|
30
31
|
- lib/tokenizers/char_bpe_tokenizer.rb
|
31
32
|
- lib/tokenizers/decoders/bpe_decoder.rb
|
@@ -71,14 +72,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
71
72
|
version: '3.1'
|
72
73
|
- - "<"
|
73
74
|
- !ruby/object:Gem::Version
|
74
|
-
version: 3.
|
75
|
+
version: 3.5.dev
|
75
76
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
77
|
requirements:
|
77
78
|
- - ">="
|
78
79
|
- !ruby/object:Gem::Version
|
79
80
|
version: '0'
|
80
81
|
requirements: []
|
81
|
-
rubygems_version: 3.
|
82
|
+
rubygems_version: 3.5.23
|
82
83
|
signing_key:
|
83
84
|
specification_version: 4
|
84
85
|
summary: Fast state-of-the-art tokenizers for Ruby
|