tokenizers 0.5.3-x86_64-linux → 0.5.4-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +154 -83
- data/LICENSE-THIRD-PARTY.txt +71 -78
- data/lib/tokenizers/3.1/tokenizers.so +0 -0
- data/lib/tokenizers/3.2/tokenizers.so +0 -0
- data/lib/tokenizers/3.3/tokenizers.so +0 -0
- data/lib/tokenizers/3.4/tokenizers.so +0 -0
- data/lib/tokenizers/from_pretrained.rb +1 -1
- data/lib/tokenizers/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 344f9898cfce88864858a0a3e9b5e1b5efa0299854dfdd848698858e9dc99593
|
4
|
+
data.tar.gz: b7f552e8277346d344e66b5b0fd5366504bce42b946c32b6457d543f6db461d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0aeb68e542ed2f52dc1ada8f8c342e11f26de342f570394dcdb4b2b2b330dbefa27d261f3367ae2ff614b914bd289cc24e4aa42b9b82ec0836d242b619eb9cba
|
7
|
+
data.tar.gz: 28fab166cf68f6115b4a01d11baddb2ee04c255be377b736f9611843eff2aed9e6a780b4e3cdf5178a534b4c36944c6badccf21ad87e597b594699a5e715078b
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -19,9 +19,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
|
|
19
19
|
|
20
20
|
[[package]]
|
21
21
|
name = "bindgen"
|
22
|
-
version = "0.69.
|
22
|
+
version = "0.69.5"
|
23
23
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
24
|
-
checksum = "
|
24
|
+
checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
|
25
25
|
dependencies = [
|
26
26
|
"bitflags 2.6.0",
|
27
27
|
"cexpr",
|
@@ -49,6 +49,12 @@ version = "2.6.0"
|
|
49
49
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
50
50
|
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
|
51
51
|
|
52
|
+
[[package]]
|
53
|
+
name = "bumpalo"
|
54
|
+
version = "3.16.0"
|
55
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
56
|
+
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
|
57
|
+
|
52
58
|
[[package]]
|
53
59
|
name = "byteorder"
|
54
60
|
version = "1.5.0"
|
@@ -57,9 +63,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
|
57
63
|
|
58
64
|
[[package]]
|
59
65
|
name = "cc"
|
60
|
-
version = "1.
|
66
|
+
version = "1.2.6"
|
61
67
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
62
|
-
checksum = "
|
68
|
+
checksum = "8d6dbb628b8f8555f86d0323c2eb39e3ec81901f4b83e091db8a6a76d316a333"
|
63
69
|
dependencies = [
|
64
70
|
"shlex",
|
65
71
|
]
|
@@ -92,22 +98,22 @@ dependencies = [
|
|
92
98
|
|
93
99
|
[[package]]
|
94
100
|
name = "console"
|
95
|
-
version = "0.15.
|
101
|
+
version = "0.15.10"
|
96
102
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
97
|
-
checksum = "
|
103
|
+
checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b"
|
98
104
|
dependencies = [
|
99
105
|
"encode_unicode",
|
100
|
-
"lazy_static",
|
101
106
|
"libc",
|
107
|
+
"once_cell",
|
102
108
|
"unicode-width",
|
103
109
|
"windows-sys",
|
104
110
|
]
|
105
111
|
|
106
112
|
[[package]]
|
107
113
|
name = "crossbeam-deque"
|
108
|
-
version = "0.8.
|
114
|
+
version = "0.8.6"
|
109
115
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
110
|
-
checksum = "
|
116
|
+
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
111
117
|
dependencies = [
|
112
118
|
"crossbeam-epoch",
|
113
119
|
"crossbeam-utils",
|
@@ -124,9 +130,9 @@ dependencies = [
|
|
124
130
|
|
125
131
|
[[package]]
|
126
132
|
name = "crossbeam-utils"
|
127
|
-
version = "0.8.
|
133
|
+
version = "0.8.21"
|
128
134
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
129
|
-
checksum = "
|
135
|
+
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
130
136
|
|
131
137
|
[[package]]
|
132
138
|
name = "darling"
|
@@ -165,18 +171,18 @@ dependencies = [
|
|
165
171
|
|
166
172
|
[[package]]
|
167
173
|
name = "derive_builder"
|
168
|
-
version = "0.20.
|
174
|
+
version = "0.20.2"
|
169
175
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
170
|
-
checksum = "
|
176
|
+
checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947"
|
171
177
|
dependencies = [
|
172
178
|
"derive_builder_macro",
|
173
179
|
]
|
174
180
|
|
175
181
|
[[package]]
|
176
182
|
name = "derive_builder_core"
|
177
|
-
version = "0.20.
|
183
|
+
version = "0.20.2"
|
178
184
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
179
|
-
checksum = "
|
185
|
+
checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8"
|
180
186
|
dependencies = [
|
181
187
|
"darling",
|
182
188
|
"proc-macro2",
|
@@ -186,9 +192,9 @@ dependencies = [
|
|
186
192
|
|
187
193
|
[[package]]
|
188
194
|
name = "derive_builder_macro"
|
189
|
-
version = "0.20.
|
195
|
+
version = "0.20.2"
|
190
196
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
191
|
-
checksum = "
|
197
|
+
checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c"
|
192
198
|
dependencies = [
|
193
199
|
"derive_builder_core",
|
194
200
|
"syn",
|
@@ -202,9 +208,9 @@ checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
|
202
208
|
|
203
209
|
[[package]]
|
204
210
|
name = "encode_unicode"
|
205
|
-
version = "0.
|
211
|
+
version = "1.0.0"
|
206
212
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
207
|
-
checksum = "
|
213
|
+
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
|
208
214
|
|
209
215
|
[[package]]
|
210
216
|
name = "esaxx-rs"
|
@@ -234,9 +240,9 @@ dependencies = [
|
|
234
240
|
|
235
241
|
[[package]]
|
236
242
|
name = "glob"
|
237
|
-
version = "0.3.
|
243
|
+
version = "0.3.2"
|
238
244
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
239
|
-
checksum = "
|
245
|
+
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
|
240
246
|
|
241
247
|
[[package]]
|
242
248
|
name = "ident_case"
|
@@ -246,24 +252,15 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
|
246
252
|
|
247
253
|
[[package]]
|
248
254
|
name = "indicatif"
|
249
|
-
version = "0.17.
|
255
|
+
version = "0.17.9"
|
250
256
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
251
|
-
checksum = "
|
257
|
+
checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281"
|
252
258
|
dependencies = [
|
253
259
|
"console",
|
254
|
-
"instant",
|
255
260
|
"number_prefix",
|
256
261
|
"portable-atomic",
|
257
262
|
"unicode-width",
|
258
|
-
|
259
|
-
|
260
|
-
[[package]]
|
261
|
-
name = "instant"
|
262
|
-
version = "0.1.13"
|
263
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
264
|
-
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
|
265
|
-
dependencies = [
|
266
|
-
"cfg-if",
|
263
|
+
"web-time",
|
267
264
|
]
|
268
265
|
|
269
266
|
[[package]]
|
@@ -286,9 +283,19 @@ dependencies = [
|
|
286
283
|
|
287
284
|
[[package]]
|
288
285
|
name = "itoa"
|
289
|
-
version = "1.0.
|
286
|
+
version = "1.0.14"
|
290
287
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
291
|
-
checksum = "
|
288
|
+
checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
|
289
|
+
|
290
|
+
[[package]]
|
291
|
+
name = "js-sys"
|
292
|
+
version = "0.3.76"
|
293
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
294
|
+
checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7"
|
295
|
+
dependencies = [
|
296
|
+
"once_cell",
|
297
|
+
"wasm-bindgen",
|
298
|
+
]
|
292
299
|
|
293
300
|
[[package]]
|
294
301
|
name = "lazy_static"
|
@@ -304,15 +311,15 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
304
311
|
|
305
312
|
[[package]]
|
306
313
|
name = "libc"
|
307
|
-
version = "0.2.
|
314
|
+
version = "0.2.169"
|
308
315
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
309
|
-
checksum = "
|
316
|
+
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
310
317
|
|
311
318
|
[[package]]
|
312
319
|
name = "libloading"
|
313
|
-
version = "0.8.
|
320
|
+
version = "0.8.6"
|
314
321
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
315
|
-
checksum = "
|
322
|
+
checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
|
316
323
|
dependencies = [
|
317
324
|
"cfg-if",
|
318
325
|
"windows-targets",
|
@@ -414,9 +421,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
|
414
421
|
|
415
422
|
[[package]]
|
416
423
|
name = "once_cell"
|
417
|
-
version = "1.
|
424
|
+
version = "1.20.2"
|
418
425
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
419
|
-
checksum = "
|
426
|
+
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
|
420
427
|
|
421
428
|
[[package]]
|
422
429
|
name = "onig"
|
@@ -448,15 +455,15 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
|
|
448
455
|
|
449
456
|
[[package]]
|
450
457
|
name = "pkg-config"
|
451
|
-
version = "0.3.
|
458
|
+
version = "0.3.31"
|
452
459
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
453
|
-
checksum = "
|
460
|
+
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
|
454
461
|
|
455
462
|
[[package]]
|
456
463
|
name = "portable-atomic"
|
457
|
-
version = "1.
|
464
|
+
version = "1.10.0"
|
458
465
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
459
|
-
checksum = "
|
466
|
+
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
|
460
467
|
|
461
468
|
[[package]]
|
462
469
|
name = "ppv-lite86"
|
@@ -469,18 +476,18 @@ dependencies = [
|
|
469
476
|
|
470
477
|
[[package]]
|
471
478
|
name = "proc-macro2"
|
472
|
-
version = "1.0.
|
479
|
+
version = "1.0.92"
|
473
480
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
474
|
-
checksum = "
|
481
|
+
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
|
475
482
|
dependencies = [
|
476
483
|
"unicode-ident",
|
477
484
|
]
|
478
485
|
|
479
486
|
[[package]]
|
480
487
|
name = "quote"
|
481
|
-
version = "1.0.
|
488
|
+
version = "1.0.38"
|
482
489
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
483
|
-
checksum = "
|
490
|
+
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
|
484
491
|
dependencies = [
|
485
492
|
"proc-macro2",
|
486
493
|
]
|
@@ -548,18 +555,18 @@ dependencies = [
|
|
548
555
|
|
549
556
|
[[package]]
|
550
557
|
name = "rb-sys"
|
551
|
-
version = "0.9.
|
558
|
+
version = "0.9.105"
|
552
559
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
553
|
-
checksum = "
|
560
|
+
checksum = "4b3a1f3ce8e7c36d777d52fe7a99039fe4fea7c8ec355a4c4f3a17f92a14029f"
|
554
561
|
dependencies = [
|
555
562
|
"rb-sys-build",
|
556
563
|
]
|
557
564
|
|
558
565
|
[[package]]
|
559
566
|
name = "rb-sys-build"
|
560
|
-
version = "0.9.
|
567
|
+
version = "0.9.105"
|
561
568
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
562
|
-
checksum = "
|
569
|
+
checksum = "3e6b246c29c0809e1cbe60a1ba9e093da72a4676d02adc68469297d1e589bbf0"
|
563
570
|
dependencies = [
|
564
571
|
"bindgen",
|
565
572
|
"lazy_static",
|
@@ -578,9 +585,9 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
578
585
|
|
579
586
|
[[package]]
|
580
587
|
name = "regex"
|
581
|
-
version = "1.
|
588
|
+
version = "1.11.1"
|
582
589
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
583
|
-
checksum = "
|
590
|
+
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
|
584
591
|
dependencies = [
|
585
592
|
"aho-corasick",
|
586
593
|
"memchr",
|
@@ -590,9 +597,9 @@ dependencies = [
|
|
590
597
|
|
591
598
|
[[package]]
|
592
599
|
name = "regex-automata"
|
593
|
-
version = "0.4.
|
600
|
+
version = "0.4.9"
|
594
601
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
595
|
-
checksum = "
|
602
|
+
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
596
603
|
dependencies = [
|
597
604
|
"aho-corasick",
|
598
605
|
"memchr",
|
@@ -601,9 +608,9 @@ dependencies = [
|
|
601
608
|
|
602
609
|
[[package]]
|
603
610
|
name = "regex-syntax"
|
604
|
-
version = "0.8.
|
611
|
+
version = "0.8.5"
|
605
612
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
606
|
-
checksum = "
|
613
|
+
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
607
614
|
|
608
615
|
[[package]]
|
609
616
|
name = "rustc-hash"
|
@@ -625,18 +632,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
|
625
632
|
|
626
633
|
[[package]]
|
627
634
|
name = "serde"
|
628
|
-
version = "1.0.
|
635
|
+
version = "1.0.217"
|
629
636
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
630
|
-
checksum = "
|
637
|
+
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
|
631
638
|
dependencies = [
|
632
639
|
"serde_derive",
|
633
640
|
]
|
634
641
|
|
635
642
|
[[package]]
|
636
643
|
name = "serde_derive"
|
637
|
-
version = "1.0.
|
644
|
+
version = "1.0.217"
|
638
645
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
639
|
-
checksum = "
|
646
|
+
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
|
640
647
|
dependencies = [
|
641
648
|
"proc-macro2",
|
642
649
|
"quote",
|
@@ -645,9 +652,9 @@ dependencies = [
|
|
645
652
|
|
646
653
|
[[package]]
|
647
654
|
name = "serde_json"
|
648
|
-
version = "1.0.
|
655
|
+
version = "1.0.134"
|
649
656
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
650
|
-
checksum = "
|
657
|
+
checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d"
|
651
658
|
dependencies = [
|
652
659
|
"itoa",
|
653
660
|
"memchr",
|
@@ -693,9 +700,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
|
693
700
|
|
694
701
|
[[package]]
|
695
702
|
name = "syn"
|
696
|
-
version = "2.0.
|
703
|
+
version = "2.0.93"
|
697
704
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
698
|
-
checksum = "
|
705
|
+
checksum = "9c786062daee0d6db1132800e623df74274a0a87322d8e183338e01b3d98d058"
|
699
706
|
dependencies = [
|
700
707
|
"proc-macro2",
|
701
708
|
"quote",
|
@@ -704,18 +711,18 @@ dependencies = [
|
|
704
711
|
|
705
712
|
[[package]]
|
706
713
|
name = "thiserror"
|
707
|
-
version = "1.0.
|
714
|
+
version = "1.0.69"
|
708
715
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
709
|
-
checksum = "
|
716
|
+
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
710
717
|
dependencies = [
|
711
718
|
"thiserror-impl",
|
712
719
|
]
|
713
720
|
|
714
721
|
[[package]]
|
715
722
|
name = "thiserror-impl"
|
716
|
-
version = "1.0.
|
723
|
+
version = "1.0.69"
|
717
724
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
718
|
-
checksum = "
|
725
|
+
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
|
719
726
|
dependencies = [
|
720
727
|
"proc-macro2",
|
721
728
|
"quote",
|
@@ -724,19 +731,19 @@ dependencies = [
|
|
724
731
|
|
725
732
|
[[package]]
|
726
733
|
name = "tokenizers"
|
727
|
-
version = "0.5.
|
734
|
+
version = "0.5.4"
|
728
735
|
dependencies = [
|
729
736
|
"magnus",
|
730
737
|
"onig",
|
731
738
|
"serde",
|
732
|
-
"tokenizers 0.
|
739
|
+
"tokenizers 0.21.0",
|
733
740
|
]
|
734
741
|
|
735
742
|
[[package]]
|
736
743
|
name = "tokenizers"
|
737
|
-
version = "0.
|
744
|
+
version = "0.21.0"
|
738
745
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
739
|
-
checksum = "
|
746
|
+
checksum = "9ecededfed68a69bc657e486510089e255e53c3d38cc7d4d59c8742668ca2cae"
|
740
747
|
dependencies = [
|
741
748
|
"aho-corasick",
|
742
749
|
"derive_builder",
|
@@ -766,9 +773,9 @@ dependencies = [
|
|
766
773
|
|
767
774
|
[[package]]
|
768
775
|
name = "unicode-ident"
|
769
|
-
version = "1.0.
|
776
|
+
version = "1.0.14"
|
770
777
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
771
|
-
checksum = "
|
778
|
+
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
|
772
779
|
|
773
780
|
[[package]]
|
774
781
|
name = "unicode-normalization-alignments"
|
@@ -781,15 +788,15 @@ dependencies = [
|
|
781
788
|
|
782
789
|
[[package]]
|
783
790
|
name = "unicode-segmentation"
|
784
|
-
version = "1.
|
791
|
+
version = "1.12.0"
|
785
792
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
786
|
-
checksum = "
|
793
|
+
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
|
787
794
|
|
788
795
|
[[package]]
|
789
796
|
name = "unicode-width"
|
790
|
-
version = "0.
|
797
|
+
version = "0.2.0"
|
791
798
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
792
|
-
checksum = "
|
799
|
+
checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
|
793
800
|
|
794
801
|
[[package]]
|
795
802
|
name = "unicode_categories"
|
@@ -803,11 +810,75 @@ version = "0.11.0+wasi-snapshot-preview1"
|
|
803
810
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
804
811
|
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
805
812
|
|
813
|
+
[[package]]
|
814
|
+
name = "wasm-bindgen"
|
815
|
+
version = "0.2.99"
|
816
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
817
|
+
checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396"
|
818
|
+
dependencies = [
|
819
|
+
"cfg-if",
|
820
|
+
"once_cell",
|
821
|
+
"wasm-bindgen-macro",
|
822
|
+
]
|
823
|
+
|
824
|
+
[[package]]
|
825
|
+
name = "wasm-bindgen-backend"
|
826
|
+
version = "0.2.99"
|
827
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
828
|
+
checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79"
|
829
|
+
dependencies = [
|
830
|
+
"bumpalo",
|
831
|
+
"log",
|
832
|
+
"proc-macro2",
|
833
|
+
"quote",
|
834
|
+
"syn",
|
835
|
+
"wasm-bindgen-shared",
|
836
|
+
]
|
837
|
+
|
838
|
+
[[package]]
|
839
|
+
name = "wasm-bindgen-macro"
|
840
|
+
version = "0.2.99"
|
841
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
842
|
+
checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe"
|
843
|
+
dependencies = [
|
844
|
+
"quote",
|
845
|
+
"wasm-bindgen-macro-support",
|
846
|
+
]
|
847
|
+
|
848
|
+
[[package]]
|
849
|
+
name = "wasm-bindgen-macro-support"
|
850
|
+
version = "0.2.99"
|
851
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
852
|
+
checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2"
|
853
|
+
dependencies = [
|
854
|
+
"proc-macro2",
|
855
|
+
"quote",
|
856
|
+
"syn",
|
857
|
+
"wasm-bindgen-backend",
|
858
|
+
"wasm-bindgen-shared",
|
859
|
+
]
|
860
|
+
|
861
|
+
[[package]]
|
862
|
+
name = "wasm-bindgen-shared"
|
863
|
+
version = "0.2.99"
|
864
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
865
|
+
checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6"
|
866
|
+
|
867
|
+
[[package]]
|
868
|
+
name = "web-time"
|
869
|
+
version = "1.1.0"
|
870
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
871
|
+
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
|
872
|
+
dependencies = [
|
873
|
+
"js-sys",
|
874
|
+
"wasm-bindgen",
|
875
|
+
]
|
876
|
+
|
806
877
|
[[package]]
|
807
878
|
name = "windows-sys"
|
808
|
-
version = "0.
|
879
|
+
version = "0.59.0"
|
809
880
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
810
|
-
checksum = "
|
881
|
+
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
811
882
|
dependencies = [
|
812
883
|
"windows-targets",
|
813
884
|
]
|
data/LICENSE-THIRD-PARTY.txt
CHANGED
@@ -10,7 +10,7 @@ base64 v0.13.1
|
|
10
10
|
https://github.com/marshallpierce/rust-base64
|
11
11
|
MIT/Apache-2.0
|
12
12
|
|
13
|
-
bindgen v0.69.
|
13
|
+
bindgen v0.69.5
|
14
14
|
https://rust-lang.github.io/rust-bindgen/
|
15
15
|
BSD-3-Clause
|
16
16
|
|
@@ -26,7 +26,7 @@ byteorder v1.5.0
|
|
26
26
|
https://github.com/BurntSushi/byteorder
|
27
27
|
Unlicense OR MIT
|
28
28
|
|
29
|
-
cc v1.
|
29
|
+
cc v1.2.6
|
30
30
|
https://github.com/rust-lang/cc-rs
|
31
31
|
MIT OR Apache-2.0
|
32
32
|
|
@@ -42,11 +42,11 @@ clang-sys v1.8.1
|
|
42
42
|
https://github.com/KyleMayes/clang-sys
|
43
43
|
Apache-2.0
|
44
44
|
|
45
|
-
console v0.15.
|
45
|
+
console v0.15.10
|
46
46
|
https://github.com/console-rs/console
|
47
47
|
MIT
|
48
48
|
|
49
|
-
crossbeam-deque v0.8.
|
49
|
+
crossbeam-deque v0.8.6
|
50
50
|
https://github.com/crossbeam-rs/crossbeam/tree/master/crossbeam-deque
|
51
51
|
MIT OR Apache-2.0
|
52
52
|
|
@@ -54,7 +54,7 @@ crossbeam-epoch v0.9.18
|
|
54
54
|
https://github.com/crossbeam-rs/crossbeam/tree/master/crossbeam-epoch
|
55
55
|
MIT OR Apache-2.0
|
56
56
|
|
57
|
-
crossbeam-utils v0.8.
|
57
|
+
crossbeam-utils v0.8.21
|
58
58
|
https://github.com/crossbeam-rs/crossbeam/tree/master/crossbeam-utils
|
59
59
|
MIT OR Apache-2.0
|
60
60
|
|
@@ -70,15 +70,15 @@ darling_macro v0.20.10
|
|
70
70
|
https://github.com/TedDriggs/darling
|
71
71
|
MIT
|
72
72
|
|
73
|
-
derive_builder v0.20.
|
73
|
+
derive_builder v0.20.2
|
74
74
|
https://github.com/colin-kiegel/rust-derive-builder
|
75
75
|
MIT OR Apache-2.0
|
76
76
|
|
77
|
-
derive_builder_core v0.20.
|
77
|
+
derive_builder_core v0.20.2
|
78
78
|
https://github.com/colin-kiegel/rust-derive-builder
|
79
79
|
MIT OR Apache-2.0
|
80
80
|
|
81
|
-
derive_builder_macro v0.20.
|
81
|
+
derive_builder_macro v0.20.2
|
82
82
|
https://github.com/colin-kiegel/rust-derive-builder
|
83
83
|
MIT OR Apache-2.0
|
84
84
|
|
@@ -98,7 +98,7 @@ getrandom v0.2.15
|
|
98
98
|
https://github.com/rust-random/getrandom
|
99
99
|
MIT OR Apache-2.0
|
100
100
|
|
101
|
-
glob v0.3.
|
101
|
+
glob v0.3.2
|
102
102
|
https://github.com/rust-lang/glob
|
103
103
|
MIT OR Apache-2.0
|
104
104
|
|
@@ -106,7 +106,7 @@ ident_case v1.0.1
|
|
106
106
|
https://github.com/TedDriggs/ident_case
|
107
107
|
MIT/Apache-2.0
|
108
108
|
|
109
|
-
indicatif v0.17.
|
109
|
+
indicatif v0.17.9
|
110
110
|
https://github.com/console-rs/indicatif
|
111
111
|
MIT
|
112
112
|
|
@@ -118,7 +118,7 @@ itertools v0.12.1
|
|
118
118
|
https://github.com/rust-itertools/itertools
|
119
119
|
MIT OR Apache-2.0
|
120
120
|
|
121
|
-
itoa v1.0.
|
121
|
+
itoa v1.0.14
|
122
122
|
https://github.com/dtolnay/itoa
|
123
123
|
MIT OR Apache-2.0
|
124
124
|
|
@@ -130,11 +130,11 @@ lazycell v1.3.0
|
|
130
130
|
https://github.com/indiv0/lazycell
|
131
131
|
MIT/Apache-2.0
|
132
132
|
|
133
|
-
libc v0.2.
|
133
|
+
libc v0.2.169
|
134
134
|
https://github.com/rust-lang/libc
|
135
135
|
MIT OR Apache-2.0
|
136
136
|
|
137
|
-
libloading v0.8.
|
137
|
+
libloading v0.8.6
|
138
138
|
https://github.com/nagisa/rust_libloading/
|
139
139
|
ISC
|
140
140
|
|
@@ -182,7 +182,7 @@ number_prefix v0.4.0
|
|
182
182
|
https://github.com/ogham/rust-number-prefix
|
183
183
|
MIT
|
184
184
|
|
185
|
-
once_cell v1.
|
185
|
+
once_cell v1.20.2
|
186
186
|
https://github.com/matklad/once_cell
|
187
187
|
MIT OR Apache-2.0
|
188
188
|
|
@@ -198,11 +198,11 @@ paste v1.0.15
|
|
198
198
|
https://github.com/dtolnay/paste
|
199
199
|
MIT OR Apache-2.0
|
200
200
|
|
201
|
-
pkg-config v0.3.
|
201
|
+
pkg-config v0.3.31
|
202
202
|
https://github.com/rust-lang/pkg-config-rs
|
203
203
|
MIT OR Apache-2.0
|
204
204
|
|
205
|
-
portable-atomic v1.
|
205
|
+
portable-atomic v1.10.0
|
206
206
|
https://github.com/taiki-e/portable-atomic
|
207
207
|
Apache-2.0 OR MIT
|
208
208
|
|
@@ -210,11 +210,11 @@ ppv-lite86 v0.2.20
|
|
210
210
|
https://github.com/cryptocorrosion/cryptocorrosion
|
211
211
|
MIT/Apache-2.0
|
212
212
|
|
213
|
-
proc-macro2 v1.0.
|
213
|
+
proc-macro2 v1.0.92
|
214
214
|
https://github.com/dtolnay/proc-macro2
|
215
215
|
MIT OR Apache-2.0
|
216
216
|
|
217
|
-
quote v1.0.
|
217
|
+
quote v1.0.38
|
218
218
|
https://github.com/dtolnay/quote
|
219
219
|
MIT OR Apache-2.0
|
220
220
|
|
@@ -242,11 +242,11 @@ rayon-core v1.12.1
|
|
242
242
|
https://github.com/rayon-rs/rayon
|
243
243
|
MIT OR Apache-2.0
|
244
244
|
|
245
|
-
rb-sys v0.9.
|
245
|
+
rb-sys v0.9.105
|
246
246
|
https://github.com/oxidize-rb/rb-sys
|
247
247
|
MIT OR Apache-2.0
|
248
248
|
|
249
|
-
rb-sys-build v0.9.
|
249
|
+
rb-sys-build v0.9.105
|
250
250
|
https://github.com/oxidize-rb/rb-sys
|
251
251
|
MIT OR Apache-2.0
|
252
252
|
|
@@ -254,15 +254,15 @@ rb-sys-env v0.1.2
|
|
254
254
|
https://github.com/oxidize-rb/rb-sys
|
255
255
|
MIT OR Apache-2.0
|
256
256
|
|
257
|
-
regex v1.
|
257
|
+
regex v1.11.1
|
258
258
|
https://github.com/rust-lang/regex
|
259
259
|
MIT OR Apache-2.0
|
260
260
|
|
261
|
-
regex-automata v0.4.
|
261
|
+
regex-automata v0.4.9
|
262
262
|
https://github.com/rust-lang/regex/tree/master/regex-automata
|
263
263
|
MIT OR Apache-2.0
|
264
264
|
|
265
|
-
regex-syntax v0.8.
|
265
|
+
regex-syntax v0.8.5
|
266
266
|
https://github.com/rust-lang/regex/tree/master/regex-syntax
|
267
267
|
MIT OR Apache-2.0
|
268
268
|
|
@@ -278,15 +278,15 @@ seq-macro v0.3.5
|
|
278
278
|
https://github.com/dtolnay/seq-macro
|
279
279
|
MIT OR Apache-2.0
|
280
280
|
|
281
|
-
serde v1.0.
|
281
|
+
serde v1.0.217
|
282
282
|
https://serde.rs
|
283
283
|
MIT OR Apache-2.0
|
284
284
|
|
285
|
-
serde_derive v1.0.
|
285
|
+
serde_derive v1.0.217
|
286
286
|
https://serde.rs
|
287
287
|
MIT OR Apache-2.0
|
288
288
|
|
289
|
-
serde_json v1.0.
|
289
|
+
serde_json v1.0.134
|
290
290
|
https://github.com/serde-rs/json
|
291
291
|
MIT OR Apache-2.0
|
292
292
|
|
@@ -310,35 +310,35 @@ strsim v0.11.1
|
|
310
310
|
https://github.com/rapidfuzz/strsim-rs
|
311
311
|
MIT
|
312
312
|
|
313
|
-
syn v2.0.
|
313
|
+
syn v2.0.93
|
314
314
|
https://github.com/dtolnay/syn
|
315
315
|
MIT OR Apache-2.0
|
316
316
|
|
317
|
-
thiserror v1.0.
|
317
|
+
thiserror v1.0.69
|
318
318
|
https://github.com/dtolnay/thiserror
|
319
319
|
MIT OR Apache-2.0
|
320
320
|
|
321
|
-
thiserror-impl v1.0.
|
321
|
+
thiserror-impl v1.0.69
|
322
322
|
https://github.com/dtolnay/thiserror
|
323
323
|
MIT OR Apache-2.0
|
324
324
|
|
325
|
-
tokenizers v0.
|
325
|
+
tokenizers v0.21.0
|
326
326
|
https://github.com/huggingface/tokenizers
|
327
327
|
Apache-2.0
|
328
328
|
|
329
|
-
unicode-ident v1.0.
|
329
|
+
unicode-ident v1.0.14
|
330
330
|
https://github.com/dtolnay/unicode-ident
|
331
|
-
(MIT OR Apache-2.0) AND Unicode-
|
331
|
+
(MIT OR Apache-2.0) AND Unicode-3.0
|
332
332
|
|
333
333
|
unicode-normalization-alignments v0.1.12
|
334
334
|
https://github.com/n1t0/unicode-normalization
|
335
335
|
MIT/Apache-2.0
|
336
336
|
|
337
|
-
unicode-segmentation v1.
|
337
|
+
unicode-segmentation v1.12.0
|
338
338
|
https://github.com/unicode-rs/unicode-segmentation
|
339
|
-
MIT
|
339
|
+
MIT OR Apache-2.0
|
340
340
|
|
341
|
-
unicode-width v0.
|
341
|
+
unicode-width v0.2.0
|
342
342
|
https://github.com/unicode-rs/unicode-width
|
343
343
|
MIT OR Apache-2.0
|
344
344
|
|
@@ -15374,52 +15374,45 @@ DEALINGS IN THE SOFTWARE.
|
|
15374
15374
|
unicode-ident LICENSE-UNICODE
|
15375
15375
|
================================================================================
|
15376
15376
|
|
15377
|
-
UNICODE
|
15378
|
-
|
15379
|
-
See Terms of Use <https://www.unicode.org/copyright.html>
|
15380
|
-
for definitions of Unicode Inc.’s Data Files and Software.
|
15381
|
-
|
15382
|
-
NOTICE TO USER: Carefully read the following legal agreement.
|
15383
|
-
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
|
15384
|
-
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
|
15385
|
-
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
15386
|
-
TERMS AND CONDITIONS OF THIS AGREEMENT.
|
15387
|
-
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
|
15388
|
-
THE DATA FILES OR SOFTWARE.
|
15377
|
+
UNICODE LICENSE V3
|
15389
15378
|
|
15390
15379
|
COPYRIGHT AND PERMISSION NOTICE
|
15391
15380
|
|
15392
|
-
Copyright © 1991-
|
15393
|
-
|
15394
|
-
|
15395
|
-
|
15396
|
-
|
15397
|
-
|
15398
|
-
|
15399
|
-
|
15400
|
-
|
15401
|
-
|
15402
|
-
|
15403
|
-
|
15404
|
-
|
15405
|
-
|
15406
|
-
|
15407
|
-
|
15408
|
-
|
15409
|
-
|
15410
|
-
|
15411
|
-
|
15412
|
-
|
15413
|
-
|
15414
|
-
|
15415
|
-
|
15416
|
-
|
15417
|
-
|
15418
|
-
|
15419
|
-
|
15420
|
-
|
15421
|
-
|
15422
|
-
|
15381
|
+
Copyright © 1991-2023 Unicode, Inc.
|
15382
|
+
|
15383
|
+
NOTICE TO USER: Carefully read the following legal agreement. BY
|
15384
|
+
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
|
15385
|
+
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
15386
|
+
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
|
15387
|
+
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
|
15388
|
+
|
15389
|
+
Permission is hereby granted, free of charge, to any person obtaining a
|
15390
|
+
copy of data files and any associated documentation (the "Data Files") or
|
15391
|
+
software and any associated documentation (the "Software") to deal in the
|
15392
|
+
Data Files or Software without restriction, including without limitation
|
15393
|
+
the rights to use, copy, modify, merge, publish, distribute, and/or sell
|
15394
|
+
copies of the Data Files or Software, and to permit persons to whom the
|
15395
|
+
Data Files or Software are furnished to do so, provided that either (a)
|
15396
|
+
this copyright and permission notice appear with all copies of the Data
|
15397
|
+
Files or Software, or (b) this copyright and permission notice appear in
|
15398
|
+
associated Documentation.
|
15399
|
+
|
15400
|
+
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
15401
|
+
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
15402
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
|
15403
|
+
THIRD PARTY RIGHTS.
|
15404
|
+
|
15405
|
+
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
|
15406
|
+
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
|
15407
|
+
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
15408
|
+
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
15409
|
+
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
|
15410
|
+
FILES OR SOFTWARE.
|
15411
|
+
|
15412
|
+
Except as contained in this notice, the name of a copyright holder shall
|
15413
|
+
not be used in advertising or otherwise to promote the sale, use or other
|
15414
|
+
dealings in these Data Files or Software without prior written
|
15415
|
+
authorization of the copyright holder.
|
15423
15416
|
|
15424
15417
|
================================================================================
|
15425
15418
|
unicode-normalization-alignments LICENSE-APACHE
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/tokenizers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokenizers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.4
|
5
5
|
platform: x86_64-linux
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -26,6 +26,7 @@ files:
|
|
26
26
|
- lib/tokenizers/3.1/tokenizers.so
|
27
27
|
- lib/tokenizers/3.2/tokenizers.so
|
28
28
|
- lib/tokenizers/3.3/tokenizers.so
|
29
|
+
- lib/tokenizers/3.4/tokenizers.so
|
29
30
|
- lib/tokenizers/added_token.rb
|
30
31
|
- lib/tokenizers/char_bpe_tokenizer.rb
|
31
32
|
- lib/tokenizers/decoders/bpe_decoder.rb
|
@@ -71,14 +72,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
71
72
|
version: '3.1'
|
72
73
|
- - "<"
|
73
74
|
- !ruby/object:Gem::Version
|
74
|
-
version: 3.
|
75
|
+
version: 3.5.dev
|
75
76
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
77
|
requirements:
|
77
78
|
- - ">="
|
78
79
|
- !ruby/object:Gem::Version
|
79
80
|
version: '0'
|
80
81
|
requirements: []
|
81
|
-
rubygems_version: 3.
|
82
|
+
rubygems_version: 3.5.23
|
82
83
|
signing_key:
|
83
84
|
specification_version: 4
|
84
85
|
summary: Fast state-of-the-art tokenizers for Ruby
|