tokenizers 0.4.4-arm64-darwin → 0.5.0-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +55 -72
- data/LICENSE-THIRD-PARTY.txt +107 -647
- data/lib/tokenizers/3.1/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.2/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.3/tokenizers.bundle +0 -0
- data/lib/tokenizers/decoders/metaspace.rb +2 -2
- data/lib/tokenizers/from_pretrained.rb +1 -1
- data/lib/tokenizers/pre_tokenizers/metaspace.rb +2 -2
- data/lib/tokenizers/version.rb +1 -1
- metadata +3 -4
- data/lib/tokenizers/3.0/tokenizers.bundle +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 55411471aeb09dbf1b750230debf729e6a4c4c10bc7f0a4a9c3c928485521b5c
|
4
|
+
data.tar.gz: b2a217139911aa0a2521b7713f41db74797daa550b1bed11c0a31941114f34eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4874409d41935efe0df377c809879299bc38eecbf2fc15bf699108cead7eb48411cc9500ec40fc5cc0e59b3b4754cf27fdd4b4143b22f71f7a10621427a5bfb
|
7
|
+
data.tar.gz: 14d308316afb82ad511f36b8d8c1b60e0043d7406fc0c1c31999119c2f67d86575bf11964a8bc4a6d4bf23899fe4101cb75a726dfa4aa64f33ec8f0247d44dc9
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 0.5.0 (2024-05-21)
|
2
|
+
|
3
|
+
- Updated Tokenizers to 0.19.1
|
4
|
+
- Replaced `add_prefix_space` with `prepend_scheme` and `split` options for `Metaspace` decoder and pre-tokenizer
|
5
|
+
- Dropped support for Ruby < 3.1
|
6
|
+
|
1
7
|
## 0.4.4 (2024-02-27)
|
2
8
|
|
3
9
|
- Updated Tokenizers to 0.15.2
|
data/Cargo.lock
CHANGED
@@ -40,7 +40,7 @@ dependencies = [
|
|
40
40
|
"regex",
|
41
41
|
"rustc-hash",
|
42
42
|
"shlex",
|
43
|
-
"syn
|
43
|
+
"syn",
|
44
44
|
]
|
45
45
|
|
46
46
|
[[package]]
|
@@ -135,9 +135,9 @@ dependencies = [
|
|
135
135
|
|
136
136
|
[[package]]
|
137
137
|
name = "darling"
|
138
|
-
version = "0.
|
138
|
+
version = "0.20.8"
|
139
139
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
140
|
-
checksum = "
|
140
|
+
checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391"
|
141
141
|
dependencies = [
|
142
142
|
"darling_core",
|
143
143
|
"darling_macro",
|
@@ -145,58 +145,58 @@ dependencies = [
|
|
145
145
|
|
146
146
|
[[package]]
|
147
147
|
name = "darling_core"
|
148
|
-
version = "0.
|
148
|
+
version = "0.20.8"
|
149
149
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
150
|
-
checksum = "
|
150
|
+
checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f"
|
151
151
|
dependencies = [
|
152
152
|
"fnv",
|
153
153
|
"ident_case",
|
154
154
|
"proc-macro2",
|
155
155
|
"quote",
|
156
156
|
"strsim",
|
157
|
-
"syn
|
157
|
+
"syn",
|
158
158
|
]
|
159
159
|
|
160
160
|
[[package]]
|
161
161
|
name = "darling_macro"
|
162
|
-
version = "0.
|
162
|
+
version = "0.20.8"
|
163
163
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
164
|
-
checksum = "
|
164
|
+
checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f"
|
165
165
|
dependencies = [
|
166
166
|
"darling_core",
|
167
167
|
"quote",
|
168
|
-
"syn
|
168
|
+
"syn",
|
169
169
|
]
|
170
170
|
|
171
171
|
[[package]]
|
172
172
|
name = "derive_builder"
|
173
|
-
version = "0.
|
173
|
+
version = "0.20.0"
|
174
174
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
175
|
-
checksum = "
|
175
|
+
checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
|
176
176
|
dependencies = [
|
177
177
|
"derive_builder_macro",
|
178
178
|
]
|
179
179
|
|
180
180
|
[[package]]
|
181
181
|
name = "derive_builder_core"
|
182
|
-
version = "0.
|
182
|
+
version = "0.20.0"
|
183
183
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
184
|
-
checksum = "
|
184
|
+
checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
|
185
185
|
dependencies = [
|
186
186
|
"darling",
|
187
187
|
"proc-macro2",
|
188
188
|
"quote",
|
189
|
-
"syn
|
189
|
+
"syn",
|
190
190
|
]
|
191
191
|
|
192
192
|
[[package]]
|
193
193
|
name = "derive_builder_macro"
|
194
|
-
version = "0.
|
194
|
+
version = "0.20.0"
|
195
195
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
196
|
-
checksum = "
|
196
|
+
checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
|
197
197
|
dependencies = [
|
198
198
|
"derive_builder_core",
|
199
|
-
"syn
|
199
|
+
"syn",
|
200
200
|
]
|
201
201
|
|
202
202
|
[[package]]
|
@@ -350,9 +350,9 @@ checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568"
|
|
350
350
|
|
351
351
|
[[package]]
|
352
352
|
name = "magnus"
|
353
|
-
version = "0.6.
|
353
|
+
version = "0.6.4"
|
354
354
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
355
|
-
checksum = "
|
355
|
+
checksum = "b1597ef40aa8c36be098249e82c9a20cf7199278ac1c1a1a995eeead6a184479"
|
356
356
|
dependencies = [
|
357
357
|
"magnus-macros",
|
358
358
|
"rb-sys",
|
@@ -368,7 +368,7 @@ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
|
|
368
368
|
dependencies = [
|
369
369
|
"proc-macro2",
|
370
370
|
"quote",
|
371
|
-
"syn
|
371
|
+
"syn",
|
372
372
|
]
|
373
373
|
|
374
374
|
[[package]]
|
@@ -394,9 +394,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
|
394
394
|
|
395
395
|
[[package]]
|
396
396
|
name = "monostate"
|
397
|
-
version = "0.1.
|
397
|
+
version = "0.1.12"
|
398
398
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
399
|
-
checksum = "
|
399
|
+
checksum = "a20fffcd8ca4c69d31e036a71abc400147b41f90895df4edcb36497a1f8af8bf"
|
400
400
|
dependencies = [
|
401
401
|
"monostate-impl",
|
402
402
|
"serde",
|
@@ -404,13 +404,13 @@ dependencies = [
|
|
404
404
|
|
405
405
|
[[package]]
|
406
406
|
name = "monostate-impl"
|
407
|
-
version = "0.1.
|
407
|
+
version = "0.1.12"
|
408
408
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
409
|
-
checksum = "
|
409
|
+
checksum = "bf307cbbbd777a9c10cec88ddafee572b3484caad5cce0c9236523c3803105a6"
|
410
410
|
dependencies = [
|
411
411
|
"proc-macro2",
|
412
412
|
"quote",
|
413
|
-
"syn
|
413
|
+
"syn",
|
414
414
|
]
|
415
415
|
|
416
416
|
[[package]]
|
@@ -489,18 +489,18 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
|
489
489
|
|
490
490
|
[[package]]
|
491
491
|
name = "proc-macro2"
|
492
|
-
version = "1.0.
|
492
|
+
version = "1.0.81"
|
493
493
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
494
|
-
checksum = "
|
494
|
+
checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba"
|
495
495
|
dependencies = [
|
496
496
|
"unicode-ident",
|
497
497
|
]
|
498
498
|
|
499
499
|
[[package]]
|
500
500
|
name = "quote"
|
501
|
-
version = "1.0.
|
501
|
+
version = "1.0.36"
|
502
502
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
503
|
-
checksum = "
|
503
|
+
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
504
504
|
dependencies = [
|
505
505
|
"proc-macro2",
|
506
506
|
]
|
@@ -537,9 +537,9 @@ dependencies = [
|
|
537
537
|
|
538
538
|
[[package]]
|
539
539
|
name = "rayon"
|
540
|
-
version = "1.
|
540
|
+
version = "1.10.0"
|
541
541
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
542
|
-
checksum = "
|
542
|
+
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
|
543
543
|
dependencies = [
|
544
544
|
"either",
|
545
545
|
"rayon-core",
|
@@ -558,9 +558,9 @@ dependencies = [
|
|
558
558
|
|
559
559
|
[[package]]
|
560
560
|
name = "rayon-core"
|
561
|
-
version = "1.12.
|
561
|
+
version = "1.12.1"
|
562
562
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
563
|
-
checksum = "
|
563
|
+
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
|
564
564
|
dependencies = [
|
565
565
|
"crossbeam-deque",
|
566
566
|
"crossbeam-utils",
|
@@ -568,18 +568,18 @@ dependencies = [
|
|
568
568
|
|
569
569
|
[[package]]
|
570
570
|
name = "rb-sys"
|
571
|
-
version = "0.9.
|
571
|
+
version = "0.9.97"
|
572
572
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
573
|
-
checksum = "
|
573
|
+
checksum = "47d30bcad206b51f2f66121190ca678dce1fdf3a2eae0ac5d838d1818b19bdf5"
|
574
574
|
dependencies = [
|
575
575
|
"rb-sys-build",
|
576
576
|
]
|
577
577
|
|
578
578
|
[[package]]
|
579
579
|
name = "rb-sys-build"
|
580
|
-
version = "0.9.
|
580
|
+
version = "0.9.97"
|
581
581
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
582
|
-
checksum = "
|
582
|
+
checksum = "3cbd92f281615f3c2dcb9dcb0f0576624752afbf9a7f99173b37c4b55b62dd8a"
|
583
583
|
dependencies = [
|
584
584
|
"bindgen",
|
585
585
|
"lazy_static",
|
@@ -587,7 +587,7 @@ dependencies = [
|
|
587
587
|
"quote",
|
588
588
|
"regex",
|
589
589
|
"shell-words",
|
590
|
-
"syn
|
590
|
+
"syn",
|
591
591
|
]
|
592
592
|
|
593
593
|
[[package]]
|
@@ -598,33 +598,27 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
598
598
|
|
599
599
|
[[package]]
|
600
600
|
name = "regex"
|
601
|
-
version = "1.
|
601
|
+
version = "1.10.4"
|
602
602
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
603
|
-
checksum = "
|
603
|
+
checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
|
604
604
|
dependencies = [
|
605
605
|
"aho-corasick",
|
606
606
|
"memchr",
|
607
607
|
"regex-automata",
|
608
|
-
"regex-syntax
|
608
|
+
"regex-syntax",
|
609
609
|
]
|
610
610
|
|
611
611
|
[[package]]
|
612
612
|
name = "regex-automata"
|
613
|
-
version = "0.
|
613
|
+
version = "0.4.6"
|
614
614
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
615
|
-
checksum = "
|
615
|
+
checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
|
616
616
|
dependencies = [
|
617
617
|
"aho-corasick",
|
618
618
|
"memchr",
|
619
|
-
"regex-syntax
|
619
|
+
"regex-syntax",
|
620
620
|
]
|
621
621
|
|
622
|
-
[[package]]
|
623
|
-
name = "regex-syntax"
|
624
|
-
version = "0.7.5"
|
625
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
626
|
-
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
|
627
|
-
|
628
622
|
[[package]]
|
629
623
|
name = "regex-syntax"
|
630
624
|
version = "0.8.2"
|
@@ -672,7 +666,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
|
|
672
666
|
dependencies = [
|
673
667
|
"proc-macro2",
|
674
668
|
"quote",
|
675
|
-
"syn
|
669
|
+
"syn",
|
676
670
|
]
|
677
671
|
|
678
672
|
[[package]]
|
@@ -724,20 +718,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
|
724
718
|
|
725
719
|
[[package]]
|
726
720
|
name = "syn"
|
727
|
-
version = "1.0.109"
|
728
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
729
|
-
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
730
|
-
dependencies = [
|
731
|
-
"proc-macro2",
|
732
|
-
"quote",
|
733
|
-
"unicode-ident",
|
734
|
-
]
|
735
|
-
|
736
|
-
[[package]]
|
737
|
-
name = "syn"
|
738
|
-
version = "2.0.38"
|
721
|
+
version = "2.0.59"
|
739
722
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
740
|
-
checksum = "
|
723
|
+
checksum = "4a6531ffc7b071655e4ce2e04bd464c4830bb585a61cabb96cf808f05172615a"
|
741
724
|
dependencies = [
|
742
725
|
"proc-macro2",
|
743
726
|
"quote",
|
@@ -761,24 +744,24 @@ checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
|
|
761
744
|
dependencies = [
|
762
745
|
"proc-macro2",
|
763
746
|
"quote",
|
764
|
-
"syn
|
747
|
+
"syn",
|
765
748
|
]
|
766
749
|
|
767
750
|
[[package]]
|
768
751
|
name = "tokenizers"
|
769
|
-
version = "0.
|
752
|
+
version = "0.5.0"
|
770
753
|
dependencies = [
|
771
754
|
"magnus",
|
772
755
|
"onig",
|
773
756
|
"serde",
|
774
|
-
"tokenizers 0.
|
757
|
+
"tokenizers 0.19.1",
|
775
758
|
]
|
776
759
|
|
777
760
|
[[package]]
|
778
761
|
name = "tokenizers"
|
779
|
-
version = "0.
|
762
|
+
version = "0.19.1"
|
780
763
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
781
|
-
checksum = "
|
764
|
+
checksum = "e500fad1dd3af3d626327e6a3fe5050e664a6eaa4708b8ca92f1794aaf73e6fd"
|
782
765
|
dependencies = [
|
783
766
|
"aho-corasick",
|
784
767
|
"derive_builder",
|
@@ -796,7 +779,7 @@ dependencies = [
|
|
796
779
|
"rayon",
|
797
780
|
"rayon-cond",
|
798
781
|
"regex",
|
799
|
-
"regex-syntax
|
782
|
+
"regex-syntax",
|
800
783
|
"serde",
|
801
784
|
"serde_json",
|
802
785
|
"spm_precompiled",
|
@@ -823,9 +806,9 @@ dependencies = [
|
|
823
806
|
|
824
807
|
[[package]]
|
825
808
|
name = "unicode-segmentation"
|
826
|
-
version = "1.
|
809
|
+
version = "1.11.0"
|
827
810
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
828
|
-
checksum = "
|
811
|
+
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
|
829
812
|
|
830
813
|
[[package]]
|
831
814
|
name = "unicode-width"
|