tokenizers 0.4.4-x86_64-linux → 0.5.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Cargo.lock +55 -72
- data/LICENSE-THIRD-PARTY.txt +107 -647
- data/lib/tokenizers/3.1/tokenizers.so +0 -0
- data/lib/tokenizers/3.2/tokenizers.so +0 -0
- data/lib/tokenizers/3.3/tokenizers.so +0 -0
- data/lib/tokenizers/decoders/metaspace.rb +2 -2
- data/lib/tokenizers/from_pretrained.rb +1 -1
- data/lib/tokenizers/pre_tokenizers/metaspace.rb +2 -2
- data/lib/tokenizers/version.rb +1 -1
- metadata +3 -4
- data/lib/tokenizers/3.0/tokenizers.so +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 73c92c500a274ba952cfd529fc69f96613e19f2dc2acb0e608eba84e6477442f
|
4
|
+
data.tar.gz: da18d3aa30c01ae75b8b50c1fa675fd6a32acabe3f14c5a30802b6078c9801e4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 544f43731aba4981487a1e14a13b76dd627ec60e6c68f808d47b4b721a0153fe427402e845e7595ca729ab877008b6a452b32b95a851bfb1236b273c3e64d10b
|
7
|
+
data.tar.gz: 38ca9c1e9dd511f0a875de4ba83df3015186c9e609d740525da9ee7660db70e90e1f539c20e38c43ae44fbe7df09e8ffc0781de8e7826fbe0f7a8f5c86bd6125
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## 0.5.0 (2024-05-21)
|
2
|
+
|
3
|
+
- Updated Tokenizers to 0.19.1
|
4
|
+
- Replaced `add_prefix_space` with `prepend_scheme` and `split` options for `Metaspace` decoder and pre-tokenizer
|
5
|
+
- Dropped support for Ruby < 3.1
|
6
|
+
|
1
7
|
## 0.4.4 (2024-02-27)
|
2
8
|
|
3
9
|
- Updated Tokenizers to 0.15.2
|
data/Cargo.lock
CHANGED
@@ -40,7 +40,7 @@ dependencies = [
|
|
40
40
|
"regex",
|
41
41
|
"rustc-hash",
|
42
42
|
"shlex",
|
43
|
-
"syn
|
43
|
+
"syn",
|
44
44
|
]
|
45
45
|
|
46
46
|
[[package]]
|
@@ -135,9 +135,9 @@ dependencies = [
|
|
135
135
|
|
136
136
|
[[package]]
|
137
137
|
name = "darling"
|
138
|
-
version = "0.
|
138
|
+
version = "0.20.8"
|
139
139
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
140
|
-
checksum = "
|
140
|
+
checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391"
|
141
141
|
dependencies = [
|
142
142
|
"darling_core",
|
143
143
|
"darling_macro",
|
@@ -145,58 +145,58 @@ dependencies = [
|
|
145
145
|
|
146
146
|
[[package]]
|
147
147
|
name = "darling_core"
|
148
|
-
version = "0.
|
148
|
+
version = "0.20.8"
|
149
149
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
150
|
-
checksum = "
|
150
|
+
checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f"
|
151
151
|
dependencies = [
|
152
152
|
"fnv",
|
153
153
|
"ident_case",
|
154
154
|
"proc-macro2",
|
155
155
|
"quote",
|
156
156
|
"strsim",
|
157
|
-
"syn
|
157
|
+
"syn",
|
158
158
|
]
|
159
159
|
|
160
160
|
[[package]]
|
161
161
|
name = "darling_macro"
|
162
|
-
version = "0.
|
162
|
+
version = "0.20.8"
|
163
163
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
164
|
-
checksum = "
|
164
|
+
checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f"
|
165
165
|
dependencies = [
|
166
166
|
"darling_core",
|
167
167
|
"quote",
|
168
|
-
"syn
|
168
|
+
"syn",
|
169
169
|
]
|
170
170
|
|
171
171
|
[[package]]
|
172
172
|
name = "derive_builder"
|
173
|
-
version = "0.
|
173
|
+
version = "0.20.0"
|
174
174
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
175
|
-
checksum = "
|
175
|
+
checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
|
176
176
|
dependencies = [
|
177
177
|
"derive_builder_macro",
|
178
178
|
]
|
179
179
|
|
180
180
|
[[package]]
|
181
181
|
name = "derive_builder_core"
|
182
|
-
version = "0.
|
182
|
+
version = "0.20.0"
|
183
183
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
184
|
-
checksum = "
|
184
|
+
checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
|
185
185
|
dependencies = [
|
186
186
|
"darling",
|
187
187
|
"proc-macro2",
|
188
188
|
"quote",
|
189
|
-
"syn
|
189
|
+
"syn",
|
190
190
|
]
|
191
191
|
|
192
192
|
[[package]]
|
193
193
|
name = "derive_builder_macro"
|
194
|
-
version = "0.
|
194
|
+
version = "0.20.0"
|
195
195
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
196
|
-
checksum = "
|
196
|
+
checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
|
197
197
|
dependencies = [
|
198
198
|
"derive_builder_core",
|
199
|
-
"syn
|
199
|
+
"syn",
|
200
200
|
]
|
201
201
|
|
202
202
|
[[package]]
|
@@ -350,9 +350,9 @@ checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568"
|
|
350
350
|
|
351
351
|
[[package]]
|
352
352
|
name = "magnus"
|
353
|
-
version = "0.6.
|
353
|
+
version = "0.6.4"
|
354
354
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
355
|
-
checksum = "
|
355
|
+
checksum = "b1597ef40aa8c36be098249e82c9a20cf7199278ac1c1a1a995eeead6a184479"
|
356
356
|
dependencies = [
|
357
357
|
"magnus-macros",
|
358
358
|
"rb-sys",
|
@@ -368,7 +368,7 @@ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
|
|
368
368
|
dependencies = [
|
369
369
|
"proc-macro2",
|
370
370
|
"quote",
|
371
|
-
"syn
|
371
|
+
"syn",
|
372
372
|
]
|
373
373
|
|
374
374
|
[[package]]
|
@@ -394,9 +394,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
|
394
394
|
|
395
395
|
[[package]]
|
396
396
|
name = "monostate"
|
397
|
-
version = "0.1.
|
397
|
+
version = "0.1.12"
|
398
398
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
399
|
-
checksum = "
|
399
|
+
checksum = "a20fffcd8ca4c69d31e036a71abc400147b41f90895df4edcb36497a1f8af8bf"
|
400
400
|
dependencies = [
|
401
401
|
"monostate-impl",
|
402
402
|
"serde",
|
@@ -404,13 +404,13 @@ dependencies = [
|
|
404
404
|
|
405
405
|
[[package]]
|
406
406
|
name = "monostate-impl"
|
407
|
-
version = "0.1.
|
407
|
+
version = "0.1.12"
|
408
408
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
409
|
-
checksum = "
|
409
|
+
checksum = "bf307cbbbd777a9c10cec88ddafee572b3484caad5cce0c9236523c3803105a6"
|
410
410
|
dependencies = [
|
411
411
|
"proc-macro2",
|
412
412
|
"quote",
|
413
|
-
"syn
|
413
|
+
"syn",
|
414
414
|
]
|
415
415
|
|
416
416
|
[[package]]
|
@@ -489,18 +489,18 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
|
489
489
|
|
490
490
|
[[package]]
|
491
491
|
name = "proc-macro2"
|
492
|
-
version = "1.0.
|
492
|
+
version = "1.0.81"
|
493
493
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
494
|
-
checksum = "
|
494
|
+
checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba"
|
495
495
|
dependencies = [
|
496
496
|
"unicode-ident",
|
497
497
|
]
|
498
498
|
|
499
499
|
[[package]]
|
500
500
|
name = "quote"
|
501
|
-
version = "1.0.
|
501
|
+
version = "1.0.36"
|
502
502
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
503
|
-
checksum = "
|
503
|
+
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
504
504
|
dependencies = [
|
505
505
|
"proc-macro2",
|
506
506
|
]
|
@@ -537,9 +537,9 @@ dependencies = [
|
|
537
537
|
|
538
538
|
[[package]]
|
539
539
|
name = "rayon"
|
540
|
-
version = "1.
|
540
|
+
version = "1.10.0"
|
541
541
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
542
|
-
checksum = "
|
542
|
+
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
|
543
543
|
dependencies = [
|
544
544
|
"either",
|
545
545
|
"rayon-core",
|
@@ -558,9 +558,9 @@ dependencies = [
|
|
558
558
|
|
559
559
|
[[package]]
|
560
560
|
name = "rayon-core"
|
561
|
-
version = "1.12.
|
561
|
+
version = "1.12.1"
|
562
562
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
563
|
-
checksum = "
|
563
|
+
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
|
564
564
|
dependencies = [
|
565
565
|
"crossbeam-deque",
|
566
566
|
"crossbeam-utils",
|
@@ -568,18 +568,18 @@ dependencies = [
|
|
568
568
|
|
569
569
|
[[package]]
|
570
570
|
name = "rb-sys"
|
571
|
-
version = "0.9.
|
571
|
+
version = "0.9.97"
|
572
572
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
573
|
-
checksum = "
|
573
|
+
checksum = "47d30bcad206b51f2f66121190ca678dce1fdf3a2eae0ac5d838d1818b19bdf5"
|
574
574
|
dependencies = [
|
575
575
|
"rb-sys-build",
|
576
576
|
]
|
577
577
|
|
578
578
|
[[package]]
|
579
579
|
name = "rb-sys-build"
|
580
|
-
version = "0.9.
|
580
|
+
version = "0.9.97"
|
581
581
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
582
|
-
checksum = "
|
582
|
+
checksum = "3cbd92f281615f3c2dcb9dcb0f0576624752afbf9a7f99173b37c4b55b62dd8a"
|
583
583
|
dependencies = [
|
584
584
|
"bindgen",
|
585
585
|
"lazy_static",
|
@@ -587,7 +587,7 @@ dependencies = [
|
|
587
587
|
"quote",
|
588
588
|
"regex",
|
589
589
|
"shell-words",
|
590
|
-
"syn
|
590
|
+
"syn",
|
591
591
|
]
|
592
592
|
|
593
593
|
[[package]]
|
@@ -598,33 +598,27 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
|
598
598
|
|
599
599
|
[[package]]
|
600
600
|
name = "regex"
|
601
|
-
version = "1.
|
601
|
+
version = "1.10.4"
|
602
602
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
603
|
-
checksum = "
|
603
|
+
checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
|
604
604
|
dependencies = [
|
605
605
|
"aho-corasick",
|
606
606
|
"memchr",
|
607
607
|
"regex-automata",
|
608
|
-
"regex-syntax
|
608
|
+
"regex-syntax",
|
609
609
|
]
|
610
610
|
|
611
611
|
[[package]]
|
612
612
|
name = "regex-automata"
|
613
|
-
version = "0.
|
613
|
+
version = "0.4.6"
|
614
614
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
615
|
-
checksum = "
|
615
|
+
checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
|
616
616
|
dependencies = [
|
617
617
|
"aho-corasick",
|
618
618
|
"memchr",
|
619
|
-
"regex-syntax
|
619
|
+
"regex-syntax",
|
620
620
|
]
|
621
621
|
|
622
|
-
[[package]]
|
623
|
-
name = "regex-syntax"
|
624
|
-
version = "0.7.5"
|
625
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
626
|
-
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
|
627
|
-
|
628
622
|
[[package]]
|
629
623
|
name = "regex-syntax"
|
630
624
|
version = "0.8.2"
|
@@ -672,7 +666,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
|
|
672
666
|
dependencies = [
|
673
667
|
"proc-macro2",
|
674
668
|
"quote",
|
675
|
-
"syn
|
669
|
+
"syn",
|
676
670
|
]
|
677
671
|
|
678
672
|
[[package]]
|
@@ -724,20 +718,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
|
724
718
|
|
725
719
|
[[package]]
|
726
720
|
name = "syn"
|
727
|
-
version = "
|
728
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
729
|
-
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
730
|
-
dependencies = [
|
731
|
-
"proc-macro2",
|
732
|
-
"quote",
|
733
|
-
"unicode-ident",
|
734
|
-
]
|
735
|
-
|
736
|
-
[[package]]
|
737
|
-
name = "syn"
|
738
|
-
version = "2.0.38"
|
721
|
+
version = "2.0.59"
|
739
722
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
740
|
-
checksum = "
|
723
|
+
checksum = "4a6531ffc7b071655e4ce2e04bd464c4830bb585a61cabb96cf808f05172615a"
|
741
724
|
dependencies = [
|
742
725
|
"proc-macro2",
|
743
726
|
"quote",
|
@@ -761,24 +744,24 @@ checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
|
|
761
744
|
dependencies = [
|
762
745
|
"proc-macro2",
|
763
746
|
"quote",
|
764
|
-
"syn
|
747
|
+
"syn",
|
765
748
|
]
|
766
749
|
|
767
750
|
[[package]]
|
768
751
|
name = "tokenizers"
|
769
|
-
version = "0.
|
752
|
+
version = "0.5.0"
|
770
753
|
dependencies = [
|
771
754
|
"magnus",
|
772
755
|
"onig",
|
773
756
|
"serde",
|
774
|
-
"tokenizers 0.
|
757
|
+
"tokenizers 0.19.1",
|
775
758
|
]
|
776
759
|
|
777
760
|
[[package]]
|
778
761
|
name = "tokenizers"
|
779
|
-
version = "0.
|
762
|
+
version = "0.19.1"
|
780
763
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
781
|
-
checksum = "
|
764
|
+
checksum = "e500fad1dd3af3d626327e6a3fe5050e664a6eaa4708b8ca92f1794aaf73e6fd"
|
782
765
|
dependencies = [
|
783
766
|
"aho-corasick",
|
784
767
|
"derive_builder",
|
@@ -796,7 +779,7 @@ dependencies = [
|
|
796
779
|
"rayon",
|
797
780
|
"rayon-cond",
|
798
781
|
"regex",
|
799
|
-
"regex-syntax
|
782
|
+
"regex-syntax",
|
800
783
|
"serde",
|
801
784
|
"serde_json",
|
802
785
|
"spm_precompiled",
|
@@ -823,9 +806,9 @@ dependencies = [
|
|
823
806
|
|
824
807
|
[[package]]
|
825
808
|
name = "unicode-segmentation"
|
826
|
-
version = "1.
|
809
|
+
version = "1.11.0"
|
827
810
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
828
|
-
checksum = "
|
811
|
+
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
|
829
812
|
|
830
813
|
[[package]]
|
831
814
|
name = "unicode-width"
|