tokenizers 0.4.3-x86_64-darwin → 0.4.4-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +28 -13
- data/LICENSE-THIRD-PARTY.txt +550 -8
- data/README.md +1 -1
- data/lib/tokenizers/3.0/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.1/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.2/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.3/tokenizers.bundle +0 -0
- data/lib/tokenizers/from_pretrained.rb +1 -1
- data/lib/tokenizers/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ccbb8ef90600ea21debc7908e9bff6f89a88710002b43aa205f2714dd322afdb
|
4
|
+
data.tar.gz: c20b98864fa217ffbd9e0181d0669bad5ac08738790030fbb3afd9822438b814
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f0d5e16ef148ae425762a085948fe687035699cf65cf2fc148df6c1cac4eff832f8b8b0b93f7043d00b5040e17e0cebf5ccf838d6ad129f0f460a880ae0d2a9
|
7
|
+
data.tar.gz: d20c18fa5c165ad1b66f52d17ad71eb030ccc4d2ce562b8ad83b1a42f0aaf774d73c73fe388c1f423d0f1e710bc5ec0db4f813b3d725a3ddd0b2fce1c96757fd
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -280,6 +280,15 @@ dependencies = [
|
|
280
280
|
"either",
|
281
281
|
]
|
282
282
|
|
283
|
+
[[package]]
|
284
|
+
name = "itertools"
|
285
|
+
version = "0.12.1"
|
286
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
287
|
+
checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
|
288
|
+
dependencies = [
|
289
|
+
"either",
|
290
|
+
]
|
291
|
+
|
283
292
|
[[package]]
|
284
293
|
name = "itoa"
|
285
294
|
version = "1.0.6"
|
@@ -543,7 +552,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
543
552
|
checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
|
544
553
|
dependencies = [
|
545
554
|
"either",
|
546
|
-
"itertools",
|
555
|
+
"itertools 0.11.0",
|
547
556
|
"rayon",
|
548
557
|
]
|
549
558
|
|
@@ -559,18 +568,18 @@ dependencies = [
|
|
559
568
|
|
560
569
|
[[package]]
|
561
570
|
name = "rb-sys"
|
562
|
-
version = "0.9.
|
571
|
+
version = "0.9.89"
|
563
572
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
564
|
-
checksum = "
|
573
|
+
checksum = "0d197f2c03751ef006f29d593d22aa9068c9c358e04ca503afea0329c366147c"
|
565
574
|
dependencies = [
|
566
575
|
"rb-sys-build",
|
567
576
|
]
|
568
577
|
|
569
578
|
[[package]]
|
570
579
|
name = "rb-sys-build"
|
571
|
-
version = "0.9.
|
580
|
+
version = "0.9.89"
|
572
581
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
573
|
-
checksum = "
|
582
|
+
checksum = "2b50caf8fd028f12abe00d6debe2ae2adf6202c9ca3caa59487eda710d90fa28"
|
574
583
|
dependencies = [
|
575
584
|
"bindgen",
|
576
585
|
"lazy_static",
|
@@ -596,7 +605,7 @@ dependencies = [
|
|
596
605
|
"aho-corasick",
|
597
606
|
"memchr",
|
598
607
|
"regex-automata",
|
599
|
-
"regex-syntax",
|
608
|
+
"regex-syntax 0.7.5",
|
600
609
|
]
|
601
610
|
|
602
611
|
[[package]]
|
@@ -607,7 +616,7 @@ checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
|
|
607
616
|
dependencies = [
|
608
617
|
"aho-corasick",
|
609
618
|
"memchr",
|
610
|
-
"regex-syntax",
|
619
|
+
"regex-syntax 0.7.5",
|
611
620
|
]
|
612
621
|
|
613
622
|
[[package]]
|
@@ -616,6 +625,12 @@ version = "0.7.5"
|
|
616
625
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
617
626
|
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
|
618
627
|
|
628
|
+
[[package]]
|
629
|
+
name = "regex-syntax"
|
630
|
+
version = "0.8.2"
|
631
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
632
|
+
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
|
633
|
+
|
619
634
|
[[package]]
|
620
635
|
name = "rustc-hash"
|
621
636
|
version = "1.1.0"
|
@@ -751,26 +766,26 @@ dependencies = [
|
|
751
766
|
|
752
767
|
[[package]]
|
753
768
|
name = "tokenizers"
|
754
|
-
version = "0.4.3"
|
769
|
+
version = "0.4.4"
|
755
770
|
dependencies = [
|
756
771
|
"magnus",
|
757
772
|
"onig",
|
758
773
|
"serde",
|
759
|
-
"tokenizers 0.15.
|
774
|
+
"tokenizers 0.15.2",
|
760
775
|
]
|
761
776
|
|
762
777
|
[[package]]
|
763
778
|
name = "tokenizers"
|
764
|
-
version = "0.15.
|
779
|
+
version = "0.15.2"
|
765
780
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
766
|
-
checksum = "
|
781
|
+
checksum = "3dd47962b0ba36e7fd33518fbf1754d136fd1474000162bbf2a8b5fcb2d3654d"
|
767
782
|
dependencies = [
|
768
783
|
"aho-corasick",
|
769
784
|
"derive_builder",
|
770
785
|
"esaxx-rs",
|
771
786
|
"getrandom",
|
772
787
|
"indicatif",
|
773
|
-
"itertools",
|
788
|
+
"itertools 0.12.1",
|
774
789
|
"lazy_static",
|
775
790
|
"log",
|
776
791
|
"macro_rules_attribute",
|
@@ -781,7 +796,7 @@ dependencies = [
|
|
781
796
|
"rayon",
|
782
797
|
"rayon-cond",
|
783
798
|
"regex",
|
784
|
-
"regex-syntax",
|
799
|
+
"regex-syntax 0.8.2",
|
785
800
|
"serde",
|
786
801
|
"serde_json",
|
787
802
|
"spm_precompiled",
|
data/LICENSE-THIRD-PARTY.txt
CHANGED
@@ -114,6 +114,10 @@ itertools v0.11.0
|
|
114
114
|
https://github.com/rust-itertools/itertools
|
115
115
|
MIT OR Apache-2.0
|
116
116
|
|
117
|
+
itertools v0.12.1
|
118
|
+
https://github.com/rust-itertools/itertools
|
119
|
+
MIT OR Apache-2.0
|
120
|
+
|
117
121
|
itoa v1.0.6
|
118
122
|
https://github.com/dtolnay/itoa
|
119
123
|
MIT OR Apache-2.0
|
@@ -246,11 +250,11 @@ rayon-core v1.12.0
|
|
246
250
|
https://github.com/rayon-rs/rayon
|
247
251
|
MIT OR Apache-2.0
|
248
252
|
|
249
|
-
rb-sys v0.9.
|
253
|
+
rb-sys v0.9.89
|
250
254
|
https://github.com/oxidize-rb/rb-sys
|
251
255
|
MIT OR Apache-2.0
|
252
256
|
|
253
|
-
rb-sys-build v0.9.
|
257
|
+
rb-sys-build v0.9.89
|
254
258
|
https://github.com/oxidize-rb/rb-sys
|
255
259
|
MIT OR Apache-2.0
|
256
260
|
|
@@ -270,6 +274,10 @@ regex-syntax v0.7.5
|
|
270
274
|
https://github.com/rust-lang/regex/tree/master/regex-syntax
|
271
275
|
MIT OR Apache-2.0
|
272
276
|
|
277
|
+
regex-syntax v0.8.2
|
278
|
+
https://github.com/rust-lang/regex/tree/master/regex-syntax
|
279
|
+
MIT OR Apache-2.0
|
280
|
+
|
273
281
|
rustc-hash v1.1.0
|
274
282
|
https://github.com/rust-lang-nursery/rustc-hash
|
275
283
|
Apache-2.0/MIT
|
@@ -334,7 +342,7 @@ thiserror-impl v1.0.49
|
|
334
342
|
https://github.com/dtolnay/thiserror
|
335
343
|
MIT OR Apache-2.0
|
336
344
|
|
337
|
-
tokenizers v0.15.
|
345
|
+
tokenizers v0.15.2
|
338
346
|
https://github.com/huggingface/tokenizers
|
339
347
|
Apache-2.0
|
340
348
|
|
@@ -5031,7 +5039,243 @@ SOFTWARE.
|
|
5031
5039
|
|
5032
5040
|
|
5033
5041
|
================================================================================
|
5034
|
-
itertools LICENSE-APACHE
|
5042
|
+
itertools v0.11.0 LICENSE-APACHE
|
5043
|
+
================================================================================
|
5044
|
+
|
5045
|
+
Apache License
|
5046
|
+
Version 2.0, January 2004
|
5047
|
+
http://www.apache.org/licenses/
|
5048
|
+
|
5049
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
5050
|
+
|
5051
|
+
1. Definitions.
|
5052
|
+
|
5053
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
5054
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
5055
|
+
|
5056
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
5057
|
+
the copyright owner that is granting the License.
|
5058
|
+
|
5059
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
5060
|
+
other entities that control, are controlled by, or are under common
|
5061
|
+
control with that entity. For the purposes of this definition,
|
5062
|
+
"control" means (i) the power, direct or indirect, to cause the
|
5063
|
+
direction or management of such entity, whether by contract or
|
5064
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
5065
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
5066
|
+
|
5067
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
5068
|
+
exercising permissions granted by this License.
|
5069
|
+
|
5070
|
+
"Source" form shall mean the preferred form for making modifications,
|
5071
|
+
including but not limited to software source code, documentation
|
5072
|
+
source, and configuration files.
|
5073
|
+
|
5074
|
+
"Object" form shall mean any form resulting from mechanical
|
5075
|
+
transformation or translation of a Source form, including but
|
5076
|
+
not limited to compiled object code, generated documentation,
|
5077
|
+
and conversions to other media types.
|
5078
|
+
|
5079
|
+
"Work" shall mean the work of authorship, whether in Source or
|
5080
|
+
Object form, made available under the License, as indicated by a
|
5081
|
+
copyright notice that is included in or attached to the work
|
5082
|
+
(an example is provided in the Appendix below).
|
5083
|
+
|
5084
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
5085
|
+
form, that is based on (or derived from) the Work and for which the
|
5086
|
+
editorial revisions, annotations, elaborations, or other modifications
|
5087
|
+
represent, as a whole, an original work of authorship. For the purposes
|
5088
|
+
of this License, Derivative Works shall not include works that remain
|
5089
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
5090
|
+
the Work and Derivative Works thereof.
|
5091
|
+
|
5092
|
+
"Contribution" shall mean any work of authorship, including
|
5093
|
+
the original version of the Work and any modifications or additions
|
5094
|
+
to that Work or Derivative Works thereof, that is intentionally
|
5095
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
5096
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
5097
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
5098
|
+
means any form of electronic, verbal, or written communication sent
|
5099
|
+
to the Licensor or its representatives, including but not limited to
|
5100
|
+
communication on electronic mailing lists, source code control systems,
|
5101
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
5102
|
+
Licensor for the purpose of discussing and improving the Work, but
|
5103
|
+
excluding communication that is conspicuously marked or otherwise
|
5104
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
5105
|
+
|
5106
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
5107
|
+
on behalf of whom a Contribution has been received by Licensor and
|
5108
|
+
subsequently incorporated within the Work.
|
5109
|
+
|
5110
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
5111
|
+
this License, each Contributor hereby grants to You a perpetual,
|
5112
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
5113
|
+
copyright license to reproduce, prepare Derivative Works of,
|
5114
|
+
publicly display, publicly perform, sublicense, and distribute the
|
5115
|
+
Work and such Derivative Works in Source or Object form.
|
5116
|
+
|
5117
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
5118
|
+
this License, each Contributor hereby grants to You a perpetual,
|
5119
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
5120
|
+
(except as stated in this section) patent license to make, have made,
|
5121
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
5122
|
+
where such license applies only to those patent claims licensable
|
5123
|
+
by such Contributor that are necessarily infringed by their
|
5124
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
5125
|
+
with the Work to which such Contribution(s) was submitted. If You
|
5126
|
+
institute patent litigation against any entity (including a
|
5127
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
5128
|
+
or a Contribution incorporated within the Work constitutes direct
|
5129
|
+
or contributory patent infringement, then any patent licenses
|
5130
|
+
granted to You under this License for that Work shall terminate
|
5131
|
+
as of the date such litigation is filed.
|
5132
|
+
|
5133
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
5134
|
+
Work or Derivative Works thereof in any medium, with or without
|
5135
|
+
modifications, and in Source or Object form, provided that You
|
5136
|
+
meet the following conditions:
|
5137
|
+
|
5138
|
+
(a) You must give any other recipients of the Work or
|
5139
|
+
Derivative Works a copy of this License; and
|
5140
|
+
|
5141
|
+
(b) You must cause any modified files to carry prominent notices
|
5142
|
+
stating that You changed the files; and
|
5143
|
+
|
5144
|
+
(c) You must retain, in the Source form of any Derivative Works
|
5145
|
+
that You distribute, all copyright, patent, trademark, and
|
5146
|
+
attribution notices from the Source form of the Work,
|
5147
|
+
excluding those notices that do not pertain to any part of
|
5148
|
+
the Derivative Works; and
|
5149
|
+
|
5150
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
5151
|
+
distribution, then any Derivative Works that You distribute must
|
5152
|
+
include a readable copy of the attribution notices contained
|
5153
|
+
within such NOTICE file, excluding those notices that do not
|
5154
|
+
pertain to any part of the Derivative Works, in at least one
|
5155
|
+
of the following places: within a NOTICE text file distributed
|
5156
|
+
as part of the Derivative Works; within the Source form or
|
5157
|
+
documentation, if provided along with the Derivative Works; or,
|
5158
|
+
within a display generated by the Derivative Works, if and
|
5159
|
+
wherever such third-party notices normally appear. The contents
|
5160
|
+
of the NOTICE file are for informational purposes only and
|
5161
|
+
do not modify the License. You may add Your own attribution
|
5162
|
+
notices within Derivative Works that You distribute, alongside
|
5163
|
+
or as an addendum to the NOTICE text from the Work, provided
|
5164
|
+
that such additional attribution notices cannot be construed
|
5165
|
+
as modifying the License.
|
5166
|
+
|
5167
|
+
You may add Your own copyright statement to Your modifications and
|
5168
|
+
may provide additional or different license terms and conditions
|
5169
|
+
for use, reproduction, or distribution of Your modifications, or
|
5170
|
+
for any such Derivative Works as a whole, provided Your use,
|
5171
|
+
reproduction, and distribution of the Work otherwise complies with
|
5172
|
+
the conditions stated in this License.
|
5173
|
+
|
5174
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
5175
|
+
any Contribution intentionally submitted for inclusion in the Work
|
5176
|
+
by You to the Licensor shall be under the terms and conditions of
|
5177
|
+
this License, without any additional terms or conditions.
|
5178
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
5179
|
+
the terms of any separate license agreement you may have executed
|
5180
|
+
with Licensor regarding such Contributions.
|
5181
|
+
|
5182
|
+
6. Trademarks. This License does not grant permission to use the trade
|
5183
|
+
names, trademarks, service marks, or product names of the Licensor,
|
5184
|
+
except as required for reasonable and customary use in describing the
|
5185
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
5186
|
+
|
5187
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
5188
|
+
agreed to in writing, Licensor provides the Work (and each
|
5189
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
5190
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
5191
|
+
implied, including, without limitation, any warranties or conditions
|
5192
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
5193
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
5194
|
+
appropriateness of using or redistributing the Work and assume any
|
5195
|
+
risks associated with Your exercise of permissions under this License.
|
5196
|
+
|
5197
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
5198
|
+
whether in tort (including negligence), contract, or otherwise,
|
5199
|
+
unless required by applicable law (such as deliberate and grossly
|
5200
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
5201
|
+
liable to You for damages, including any direct, indirect, special,
|
5202
|
+
incidental, or consequential damages of any character arising as a
|
5203
|
+
result of this License or out of the use or inability to use the
|
5204
|
+
Work (including but not limited to damages for loss of goodwill,
|
5205
|
+
work stoppage, computer failure or malfunction, or any and all
|
5206
|
+
other commercial damages or losses), even if such Contributor
|
5207
|
+
has been advised of the possibility of such damages.
|
5208
|
+
|
5209
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
5210
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
5211
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
5212
|
+
or other liability obligations and/or rights consistent with this
|
5213
|
+
License. However, in accepting such obligations, You may act only
|
5214
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
5215
|
+
of any other Contributor, and only if You agree to indemnify,
|
5216
|
+
defend, and hold each Contributor harmless for any liability
|
5217
|
+
incurred by, or claims asserted against, such Contributor by reason
|
5218
|
+
of your accepting any such warranty or additional liability.
|
5219
|
+
|
5220
|
+
END OF TERMS AND CONDITIONS
|
5221
|
+
|
5222
|
+
APPENDIX: How to apply the Apache License to your work.
|
5223
|
+
|
5224
|
+
To apply the Apache License to your work, attach the following
|
5225
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
5226
|
+
replaced with your own identifying information. (Don't include
|
5227
|
+
the brackets!) The text should be enclosed in the appropriate
|
5228
|
+
comment syntax for the file format. We also recommend that a
|
5229
|
+
file or class name and description of purpose be included on the
|
5230
|
+
same "printed page" as the copyright notice for easier
|
5231
|
+
identification within third-party archives.
|
5232
|
+
|
5233
|
+
Copyright [yyyy] [name of copyright owner]
|
5234
|
+
|
5235
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5236
|
+
you may not use this file except in compliance with the License.
|
5237
|
+
You may obtain a copy of the License at
|
5238
|
+
|
5239
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
5240
|
+
|
5241
|
+
Unless required by applicable law or agreed to in writing, software
|
5242
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
5243
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
5244
|
+
See the License for the specific language governing permissions and
|
5245
|
+
limitations under the License.
|
5246
|
+
|
5247
|
+
================================================================================
|
5248
|
+
itertools v0.11.0 LICENSE-MIT
|
5249
|
+
================================================================================
|
5250
|
+
|
5251
|
+
Copyright (c) 2015
|
5252
|
+
|
5253
|
+
Permission is hereby granted, free of charge, to any
|
5254
|
+
person obtaining a copy of this software and associated
|
5255
|
+
documentation files (the "Software"), to deal in the
|
5256
|
+
Software without restriction, including without
|
5257
|
+
limitation the rights to use, copy, modify, merge,
|
5258
|
+
publish, distribute, sublicense, and/or sell copies of
|
5259
|
+
the Software, and to permit persons to whom the Software
|
5260
|
+
is furnished to do so, subject to the following
|
5261
|
+
conditions:
|
5262
|
+
|
5263
|
+
The above copyright notice and this permission notice
|
5264
|
+
shall be included in all copies or substantial portions
|
5265
|
+
of the Software.
|
5266
|
+
|
5267
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
5268
|
+
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
5269
|
+
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
5270
|
+
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
5271
|
+
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
5272
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
5273
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
5274
|
+
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
5275
|
+
DEALINGS IN THE SOFTWARE.
|
5276
|
+
|
5277
|
+
================================================================================
|
5278
|
+
itertools v0.12.1 LICENSE-APACHE
|
5035
5279
|
================================================================================
|
5036
5280
|
|
5037
5281
|
Apache License
|
@@ -5237,7 +5481,7 @@ See the License for the specific language governing permissions and
|
|
5237
5481
|
limitations under the License.
|
5238
5482
|
|
5239
5483
|
================================================================================
|
5240
|
-
itertools LICENSE-MIT
|
5484
|
+
itertools v0.12.1 LICENSE-MIT
|
5241
5485
|
================================================================================
|
5242
5486
|
|
5243
5487
|
Copyright (c) 2015
|
@@ -12203,7 +12447,305 @@ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
12203
12447
|
DEALINGS IN THE SOFTWARE.
|
12204
12448
|
|
12205
12449
|
================================================================================
|
12206
|
-
regex-syntax LICENSE-APACHE
|
12450
|
+
regex-syntax v0.7.5 LICENSE-APACHE
|
12451
|
+
================================================================================
|
12452
|
+
|
12453
|
+
Apache License
|
12454
|
+
Version 2.0, January 2004
|
12455
|
+
http://www.apache.org/licenses/
|
12456
|
+
|
12457
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
12458
|
+
|
12459
|
+
1. Definitions.
|
12460
|
+
|
12461
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
12462
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
12463
|
+
|
12464
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
12465
|
+
the copyright owner that is granting the License.
|
12466
|
+
|
12467
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
12468
|
+
other entities that control, are controlled by, or are under common
|
12469
|
+
control with that entity. For the purposes of this definition,
|
12470
|
+
"control" means (i) the power, direct or indirect, to cause the
|
12471
|
+
direction or management of such entity, whether by contract or
|
12472
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
12473
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
12474
|
+
|
12475
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
12476
|
+
exercising permissions granted by this License.
|
12477
|
+
|
12478
|
+
"Source" form shall mean the preferred form for making modifications,
|
12479
|
+
including but not limited to software source code, documentation
|
12480
|
+
source, and configuration files.
|
12481
|
+
|
12482
|
+
"Object" form shall mean any form resulting from mechanical
|
12483
|
+
transformation or translation of a Source form, including but
|
12484
|
+
not limited to compiled object code, generated documentation,
|
12485
|
+
and conversions to other media types.
|
12486
|
+
|
12487
|
+
"Work" shall mean the work of authorship, whether in Source or
|
12488
|
+
Object form, made available under the License, as indicated by a
|
12489
|
+
copyright notice that is included in or attached to the work
|
12490
|
+
(an example is provided in the Appendix below).
|
12491
|
+
|
12492
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
12493
|
+
form, that is based on (or derived from) the Work and for which the
|
12494
|
+
editorial revisions, annotations, elaborations, or other modifications
|
12495
|
+
represent, as a whole, an original work of authorship. For the purposes
|
12496
|
+
of this License, Derivative Works shall not include works that remain
|
12497
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
12498
|
+
the Work and Derivative Works thereof.
|
12499
|
+
|
12500
|
+
"Contribution" shall mean any work of authorship, including
|
12501
|
+
the original version of the Work and any modifications or additions
|
12502
|
+
to that Work or Derivative Works thereof, that is intentionally
|
12503
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
12504
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
12505
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
12506
|
+
means any form of electronic, verbal, or written communication sent
|
12507
|
+
to the Licensor or its representatives, including but not limited to
|
12508
|
+
communication on electronic mailing lists, source code control systems,
|
12509
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
12510
|
+
Licensor for the purpose of discussing and improving the Work, but
|
12511
|
+
excluding communication that is conspicuously marked or otherwise
|
12512
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
12513
|
+
|
12514
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
12515
|
+
on behalf of whom a Contribution has been received by Licensor and
|
12516
|
+
subsequently incorporated within the Work.
|
12517
|
+
|
12518
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
12519
|
+
this License, each Contributor hereby grants to You a perpetual,
|
12520
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
12521
|
+
copyright license to reproduce, prepare Derivative Works of,
|
12522
|
+
publicly display, publicly perform, sublicense, and distribute the
|
12523
|
+
Work and such Derivative Works in Source or Object form.
|
12524
|
+
|
12525
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
12526
|
+
this License, each Contributor hereby grants to You a perpetual,
|
12527
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
12528
|
+
(except as stated in this section) patent license to make, have made,
|
12529
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
12530
|
+
where such license applies only to those patent claims licensable
|
12531
|
+
by such Contributor that are necessarily infringed by their
|
12532
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
12533
|
+
with the Work to which such Contribution(s) was submitted. If You
|
12534
|
+
institute patent litigation against any entity (including a
|
12535
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
12536
|
+
or a Contribution incorporated within the Work constitutes direct
|
12537
|
+
or contributory patent infringement, then any patent licenses
|
12538
|
+
granted to You under this License for that Work shall terminate
|
12539
|
+
as of the date such litigation is filed.
|
12540
|
+
|
12541
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
12542
|
+
Work or Derivative Works thereof in any medium, with or without
|
12543
|
+
modifications, and in Source or Object form, provided that You
|
12544
|
+
meet the following conditions:
|
12545
|
+
|
12546
|
+
(a) You must give any other recipients of the Work or
|
12547
|
+
Derivative Works a copy of this License; and
|
12548
|
+
|
12549
|
+
(b) You must cause any modified files to carry prominent notices
|
12550
|
+
stating that You changed the files; and
|
12551
|
+
|
12552
|
+
(c) You must retain, in the Source form of any Derivative Works
|
12553
|
+
that You distribute, all copyright, patent, trademark, and
|
12554
|
+
attribution notices from the Source form of the Work,
|
12555
|
+
excluding those notices that do not pertain to any part of
|
12556
|
+
the Derivative Works; and
|
12557
|
+
|
12558
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
12559
|
+
distribution, then any Derivative Works that You distribute must
|
12560
|
+
include a readable copy of the attribution notices contained
|
12561
|
+
within such NOTICE file, excluding those notices that do not
|
12562
|
+
pertain to any part of the Derivative Works, in at least one
|
12563
|
+
of the following places: within a NOTICE text file distributed
|
12564
|
+
as part of the Derivative Works; within the Source form or
|
12565
|
+
documentation, if provided along with the Derivative Works; or,
|
12566
|
+
within a display generated by the Derivative Works, if and
|
12567
|
+
wherever such third-party notices normally appear. The contents
|
12568
|
+
of the NOTICE file are for informational purposes only and
|
12569
|
+
do not modify the License. You may add Your own attribution
|
12570
|
+
notices within Derivative Works that You distribute, alongside
|
12571
|
+
or as an addendum to the NOTICE text from the Work, provided
|
12572
|
+
that such additional attribution notices cannot be construed
|
12573
|
+
as modifying the License.
|
12574
|
+
|
12575
|
+
You may add Your own copyright statement to Your modifications and
|
12576
|
+
may provide additional or different license terms and conditions
|
12577
|
+
for use, reproduction, or distribution of Your modifications, or
|
12578
|
+
for any such Derivative Works as a whole, provided Your use,
|
12579
|
+
reproduction, and distribution of the Work otherwise complies with
|
12580
|
+
the conditions stated in this License.
|
12581
|
+
|
12582
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
12583
|
+
any Contribution intentionally submitted for inclusion in the Work
|
12584
|
+
by You to the Licensor shall be under the terms and conditions of
|
12585
|
+
this License, without any additional terms or conditions.
|
12586
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
12587
|
+
the terms of any separate license agreement you may have executed
|
12588
|
+
with Licensor regarding such Contributions.
|
12589
|
+
|
12590
|
+
6. Trademarks. This License does not grant permission to use the trade
|
12591
|
+
names, trademarks, service marks, or product names of the Licensor,
|
12592
|
+
except as required for reasonable and customary use in describing the
|
12593
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
12594
|
+
|
12595
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
12596
|
+
agreed to in writing, Licensor provides the Work (and each
|
12597
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
12598
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
12599
|
+
implied, including, without limitation, any warranties or conditions
|
12600
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
12601
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
12602
|
+
appropriateness of using or redistributing the Work and assume any
|
12603
|
+
risks associated with Your exercise of permissions under this License.
|
12604
|
+
|
12605
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
12606
|
+
whether in tort (including negligence), contract, or otherwise,
|
12607
|
+
unless required by applicable law (such as deliberate and grossly
|
12608
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
12609
|
+
liable to You for damages, including any direct, indirect, special,
|
12610
|
+
incidental, or consequential damages of any character arising as a
|
12611
|
+
result of this License or out of the use or inability to use the
|
12612
|
+
Work (including but not limited to damages for loss of goodwill,
|
12613
|
+
work stoppage, computer failure or malfunction, or any and all
|
12614
|
+
other commercial damages or losses), even if such Contributor
|
12615
|
+
has been advised of the possibility of such damages.
|
12616
|
+
|
12617
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
12618
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
12619
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
12620
|
+
or other liability obligations and/or rights consistent with this
|
12621
|
+
License. However, in accepting such obligations, You may act only
|
12622
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
12623
|
+
of any other Contributor, and only if You agree to indemnify,
|
12624
|
+
defend, and hold each Contributor harmless for any liability
|
12625
|
+
incurred by, or claims asserted against, such Contributor by reason
|
12626
|
+
of your accepting any such warranty or additional liability.
|
12627
|
+
|
12628
|
+
END OF TERMS AND CONDITIONS
|
12629
|
+
|
12630
|
+
APPENDIX: How to apply the Apache License to your work.
|
12631
|
+
|
12632
|
+
To apply the Apache License to your work, attach the following
|
12633
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
12634
|
+
replaced with your own identifying information. (Don't include
|
12635
|
+
the brackets!) The text should be enclosed in the appropriate
|
12636
|
+
comment syntax for the file format. We also recommend that a
|
12637
|
+
file or class name and description of purpose be included on the
|
12638
|
+
same "printed page" as the copyright notice for easier
|
12639
|
+
identification within third-party archives.
|
12640
|
+
|
12641
|
+
Copyright [yyyy] [name of copyright owner]
|
12642
|
+
|
12643
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
12644
|
+
you may not use this file except in compliance with the License.
|
12645
|
+
You may obtain a copy of the License at
|
12646
|
+
|
12647
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
12648
|
+
|
12649
|
+
Unless required by applicable law or agreed to in writing, software
|
12650
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12651
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12652
|
+
See the License for the specific language governing permissions and
|
12653
|
+
limitations under the License.
|
12654
|
+
|
12655
|
+
================================================================================
|
12656
|
+
regex-syntax v0.7.5 LICENSE-MIT
|
12657
|
+
================================================================================
|
12658
|
+
|
12659
|
+
Copyright (c) 2014 The Rust Project Developers
|
12660
|
+
|
12661
|
+
Permission is hereby granted, free of charge, to any
|
12662
|
+
person obtaining a copy of this software and associated
|
12663
|
+
documentation files (the "Software"), to deal in the
|
12664
|
+
Software without restriction, including without
|
12665
|
+
limitation the rights to use, copy, modify, merge,
|
12666
|
+
publish, distribute, sublicense, and/or sell copies of
|
12667
|
+
the Software, and to permit persons to whom the Software
|
12668
|
+
is furnished to do so, subject to the following
|
12669
|
+
conditions:
|
12670
|
+
|
12671
|
+
The above copyright notice and this permission notice
|
12672
|
+
shall be included in all copies or substantial portions
|
12673
|
+
of the Software.
|
12674
|
+
|
12675
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
12676
|
+
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
12677
|
+
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
12678
|
+
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
12679
|
+
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
12680
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
12681
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
12682
|
+
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
12683
|
+
DEALINGS IN THE SOFTWARE.
|
12684
|
+
|
12685
|
+
================================================================================
|
12686
|
+
regex-syntax v0.7.5 src/unicode_tables/LICENSE-UNICODE
|
12687
|
+
================================================================================
|
12688
|
+
|
12689
|
+
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
|
12690
|
+
|
12691
|
+
Unicode Data Files include all data files under the directories
|
12692
|
+
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
|
12693
|
+
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
|
12694
|
+
http://www.unicode.org/utility/trac/browser/.
|
12695
|
+
|
12696
|
+
Unicode Data Files do not include PDF online code charts under the
|
12697
|
+
directory http://www.unicode.org/Public/.
|
12698
|
+
|
12699
|
+
Software includes any source code published in the Unicode Standard
|
12700
|
+
or under the directories
|
12701
|
+
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
|
12702
|
+
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
|
12703
|
+
http://www.unicode.org/utility/trac/browser/.
|
12704
|
+
|
12705
|
+
NOTICE TO USER: Carefully read the following legal agreement.
|
12706
|
+
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
|
12707
|
+
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
|
12708
|
+
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
|
12709
|
+
TERMS AND CONDITIONS OF THIS AGREEMENT.
|
12710
|
+
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
|
12711
|
+
THE DATA FILES OR SOFTWARE.
|
12712
|
+
|
12713
|
+
COPYRIGHT AND PERMISSION NOTICE
|
12714
|
+
|
12715
|
+
Copyright © 1991-2018 Unicode, Inc. All rights reserved.
|
12716
|
+
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
|
12717
|
+
|
12718
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
12719
|
+
a copy of the Unicode data files and any associated documentation
|
12720
|
+
(the "Data Files") or Unicode software and any associated documentation
|
12721
|
+
(the "Software") to deal in the Data Files or Software
|
12722
|
+
without restriction, including without limitation the rights to use,
|
12723
|
+
copy, modify, merge, publish, distribute, and/or sell copies of
|
12724
|
+
the Data Files or Software, and to permit persons to whom the Data Files
|
12725
|
+
or Software are furnished to do so, provided that either
|
12726
|
+
(a) this copyright and permission notice appear with all copies
|
12727
|
+
of the Data Files or Software, or
|
12728
|
+
(b) this copyright and permission notice appear in associated
|
12729
|
+
Documentation.
|
12730
|
+
|
12731
|
+
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
12732
|
+
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
12733
|
+
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
12734
|
+
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
12735
|
+
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
12736
|
+
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
12737
|
+
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
12738
|
+
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
12739
|
+
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
12740
|
+
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
12741
|
+
|
12742
|
+
Except as contained in this notice, the name of a copyright holder
|
12743
|
+
shall not be used in advertising or otherwise to promote the sale,
|
12744
|
+
use or other dealings in these Data Files or Software without prior
|
12745
|
+
written authorization of the copyright holder.
|
12746
|
+
|
12747
|
+
================================================================================
|
12748
|
+
regex-syntax v0.8.2 LICENSE-APACHE
|
12207
12749
|
================================================================================
|
12208
12750
|
|
12209
12751
|
Apache License
|
@@ -12409,7 +12951,7 @@ See the License for the specific language governing permissions and
|
|
12409
12951
|
limitations under the License.
|
12410
12952
|
|
12411
12953
|
================================================================================
|
12412
|
-
regex-syntax LICENSE-MIT
|
12954
|
+
regex-syntax v0.8.2 LICENSE-MIT
|
12413
12955
|
================================================================================
|
12414
12956
|
|
12415
12957
|
Copyright (c) 2014 The Rust Project Developers
|
@@ -12439,7 +12981,7 @@ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
12439
12981
|
DEALINGS IN THE SOFTWARE.
|
12440
12982
|
|
12441
12983
|
================================================================================
|
12442
|
-
regex-syntax src/unicode_tables/LICENSE-UNICODE
|
12984
|
+
regex-syntax v0.8.2 src/unicode_tables/LICENSE-UNICODE
|
12443
12985
|
================================================================================
|
12444
12986
|
|
12445
12987
|
UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
:slightly_smiling_face: Fast state-of-the-art [tokenizers](https://github.com/huggingface/tokenizers) for Ruby
|
4
4
|
|
5
|
-
[![Build Status](https://github.com/ankane/tokenizers-ruby/workflows/build/badge.svg)](https://github.com/ankane/tokenizers-ruby/actions)
|
5
|
+
[![Build Status](https://github.com/ankane/tokenizers-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/tokenizers-ruby/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/tokenizers/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tokenizers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.3
|
4
|
+
version: 0.4.4
|
5
5
|
platform: x86_64-darwin
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|