tokenizers 0.3.3-x86_64-linux → 0.4.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4041eb3f8e79ca25397620670357fce958823e6fb49c0ef7c3968f11b6ce99e1
4
- data.tar.gz: 17952cf748eff3f62165bfedc304753f127cfde805e2be3e8cfd7adcfd5e878c
3
+ metadata.gz: 32e0ffbced9ba6ede5acdd6ff7a1d2a39efaea0445dc7b42d23524a387c039da
4
+ data.tar.gz: 5c896851ce062d1fa4457038549f9742d16e6f1c32fe3b3f715f5989fc06ca69
5
5
  SHA512:
6
- metadata.gz: a3b4c35630874860ddc85b09b75c03f119dfd622dcf7ddd47fd682b547ba338a08656ec52b4bcad409ebdc6c128dbd640f78f639c4a11839ecc28dc4289a0da6
7
- data.tar.gz: 8c90c7e8a1e67ce65c2b545bbb879e4d277da908f6fcbc3ddc029711f36e2ac4d4e73d0bf76d71f333eaceb6a612ee9261708d80c507b0d597d8f924562ec912
6
+ metadata.gz: 0b9eb472f71f49273d6b1ade1e1017f66953c35159530b1e944ba67d6575697f27c5b8bd9534fbe506562541664f193ed9e9efaf7e6937db7fb482e98cd74d79
7
+ data.tar.gz: 375f812105aa5a78688e47c7e65ad69d86e65a9112821cb63b470c51a00afd3281bf69162a47e980d7f777e5cca1ad5d2c66ce16fd064aca5cf19b14115b106c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.4.0 (2023-09-20)
2
+
3
+ - Updated Tokenizers to 0.14.0
4
+ - Dropped support for Ruby < 3
5
+
1
6
  ## 0.3.3 (2023-04-09)
2
7
 
3
8
  - Updated Tokenizers to 0.13.3
data/Cargo.lock CHANGED
@@ -11,6 +11,15 @@ dependencies = [
11
11
  "memchr",
12
12
  ]
13
13
 
14
+ [[package]]
15
+ name = "aho-corasick"
16
+ version = "1.0.5"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783"
19
+ dependencies = [
20
+ "memchr",
21
+ ]
22
+
14
23
  [[package]]
15
24
  name = "autocfg"
16
25
  version = "1.1.0"
@@ -25,9 +34,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
25
34
 
26
35
  [[package]]
27
36
  name = "bindgen"
28
- version = "0.60.1"
37
+ version = "0.62.0"
29
38
  source = "registry+https://github.com/rust-lang/crates.io-index"
30
- checksum = "062dddbc1ba4aca46de6338e2bf87771414c335f7b2f2036e8f3e9befebf88e6"
39
+ checksum = "c6720a8b7b2d39dd533285ed438d458f65b31b5c257e6ac7bb3d7e82844dd722"
31
40
  dependencies = [
32
41
  "bitflags",
33
42
  "cexpr",
@@ -40,6 +49,7 @@ dependencies = [
40
49
  "regex",
41
50
  "rustc-hash",
42
51
  "shlex",
52
+ "syn 1.0.109",
43
53
  ]
44
54
 
45
55
  [[package]]
@@ -352,31 +362,32 @@ checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d"
352
362
 
353
363
  [[package]]
354
364
  name = "magnus"
355
- version = "0.5.3"
365
+ version = "0.6.0"
356
366
  source = "registry+https://github.com/rust-lang/crates.io-index"
357
- checksum = "c8dc14463c2552e753ef562961f486ca76f17a857c121db40e9f3ade3f35ab81"
367
+ checksum = "68e9585bfe236e88e6b10b6d8eb5349bd0e0009f3f9dff8d2e99a82601b33743"
358
368
  dependencies = [
359
369
  "magnus-macros",
360
370
  "rb-sys",
361
371
  "rb-sys-env",
372
+ "seq-macro",
362
373
  ]
363
374
 
364
375
  [[package]]
365
376
  name = "magnus-macros"
366
- version = "0.4.1"
377
+ version = "0.6.0"
367
378
  source = "registry+https://github.com/rust-lang/crates.io-index"
368
- checksum = "6cc17af1d45442c011aa579d727ec6cff8a69aea8a6bbad26736e7112d749bfb"
379
+ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
369
380
  dependencies = [
370
381
  "proc-macro2",
371
382
  "quote",
372
- "syn 1.0.109",
383
+ "syn 2.0.13",
373
384
  ]
374
385
 
375
386
  [[package]]
376
387
  name = "memchr"
377
- version = "2.5.0"
388
+ version = "2.6.3"
378
389
  source = "registry+https://github.com/rust-lang/crates.io-index"
379
- checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
390
+ checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
380
391
 
381
392
  [[package]]
382
393
  name = "memoffset"
@@ -575,18 +586,18 @@ dependencies = [
575
586
 
576
587
  [[package]]
577
588
  name = "rb-sys"
578
- version = "0.9.71"
589
+ version = "0.9.79"
579
590
  source = "registry+https://github.com/rust-lang/crates.io-index"
580
- checksum = "156bfedced1e236600bcaad538477097ff2ed5c6b474e411d15b791e1d24c0f1"
591
+ checksum = "939fb78db3e4f26665c1d4c7b91ca66d3578335a19aba552d4a6445811d07072"
581
592
  dependencies = [
582
593
  "rb-sys-build",
583
594
  ]
584
595
 
585
596
  [[package]]
586
597
  name = "rb-sys-build"
587
- version = "0.9.71"
598
+ version = "0.9.79"
588
599
  source = "registry+https://github.com/rust-lang/crates.io-index"
589
- checksum = "5cb2e4a32cbc290b543a74567072ad24b708aff7bb5dde5a68d5690379cd7938"
600
+ checksum = "335a95eb0420d52fa94ef12019df3c2c250c6b19cbb3c60bd05cb7e9c362072c"
590
601
  dependencies = [
591
602
  "bindgen",
592
603
  "lazy_static",
@@ -605,20 +616,32 @@ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
605
616
 
606
617
  [[package]]
607
618
  name = "regex"
608
- version = "1.7.3"
619
+ version = "1.9.5"
609
620
  source = "registry+https://github.com/rust-lang/crates.io-index"
610
- checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d"
621
+ checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47"
611
622
  dependencies = [
612
- "aho-corasick",
623
+ "aho-corasick 1.0.5",
624
+ "memchr",
625
+ "regex-automata",
626
+ "regex-syntax",
627
+ ]
628
+
629
+ [[package]]
630
+ name = "regex-automata"
631
+ version = "0.3.8"
632
+ source = "registry+https://github.com/rust-lang/crates.io-index"
633
+ checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
634
+ dependencies = [
635
+ "aho-corasick 1.0.5",
613
636
  "memchr",
614
637
  "regex-syntax",
615
638
  ]
616
639
 
617
640
  [[package]]
618
641
  name = "regex-syntax"
619
- version = "0.6.29"
642
+ version = "0.7.5"
620
643
  source = "registry+https://github.com/rust-lang/crates.io-index"
621
- checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
644
+ checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
622
645
 
623
646
  [[package]]
624
647
  name = "rustc-hash"
@@ -638,6 +661,12 @@ version = "1.1.0"
638
661
  source = "registry+https://github.com/rust-lang/crates.io-index"
639
662
  checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
640
663
 
664
+ [[package]]
665
+ name = "seq-macro"
666
+ version = "0.3.5"
667
+ source = "registry+https://github.com/rust-lang/crates.io-index"
668
+ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
669
+
641
670
  [[package]]
642
671
  name = "serde"
643
672
  version = "1.0.159"
@@ -749,21 +778,21 @@ dependencies = [
749
778
 
750
779
  [[package]]
751
780
  name = "tokenizers"
752
- version = "0.3.3"
781
+ version = "0.4.0"
753
782
  dependencies = [
754
783
  "magnus",
755
784
  "onig",
756
785
  "serde",
757
- "tokenizers 0.13.3",
786
+ "tokenizers 0.14.0",
758
787
  ]
759
788
 
760
789
  [[package]]
761
790
  name = "tokenizers"
762
- version = "0.13.3"
791
+ version = "0.14.0"
763
792
  source = "registry+https://github.com/rust-lang/crates.io-index"
764
- checksum = "5cf49017523bf0bc01c9966f172c5f120bbb7b96cccd1708772dd42e767fb9f5"
793
+ checksum = "12b515a66453a4d68f03398054f7204fd0dde6b93d3f20ea90b08025ab49b499"
765
794
  dependencies = [
766
- "aho-corasick",
795
+ "aho-corasick 0.7.20",
767
796
  "derive_builder",
768
797
  "esaxx-rs",
769
798
  "getrandom",
@@ -6,6 +6,10 @@ aho-corasick v0.7.20
6
6
  https://github.com/BurntSushi/aho-corasick
7
7
  Unlicense OR MIT
8
8
 
9
+ aho-corasick v1.0.5
10
+ https://github.com/BurntSushi/aho-corasick
11
+ Unlicense OR MIT
12
+
9
13
  autocfg v1.1.0
10
14
  https://github.com/cuviper/autocfg
11
15
  Apache-2.0 OR MIT
@@ -14,7 +18,7 @@ base64 v0.13.1
14
18
  https://github.com/marshallpierce/rust-base64
15
19
  MIT/Apache-2.0
16
20
 
17
- bindgen v0.60.1
21
+ bindgen v0.62.0
18
22
  https://rust-lang.github.io/rust-bindgen/
19
23
  BSD-3-Clause
20
24
 
@@ -150,17 +154,17 @@ macro_rules_attribute-proc_macro v0.1.3
150
154
  https://github.com/danielhenrymantilla/macro_rules_attribute-rs
151
155
  MIT
152
156
 
153
- magnus v0.5.3
157
+ magnus v0.6.0
154
158
  https://github.com/matsadler/magnus
155
159
  MIT
156
160
 
157
- magnus-macros v0.4.1
161
+ magnus-macros v0.6.0
158
162
  https://github.com/matsadler/magnus
159
163
  MIT
160
164
 
161
- memchr v2.5.0
165
+ memchr v2.6.3
162
166
  https://github.com/BurntSushi/memchr
163
- Unlicense/MIT
167
+ Unlicense OR MIT
164
168
 
165
169
  memoffset v0.8.0
166
170
  https://github.com/Gilnaa/memoffset
@@ -250,11 +254,11 @@ rayon-core v1.11.0
250
254
  https://github.com/rayon-rs/rayon
251
255
  MIT OR Apache-2.0
252
256
 
253
- rb-sys v0.9.71
257
+ rb-sys v0.9.79
254
258
  https://github.com/oxidize-rb/rb-sys
255
259
  MIT OR Apache-2.0
256
260
 
257
- rb-sys-build v0.9.71
261
+ rb-sys-build v0.9.79
258
262
  https://github.com/oxidize-rb/rb-sys
259
263
  MIT OR Apache-2.0
260
264
 
@@ -262,12 +266,16 @@ rb-sys-env v0.1.2
262
266
  https://github.com/oxidize-rb/rb-sys
263
267
  MIT OR Apache-2.0
264
268
 
265
- regex v1.7.3
269
+ regex v1.9.5
266
270
  https://github.com/rust-lang/regex
267
271
  MIT OR Apache-2.0
268
272
 
269
- regex-syntax v0.6.29
270
- https://github.com/rust-lang/regex
273
+ regex-automata v0.3.8
274
+ https://github.com/rust-lang/regex/tree/master/regex-automata
275
+ MIT OR Apache-2.0
276
+
277
+ regex-syntax v0.7.5
278
+ https://github.com/rust-lang/regex/tree/master/regex-syntax
271
279
  MIT OR Apache-2.0
272
280
 
273
281
  rustc-hash v1.1.0
@@ -282,6 +290,10 @@ scopeguard v1.1.0
282
290
  https://github.com/bluss/scopeguard
283
291
  MIT/Apache-2.0
284
292
 
293
+ seq-macro v0.3.5
294
+ https://github.com/dtolnay/seq-macro
295
+ MIT OR Apache-2.0
296
+
285
297
  serde v1.0.159
286
298
  https://serde.rs
287
299
  MIT OR Apache-2.0
@@ -330,7 +342,7 @@ thiserror-impl v1.0.40
330
342
  https://github.com/dtolnay/thiserror
331
343
  MIT OR Apache-2.0
332
344
 
333
- tokenizers v0.13.3
345
+ tokenizers v0.14.0
334
346
  https://github.com/huggingface/tokenizers
335
347
  Apache-2.0
336
348
 
@@ -355,7 +367,70 @@ https://github.com/swgillespie/unicode-categories
355
367
  MIT OR Apache-2.0
356
368
 
357
369
  ================================================================================
358
- aho-corasick COPYING
370
+ aho-corasick v0.7.20 COPYING
371
+ ================================================================================
372
+
373
+ This project is dual-licensed under the Unlicense and MIT licenses.
374
+
375
+ You may use this code under the terms of either license.
376
+
377
+ ================================================================================
378
+ aho-corasick v0.7.20 LICENSE-MIT
379
+ ================================================================================
380
+
381
+ The MIT License (MIT)
382
+
383
+ Copyright (c) 2015 Andrew Gallant
384
+
385
+ Permission is hereby granted, free of charge, to any person obtaining a copy
386
+ of this software and associated documentation files (the "Software"), to deal
387
+ in the Software without restriction, including without limitation the rights
388
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
389
+ copies of the Software, and to permit persons to whom the Software is
390
+ furnished to do so, subject to the following conditions:
391
+
392
+ The above copyright notice and this permission notice shall be included in
393
+ all copies or substantial portions of the Software.
394
+
395
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
396
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
397
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
398
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
399
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
400
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
401
+ THE SOFTWARE.
402
+
403
+ ================================================================================
404
+ aho-corasick v0.7.20 UNLICENSE
405
+ ================================================================================
406
+
407
+ This is free and unencumbered software released into the public domain.
408
+
409
+ Anyone is free to copy, modify, publish, use, compile, sell, or
410
+ distribute this software, either in source code form or as a compiled
411
+ binary, for any purpose, commercial or non-commercial, and by any
412
+ means.
413
+
414
+ In jurisdictions that recognize copyright laws, the author or authors
415
+ of this software dedicate any and all copyright interest in the
416
+ software to the public domain. We make this dedication for the benefit
417
+ of the public at large and to the detriment of our heirs and
418
+ successors. We intend this dedication to be an overt act of
419
+ relinquishment in perpetuity of all present and future rights to this
420
+ software under copyright law.
421
+
422
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
423
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
424
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
425
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
426
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
427
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
428
+ OTHER DEALINGS IN THE SOFTWARE.
429
+
430
+ For more information, please refer to <http://unlicense.org/>
431
+
432
+ ================================================================================
433
+ aho-corasick v1.0.5 COPYING
359
434
  ================================================================================
360
435
 
361
436
  This project is dual-licensed under the Unlicense and MIT licenses.
@@ -363,7 +438,7 @@ This project is dual-licensed under the Unlicense and MIT licenses.
363
438
  You may use this code under the terms of either license.
364
439
 
365
440
  ================================================================================
366
- aho-corasick LICENSE-MIT
441
+ aho-corasick v1.0.5 LICENSE-MIT
367
442
  ================================================================================
368
443
 
369
444
  The MIT License (MIT)
@@ -389,7 +464,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
389
464
  THE SOFTWARE.
390
465
 
391
466
  ================================================================================
392
- aho-corasick UNLICENSE
467
+ aho-corasick v1.0.5 UNLICENSE
393
468
  ================================================================================
394
469
 
395
470
  This is free and unencumbered software released into the public domain.
@@ -7762,7 +7837,7 @@ magnus LICENSE
7762
7837
 
7763
7838
  MIT License
7764
7839
 
7765
- Copyright (c) 2022, 2021 Matthew Sadler
7840
+ Copyright (c) 2023, 2022, 2021 Matthew Sadler
7766
7841
 
7767
7842
  Permission is hereby granted, free of charge, to any person obtaining a copy
7768
7843
  of this software and associated documentation files (the "Software"), to deal
@@ -7788,7 +7863,7 @@ magnus-macros LICENSE
7788
7863
 
7789
7864
  MIT License
7790
7865
 
7791
- Copyright (c) 2022, 2021 Matthew Sadler
7866
+ Copyright (c) 2023, 2022, 2021 Matthew Sadler
7792
7867
 
7793
7868
  Permission is hereby granted, free of charge, to any person obtaining a copy
7794
7869
  of this software and associated documentation files (the "Software"), to deal
@@ -12820,28 +12895,240 @@ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
12820
12895
  DEALINGS IN THE SOFTWARE.
12821
12896
 
12822
12897
  ================================================================================
12823
- regex src/testdata/LICENSE
12898
+ regex-automata LICENSE-APACHE
12824
12899
  ================================================================================
12825
12900
 
12826
- The following license covers testregex.c and all associated test data.
12901
+ Apache License
12902
+ Version 2.0, January 2004
12903
+ http://www.apache.org/licenses/
12827
12904
 
12828
- Permission is hereby granted, free of charge, to any person obtaining a
12829
- copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
12830
- without restriction, including without limitation the rights to use,
12831
- copy, modify, merge, publish, distribute, and/or sell copies of the
12832
- Software, and to permit persons to whom the Software is furnished to do
12833
- so, subject to the following disclaimer:
12834
-
12835
- THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
12836
- WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
12837
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
12838
- IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
12839
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
12840
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
12841
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
12842
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
12843
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
12844
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12905
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
12906
+
12907
+ 1. Definitions.
12908
+
12909
+ "License" shall mean the terms and conditions for use, reproduction,
12910
+ and distribution as defined by Sections 1 through 9 of this document.
12911
+
12912
+ "Licensor" shall mean the copyright owner or entity authorized by
12913
+ the copyright owner that is granting the License.
12914
+
12915
+ "Legal Entity" shall mean the union of the acting entity and all
12916
+ other entities that control, are controlled by, or are under common
12917
+ control with that entity. For the purposes of this definition,
12918
+ "control" means (i) the power, direct or indirect, to cause the
12919
+ direction or management of such entity, whether by contract or
12920
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
12921
+ outstanding shares, or (iii) beneficial ownership of such entity.
12922
+
12923
+ "You" (or "Your") shall mean an individual or Legal Entity
12924
+ exercising permissions granted by this License.
12925
+
12926
+ "Source" form shall mean the preferred form for making modifications,
12927
+ including but not limited to software source code, documentation
12928
+ source, and configuration files.
12929
+
12930
+ "Object" form shall mean any form resulting from mechanical
12931
+ transformation or translation of a Source form, including but
12932
+ not limited to compiled object code, generated documentation,
12933
+ and conversions to other media types.
12934
+
12935
+ "Work" shall mean the work of authorship, whether in Source or
12936
+ Object form, made available under the License, as indicated by a
12937
+ copyright notice that is included in or attached to the work
12938
+ (an example is provided in the Appendix below).
12939
+
12940
+ "Derivative Works" shall mean any work, whether in Source or Object
12941
+ form, that is based on (or derived from) the Work and for which the
12942
+ editorial revisions, annotations, elaborations, or other modifications
12943
+ represent, as a whole, an original work of authorship. For the purposes
12944
+ of this License, Derivative Works shall not include works that remain
12945
+ separable from, or merely link (or bind by name) to the interfaces of,
12946
+ the Work and Derivative Works thereof.
12947
+
12948
+ "Contribution" shall mean any work of authorship, including
12949
+ the original version of the Work and any modifications or additions
12950
+ to that Work or Derivative Works thereof, that is intentionally
12951
+ submitted to Licensor for inclusion in the Work by the copyright owner
12952
+ or by an individual or Legal Entity authorized to submit on behalf of
12953
+ the copyright owner. For the purposes of this definition, "submitted"
12954
+ means any form of electronic, verbal, or written communication sent
12955
+ to the Licensor or its representatives, including but not limited to
12956
+ communication on electronic mailing lists, source code control systems,
12957
+ and issue tracking systems that are managed by, or on behalf of, the
12958
+ Licensor for the purpose of discussing and improving the Work, but
12959
+ excluding communication that is conspicuously marked or otherwise
12960
+ designated in writing by the copyright owner as "Not a Contribution."
12961
+
12962
+ "Contributor" shall mean Licensor and any individual or Legal Entity
12963
+ on behalf of whom a Contribution has been received by Licensor and
12964
+ subsequently incorporated within the Work.
12965
+
12966
+ 2. Grant of Copyright License. Subject to the terms and conditions of
12967
+ this License, each Contributor hereby grants to You a perpetual,
12968
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
12969
+ copyright license to reproduce, prepare Derivative Works of,
12970
+ publicly display, publicly perform, sublicense, and distribute the
12971
+ Work and such Derivative Works in Source or Object form.
12972
+
12973
+ 3. Grant of Patent License. Subject to the terms and conditions of
12974
+ this License, each Contributor hereby grants to You a perpetual,
12975
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
12976
+ (except as stated in this section) patent license to make, have made,
12977
+ use, offer to sell, sell, import, and otherwise transfer the Work,
12978
+ where such license applies only to those patent claims licensable
12979
+ by such Contributor that are necessarily infringed by their
12980
+ Contribution(s) alone or by combination of their Contribution(s)
12981
+ with the Work to which such Contribution(s) was submitted. If You
12982
+ institute patent litigation against any entity (including a
12983
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
12984
+ or a Contribution incorporated within the Work constitutes direct
12985
+ or contributory patent infringement, then any patent licenses
12986
+ granted to You under this License for that Work shall terminate
12987
+ as of the date such litigation is filed.
12988
+
12989
+ 4. Redistribution. You may reproduce and distribute copies of the
12990
+ Work or Derivative Works thereof in any medium, with or without
12991
+ modifications, and in Source or Object form, provided that You
12992
+ meet the following conditions:
12993
+
12994
+ (a) You must give any other recipients of the Work or
12995
+ Derivative Works a copy of this License; and
12996
+
12997
+ (b) You must cause any modified files to carry prominent notices
12998
+ stating that You changed the files; and
12999
+
13000
+ (c) You must retain, in the Source form of any Derivative Works
13001
+ that You distribute, all copyright, patent, trademark, and
13002
+ attribution notices from the Source form of the Work,
13003
+ excluding those notices that do not pertain to any part of
13004
+ the Derivative Works; and
13005
+
13006
+ (d) If the Work includes a "NOTICE" text file as part of its
13007
+ distribution, then any Derivative Works that You distribute must
13008
+ include a readable copy of the attribution notices contained
13009
+ within such NOTICE file, excluding those notices that do not
13010
+ pertain to any part of the Derivative Works, in at least one
13011
+ of the following places: within a NOTICE text file distributed
13012
+ as part of the Derivative Works; within the Source form or
13013
+ documentation, if provided along with the Derivative Works; or,
13014
+ within a display generated by the Derivative Works, if and
13015
+ wherever such third-party notices normally appear. The contents
13016
+ of the NOTICE file are for informational purposes only and
13017
+ do not modify the License. You may add Your own attribution
13018
+ notices within Derivative Works that You distribute, alongside
13019
+ or as an addendum to the NOTICE text from the Work, provided
13020
+ that such additional attribution notices cannot be construed
13021
+ as modifying the License.
13022
+
13023
+ You may add Your own copyright statement to Your modifications and
13024
+ may provide additional or different license terms and conditions
13025
+ for use, reproduction, or distribution of Your modifications, or
13026
+ for any such Derivative Works as a whole, provided Your use,
13027
+ reproduction, and distribution of the Work otherwise complies with
13028
+ the conditions stated in this License.
13029
+
13030
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
13031
+ any Contribution intentionally submitted for inclusion in the Work
13032
+ by You to the Licensor shall be under the terms and conditions of
13033
+ this License, without any additional terms or conditions.
13034
+ Notwithstanding the above, nothing herein shall supersede or modify
13035
+ the terms of any separate license agreement you may have executed
13036
+ with Licensor regarding such Contributions.
13037
+
13038
+ 6. Trademarks. This License does not grant permission to use the trade
13039
+ names, trademarks, service marks, or product names of the Licensor,
13040
+ except as required for reasonable and customary use in describing the
13041
+ origin of the Work and reproducing the content of the NOTICE file.
13042
+
13043
+ 7. Disclaimer of Warranty. Unless required by applicable law or
13044
+ agreed to in writing, Licensor provides the Work (and each
13045
+ Contributor provides its Contributions) on an "AS IS" BASIS,
13046
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13047
+ implied, including, without limitation, any warranties or conditions
13048
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
13049
+ PARTICULAR PURPOSE. You are solely responsible for determining the
13050
+ appropriateness of using or redistributing the Work and assume any
13051
+ risks associated with Your exercise of permissions under this License.
13052
+
13053
+ 8. Limitation of Liability. In no event and under no legal theory,
13054
+ whether in tort (including negligence), contract, or otherwise,
13055
+ unless required by applicable law (such as deliberate and grossly
13056
+ negligent acts) or agreed to in writing, shall any Contributor be
13057
+ liable to You for damages, including any direct, indirect, special,
13058
+ incidental, or consequential damages of any character arising as a
13059
+ result of this License or out of the use or inability to use the
13060
+ Work (including but not limited to damages for loss of goodwill,
13061
+ work stoppage, computer failure or malfunction, or any and all
13062
+ other commercial damages or losses), even if such Contributor
13063
+ has been advised of the possibility of such damages.
13064
+
13065
+ 9. Accepting Warranty or Additional Liability. While redistributing
13066
+ the Work or Derivative Works thereof, You may choose to offer,
13067
+ and charge a fee for, acceptance of support, warranty, indemnity,
13068
+ or other liability obligations and/or rights consistent with this
13069
+ License. However, in accepting such obligations, You may act only
13070
+ on Your own behalf and on Your sole responsibility, not on behalf
13071
+ of any other Contributor, and only if You agree to indemnify,
13072
+ defend, and hold each Contributor harmless for any liability
13073
+ incurred by, or claims asserted against, such Contributor by reason
13074
+ of your accepting any such warranty or additional liability.
13075
+
13076
+ END OF TERMS AND CONDITIONS
13077
+
13078
+ APPENDIX: How to apply the Apache License to your work.
13079
+
13080
+ To apply the Apache License to your work, attach the following
13081
+ boilerplate notice, with the fields enclosed by brackets "[]"
13082
+ replaced with your own identifying information. (Don't include
13083
+ the brackets!) The text should be enclosed in the appropriate
13084
+ comment syntax for the file format. We also recommend that a
13085
+ file or class name and description of purpose be included on the
13086
+ same "printed page" as the copyright notice for easier
13087
+ identification within third-party archives.
13088
+
13089
+ Copyright [yyyy] [name of copyright owner]
13090
+
13091
+ Licensed under the Apache License, Version 2.0 (the "License");
13092
+ you may not use this file except in compliance with the License.
13093
+ You may obtain a copy of the License at
13094
+
13095
+ http://www.apache.org/licenses/LICENSE-2.0
13096
+
13097
+ Unless required by applicable law or agreed to in writing, software
13098
+ distributed under the License is distributed on an "AS IS" BASIS,
13099
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13100
+ See the License for the specific language governing permissions and
13101
+ limitations under the License.
13102
+
13103
+ ================================================================================
13104
+ regex-automata LICENSE-MIT
13105
+ ================================================================================
13106
+
13107
+ Copyright (c) 2014 The Rust Project Developers
13108
+
13109
+ Permission is hereby granted, free of charge, to any
13110
+ person obtaining a copy of this software and associated
13111
+ documentation files (the "Software"), to deal in the
13112
+ Software without restriction, including without
13113
+ limitation the rights to use, copy, modify, merge,
13114
+ publish, distribute, sublicense, and/or sell copies of
13115
+ the Software, and to permit persons to whom the Software
13116
+ is furnished to do so, subject to the following
13117
+ conditions:
13118
+
13119
+ The above copyright notice and this permission notice
13120
+ shall be included in all copies or substantial portions
13121
+ of the Software.
13122
+
13123
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
13124
+ ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
13125
+ TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
13126
+ PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
13127
+ SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
13128
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
13129
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
13130
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
13131
+ DEALINGS IN THE SOFTWARE.
12845
13132
 
12846
13133
  ================================================================================
12847
13134
  regex-syntax LICENSE-APACHE
@@ -13820,6 +14107,215 @@ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
13820
14107
  IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
13821
14108
  DEALINGS IN THE SOFTWARE.
13822
14109
 
14110
+ ================================================================================
14111
+ seq-macro LICENSE-APACHE
14112
+ ================================================================================
14113
+
14114
+ Apache License
14115
+ Version 2.0, January 2004
14116
+ http://www.apache.org/licenses/
14117
+
14118
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
14119
+
14120
+ 1. Definitions.
14121
+
14122
+ "License" shall mean the terms and conditions for use, reproduction,
14123
+ and distribution as defined by Sections 1 through 9 of this document.
14124
+
14125
+ "Licensor" shall mean the copyright owner or entity authorized by
14126
+ the copyright owner that is granting the License.
14127
+
14128
+ "Legal Entity" shall mean the union of the acting entity and all
14129
+ other entities that control, are controlled by, or are under common
14130
+ control with that entity. For the purposes of this definition,
14131
+ "control" means (i) the power, direct or indirect, to cause the
14132
+ direction or management of such entity, whether by contract or
14133
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
14134
+ outstanding shares, or (iii) beneficial ownership of such entity.
14135
+
14136
+ "You" (or "Your") shall mean an individual or Legal Entity
14137
+ exercising permissions granted by this License.
14138
+
14139
+ "Source" form shall mean the preferred form for making modifications,
14140
+ including but not limited to software source code, documentation
14141
+ source, and configuration files.
14142
+
14143
+ "Object" form shall mean any form resulting from mechanical
14144
+ transformation or translation of a Source form, including but
14145
+ not limited to compiled object code, generated documentation,
14146
+ and conversions to other media types.
14147
+
14148
+ "Work" shall mean the work of authorship, whether in Source or
14149
+ Object form, made available under the License, as indicated by a
14150
+ copyright notice that is included in or attached to the work
14151
+ (an example is provided in the Appendix below).
14152
+
14153
+ "Derivative Works" shall mean any work, whether in Source or Object
14154
+ form, that is based on (or derived from) the Work and for which the
14155
+ editorial revisions, annotations, elaborations, or other modifications
14156
+ represent, as a whole, an original work of authorship. For the purposes
14157
+ of this License, Derivative Works shall not include works that remain
14158
+ separable from, or merely link (or bind by name) to the interfaces of,
14159
+ the Work and Derivative Works thereof.
14160
+
14161
+ "Contribution" shall mean any work of authorship, including
14162
+ the original version of the Work and any modifications or additions
14163
+ to that Work or Derivative Works thereof, that is intentionally
14164
+ submitted to Licensor for inclusion in the Work by the copyright owner
14165
+ or by an individual or Legal Entity authorized to submit on behalf of
14166
+ the copyright owner. For the purposes of this definition, "submitted"
14167
+ means any form of electronic, verbal, or written communication sent
14168
+ to the Licensor or its representatives, including but not limited to
14169
+ communication on electronic mailing lists, source code control systems,
14170
+ and issue tracking systems that are managed by, or on behalf of, the
14171
+ Licensor for the purpose of discussing and improving the Work, but
14172
+ excluding communication that is conspicuously marked or otherwise
14173
+ designated in writing by the copyright owner as "Not a Contribution."
14174
+
14175
+ "Contributor" shall mean Licensor and any individual or Legal Entity
14176
+ on behalf of whom a Contribution has been received by Licensor and
14177
+ subsequently incorporated within the Work.
14178
+
14179
+ 2. Grant of Copyright License. Subject to the terms and conditions of
14180
+ this License, each Contributor hereby grants to You a perpetual,
14181
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
14182
+ copyright license to reproduce, prepare Derivative Works of,
14183
+ publicly display, publicly perform, sublicense, and distribute the
14184
+ Work and such Derivative Works in Source or Object form.
14185
+
14186
+ 3. Grant of Patent License. Subject to the terms and conditions of
14187
+ this License, each Contributor hereby grants to You a perpetual,
14188
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
14189
+ (except as stated in this section) patent license to make, have made,
14190
+ use, offer to sell, sell, import, and otherwise transfer the Work,
14191
+ where such license applies only to those patent claims licensable
14192
+ by such Contributor that are necessarily infringed by their
14193
+ Contribution(s) alone or by combination of their Contribution(s)
14194
+ with the Work to which such Contribution(s) was submitted. If You
14195
+ institute patent litigation against any entity (including a
14196
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
14197
+ or a Contribution incorporated within the Work constitutes direct
14198
+ or contributory patent infringement, then any patent licenses
14199
+ granted to You under this License for that Work shall terminate
14200
+ as of the date such litigation is filed.
14201
+
14202
+ 4. Redistribution. You may reproduce and distribute copies of the
14203
+ Work or Derivative Works thereof in any medium, with or without
14204
+ modifications, and in Source or Object form, provided that You
14205
+ meet the following conditions:
14206
+
14207
+ (a) You must give any other recipients of the Work or
14208
+ Derivative Works a copy of this License; and
14209
+
14210
+ (b) You must cause any modified files to carry prominent notices
14211
+ stating that You changed the files; and
14212
+
14213
+ (c) You must retain, in the Source form of any Derivative Works
14214
+ that You distribute, all copyright, patent, trademark, and
14215
+ attribution notices from the Source form of the Work,
14216
+ excluding those notices that do not pertain to any part of
14217
+ the Derivative Works; and
14218
+
14219
+ (d) If the Work includes a "NOTICE" text file as part of its
14220
+ distribution, then any Derivative Works that You distribute must
14221
+ include a readable copy of the attribution notices contained
14222
+ within such NOTICE file, excluding those notices that do not
14223
+ pertain to any part of the Derivative Works, in at least one
14224
+ of the following places: within a NOTICE text file distributed
14225
+ as part of the Derivative Works; within the Source form or
14226
+ documentation, if provided along with the Derivative Works; or,
14227
+ within a display generated by the Derivative Works, if and
14228
+ wherever such third-party notices normally appear. The contents
14229
+ of the NOTICE file are for informational purposes only and
14230
+ do not modify the License. You may add Your own attribution
14231
+ notices within Derivative Works that You distribute, alongside
14232
+ or as an addendum to the NOTICE text from the Work, provided
14233
+ that such additional attribution notices cannot be construed
14234
+ as modifying the License.
14235
+
14236
+ You may add Your own copyright statement to Your modifications and
14237
+ may provide additional or different license terms and conditions
14238
+ for use, reproduction, or distribution of Your modifications, or
14239
+ for any such Derivative Works as a whole, provided Your use,
14240
+ reproduction, and distribution of the Work otherwise complies with
14241
+ the conditions stated in this License.
14242
+
14243
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
14244
+ any Contribution intentionally submitted for inclusion in the Work
14245
+ by You to the Licensor shall be under the terms and conditions of
14246
+ this License, without any additional terms or conditions.
14247
+ Notwithstanding the above, nothing herein shall supersede or modify
14248
+ the terms of any separate license agreement you may have executed
14249
+ with Licensor regarding such Contributions.
14250
+
14251
+ 6. Trademarks. This License does not grant permission to use the trade
14252
+ names, trademarks, service marks, or product names of the Licensor,
14253
+ except as required for reasonable and customary use in describing the
14254
+ origin of the Work and reproducing the content of the NOTICE file.
14255
+
14256
+ 7. Disclaimer of Warranty. Unless required by applicable law or
14257
+ agreed to in writing, Licensor provides the Work (and each
14258
+ Contributor provides its Contributions) on an "AS IS" BASIS,
14259
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
14260
+ implied, including, without limitation, any warranties or conditions
14261
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
14262
+ PARTICULAR PURPOSE. You are solely responsible for determining the
14263
+ appropriateness of using or redistributing the Work and assume any
14264
+ risks associated with Your exercise of permissions under this License.
14265
+
14266
+ 8. Limitation of Liability. In no event and under no legal theory,
14267
+ whether in tort (including negligence), contract, or otherwise,
14268
+ unless required by applicable law (such as deliberate and grossly
14269
+ negligent acts) or agreed to in writing, shall any Contributor be
14270
+ liable to You for damages, including any direct, indirect, special,
14271
+ incidental, or consequential damages of any character arising as a
14272
+ result of this License or out of the use or inability to use the
14273
+ Work (including but not limited to damages for loss of goodwill,
14274
+ work stoppage, computer failure or malfunction, or any and all
14275
+ other commercial damages or losses), even if such Contributor
14276
+ has been advised of the possibility of such damages.
14277
+
14278
+ 9. Accepting Warranty or Additional Liability. While redistributing
14279
+ the Work or Derivative Works thereof, You may choose to offer,
14280
+ and charge a fee for, acceptance of support, warranty, indemnity,
14281
+ or other liability obligations and/or rights consistent with this
14282
+ License. However, in accepting such obligations, You may act only
14283
+ on Your own behalf and on Your sole responsibility, not on behalf
14284
+ of any other Contributor, and only if You agree to indemnify,
14285
+ defend, and hold each Contributor harmless for any liability
14286
+ incurred by, or claims asserted against, such Contributor by reason
14287
+ of your accepting any such warranty or additional liability.
14288
+
14289
+ END OF TERMS AND CONDITIONS
14290
+
14291
+ ================================================================================
14292
+ seq-macro LICENSE-MIT
14293
+ ================================================================================
14294
+
14295
+ Permission is hereby granted, free of charge, to any
14296
+ person obtaining a copy of this software and associated
14297
+ documentation files (the "Software"), to deal in the
14298
+ Software without restriction, including without
14299
+ limitation the rights to use, copy, modify, merge,
14300
+ publish, distribute, sublicense, and/or sell copies of
14301
+ the Software, and to permit persons to whom the Software
14302
+ is furnished to do so, subject to the following
14303
+ conditions:
14304
+
14305
+ The above copyright notice and this permission notice
14306
+ shall be included in all copies or substantial portions
14307
+ of the Software.
14308
+
14309
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
14310
+ ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
14311
+ TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
14312
+ PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
14313
+ SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
14314
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
14315
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
14316
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
14317
+ DEALINGS IN THE SOFTWARE.
14318
+
13823
14319
  ================================================================================
13824
14320
  serde LICENSE-APACHE
13825
14321
  ================================================================================
Binary file
Binary file
Binary file
@@ -1,7 +1,7 @@
1
1
  module Tokenizers
2
2
  module FromPretrained
3
3
  # for user agent
4
- TOKENIZERS_VERSION = "0.13.3"
4
+ TOKENIZERS_VERSION = "0.14.0"
5
5
 
6
6
  # use Ruby for downloads
7
7
  # this avoids the need to vendor OpenSSL on Linux
@@ -1,8 +1,8 @@
1
1
  module Tokenizers
2
2
  module Models
3
3
  class Unigram
4
- def self.new(vocab: nil, unk_id: nil)
5
- _new(vocab, unk_id)
4
+ def self.new(vocab: nil, unk_id: nil, byte_fallback: nil)
5
+ _new(vocab, unk_id, byte_fallback)
6
6
  end
7
7
  end
8
8
  end
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.3.3"
2
+ VERSION = "0.4.0"
3
3
  end
data/lib/tokenizers.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  # ext
2
2
  begin
3
- require_relative "tokenizers/#{RUBY_VERSION.to_f}/tokenizers"
3
+ require "tokenizers/#{RUBY_VERSION.to_f}/tokenizers"
4
4
  rescue LoadError
5
- require_relative "tokenizers/tokenizers"
5
+ require "tokenizers/tokenizers"
6
6
  end
7
7
 
8
8
  # decoders
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: x86_64-linux
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-09 00:00:00.000000000 Z
11
+ date: 2023-09-21 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -23,7 +23,6 @@ files:
23
23
  - LICENSE.txt
24
24
  - README.md
25
25
  - lib/tokenizers.rb
26
- - lib/tokenizers/2.7/tokenizers.so
27
26
  - lib/tokenizers/3.0/tokenizers.so
28
27
  - lib/tokenizers/3.1/tokenizers.so
29
28
  - lib/tokenizers/3.2/tokenizers.so
@@ -68,7 +67,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
68
67
  requirements:
69
68
  - - ">="
70
69
  - !ruby/object:Gem::Version
71
- version: '2.7'
70
+ version: '3.0'
72
71
  - - "<"
73
72
  - !ruby/object:Gem::Version
74
73
  version: 3.3.dev
Binary file