tokenizers 0.4.0-arm64-darwin → 0.4.2-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -1
- data/Cargo.lock +70 -103
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +358 -1526
- data/lib/tokenizers/3.0/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.1/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.2/tokenizers.bundle +0 -0
- data/lib/tokenizers/from_pretrained.rb +23 -17
- data/lib/tokenizers/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fae6fe42900355b029ab2d895401ee41b2edec183f7c89996597b61a6b453a38
|
4
|
+
data.tar.gz: ed48f8cfb614ffd228a4978e01e216a1712f737997f995ebe7cdf3c1ae420e03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dbdb6a09ad3ea6d4b5c9199f6dfaaa42534da5eed691197f196e6005757fd76796c1d387c57dc69a82b1407cd5f11c0ca1170454c3e2e3029d8d67a4a93d8ef7
|
7
|
+
data.tar.gz: 1d5bffa2e6483268bb2ea395b3a5206c96dd560ed31fe42ad19018ff6cf3113b2dafb41f6f7c4032c2688a2752876118fa94e1da4ecf6d07000a3e183385605b
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,13 @@
|
|
1
|
-
## 0.4.
|
1
|
+
## 0.4.2 (2023-11-16)
|
2
|
+
|
3
|
+
- Updated Tokenizers to 0.15.0
|
4
|
+
- Fixed issue with download caching
|
5
|
+
|
6
|
+
## 0.4.1 (2023-10-05)
|
7
|
+
|
8
|
+
- Fixed error loading gem
|
9
|
+
|
10
|
+
## 0.4.0 (2023-09-20)
|
2
11
|
|
3
12
|
- Updated Tokenizers to 0.14.0
|
4
13
|
- Dropped support for Ruby < 3
|
data/Cargo.lock
CHANGED
@@ -4,18 +4,9 @@ version = 3
|
|
4
4
|
|
5
5
|
[[package]]
|
6
6
|
name = "aho-corasick"
|
7
|
-
version = "
|
7
|
+
version = "1.1.1"
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
|
-
checksum = "
|
10
|
-
dependencies = [
|
11
|
-
"memchr",
|
12
|
-
]
|
13
|
-
|
14
|
-
[[package]]
|
15
|
-
name = "aho-corasick"
|
16
|
-
version = "1.0.5"
|
17
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
18
|
-
checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783"
|
9
|
+
checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab"
|
19
10
|
dependencies = [
|
20
11
|
"memchr",
|
21
12
|
]
|
@@ -103,16 +94,6 @@ dependencies = [
|
|
103
94
|
"windows-sys",
|
104
95
|
]
|
105
96
|
|
106
|
-
[[package]]
|
107
|
-
name = "crossbeam-channel"
|
108
|
-
version = "0.5.8"
|
109
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
110
|
-
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
|
111
|
-
dependencies = [
|
112
|
-
"cfg-if",
|
113
|
-
"crossbeam-utils",
|
114
|
-
]
|
115
|
-
|
116
97
|
[[package]]
|
117
98
|
name = "crossbeam-deque"
|
118
99
|
version = "0.8.3"
|
@@ -226,9 +207,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
|
226
207
|
|
227
208
|
[[package]]
|
228
209
|
name = "esaxx-rs"
|
229
|
-
version = "0.1.
|
210
|
+
version = "0.1.10"
|
230
211
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
231
|
-
checksum = "
|
212
|
+
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
|
232
213
|
dependencies = [
|
233
214
|
"cc",
|
234
215
|
]
|
@@ -241,9 +222,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
|
241
222
|
|
242
223
|
[[package]]
|
243
224
|
name = "getrandom"
|
244
|
-
version = "0.2.
|
225
|
+
version = "0.2.10"
|
245
226
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
246
|
-
checksum = "
|
227
|
+
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
|
247
228
|
dependencies = [
|
248
229
|
"cfg-if",
|
249
230
|
"libc",
|
@@ -256,15 +237,6 @@ version = "0.3.1"
|
|
256
237
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
257
238
|
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
258
239
|
|
259
|
-
[[package]]
|
260
|
-
name = "hermit-abi"
|
261
|
-
version = "0.2.6"
|
262
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
263
|
-
checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
|
264
|
-
dependencies = [
|
265
|
-
"libc",
|
266
|
-
]
|
267
|
-
|
268
240
|
[[package]]
|
269
241
|
name = "ident_case"
|
270
242
|
version = "1.0.1"
|
@@ -273,30 +245,31 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
|
273
245
|
|
274
246
|
[[package]]
|
275
247
|
name = "indicatif"
|
276
|
-
version = "0.
|
248
|
+
version = "0.17.7"
|
277
249
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
278
|
-
checksum = "
|
250
|
+
checksum = "fb28741c9db9a713d93deb3bb9515c20788cef5815265bee4980e87bde7e0f25"
|
279
251
|
dependencies = [
|
280
252
|
"console",
|
281
|
-
"
|
253
|
+
"instant",
|
282
254
|
"number_prefix",
|
283
|
-
"
|
255
|
+
"portable-atomic",
|
256
|
+
"unicode-width",
|
284
257
|
]
|
285
258
|
|
286
259
|
[[package]]
|
287
|
-
name = "
|
288
|
-
version = "0.
|
260
|
+
name = "instant"
|
261
|
+
version = "0.1.12"
|
289
262
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
290
|
-
checksum = "
|
263
|
+
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
291
264
|
dependencies = [
|
292
|
-
"
|
265
|
+
"cfg-if",
|
293
266
|
]
|
294
267
|
|
295
268
|
[[package]]
|
296
269
|
name = "itertools"
|
297
|
-
version = "0.
|
270
|
+
version = "0.11.0"
|
298
271
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
299
|
-
checksum = "
|
272
|
+
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
|
300
273
|
dependencies = [
|
301
274
|
"either",
|
302
275
|
]
|
@@ -321,9 +294,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
321
294
|
|
322
295
|
[[package]]
|
323
296
|
name = "libc"
|
324
|
-
version = "0.2.
|
297
|
+
version = "0.2.149"
|
325
298
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
326
|
-
checksum = "
|
299
|
+
checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
|
327
300
|
|
328
301
|
[[package]]
|
329
302
|
name = "libloading"
|
@@ -346,9 +319,9 @@ dependencies = [
|
|
346
319
|
|
347
320
|
[[package]]
|
348
321
|
name = "macro_rules_attribute"
|
349
|
-
version = "0.
|
322
|
+
version = "0.2.0"
|
350
323
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
351
|
-
checksum = "
|
324
|
+
checksum = "8a82271f7bc033d84bbca59a3ce3e4159938cb08a9c3aebbe54d215131518a13"
|
352
325
|
dependencies = [
|
353
326
|
"macro_rules_attribute-proc_macro",
|
354
327
|
"paste",
|
@@ -356,9 +329,9 @@ dependencies = [
|
|
356
329
|
|
357
330
|
[[package]]
|
358
331
|
name = "macro_rules_attribute-proc_macro"
|
359
|
-
version = "0.
|
332
|
+
version = "0.2.0"
|
360
333
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
361
|
-
checksum = "
|
334
|
+
checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568"
|
362
335
|
|
363
336
|
[[package]]
|
364
337
|
name = "magnus"
|
@@ -380,7 +353,7 @@ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
|
|
380
353
|
dependencies = [
|
381
354
|
"proc-macro2",
|
382
355
|
"quote",
|
383
|
-
"syn 2.0.
|
356
|
+
"syn 2.0.38",
|
384
357
|
]
|
385
358
|
|
386
359
|
[[package]]
|
@@ -406,9 +379,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
|
406
379
|
|
407
380
|
[[package]]
|
408
381
|
name = "monostate"
|
409
|
-
version = "0.1.
|
382
|
+
version = "0.1.9"
|
410
383
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
411
|
-
checksum = "
|
384
|
+
checksum = "15f370ae88093ec6b11a710dec51321a61d420fafd1bad6e30d01bd9c920e8ee"
|
412
385
|
dependencies = [
|
413
386
|
"monostate-impl",
|
414
387
|
"serde",
|
@@ -416,13 +389,13 @@ dependencies = [
|
|
416
389
|
|
417
390
|
[[package]]
|
418
391
|
name = "monostate-impl"
|
419
|
-
version = "0.1.
|
392
|
+
version = "0.1.9"
|
420
393
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
421
|
-
checksum = "
|
394
|
+
checksum = "371717c0a5543d6a800cac822eac735aa7d2d2fbb41002e9856a4089532dbdce"
|
422
395
|
dependencies = [
|
423
396
|
"proc-macro2",
|
424
397
|
"quote",
|
425
|
-
"syn 2.0.
|
398
|
+
"syn 2.0.38",
|
426
399
|
]
|
427
400
|
|
428
401
|
[[package]]
|
@@ -435,21 +408,11 @@ dependencies = [
|
|
435
408
|
"minimal-lexical",
|
436
409
|
]
|
437
410
|
|
438
|
-
[[package]]
|
439
|
-
name = "num_cpus"
|
440
|
-
version = "1.15.0"
|
441
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
442
|
-
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
|
443
|
-
dependencies = [
|
444
|
-
"hermit-abi",
|
445
|
-
"libc",
|
446
|
-
]
|
447
|
-
|
448
411
|
[[package]]
|
449
412
|
name = "number_prefix"
|
450
|
-
version = "0.
|
413
|
+
version = "0.4.0"
|
451
414
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
452
|
-
checksum = "
|
415
|
+
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
453
416
|
|
454
417
|
[[package]]
|
455
418
|
name = "once_cell"
|
@@ -481,9 +444,9 @@ dependencies = [
|
|
481
444
|
|
482
445
|
[[package]]
|
483
446
|
name = "paste"
|
484
|
-
version = "1.0.
|
447
|
+
version = "1.0.14"
|
485
448
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
486
|
-
checksum = "
|
449
|
+
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
|
487
450
|
|
488
451
|
[[package]]
|
489
452
|
name = "peeking_take_while"
|
@@ -497,6 +460,12 @@ version = "0.3.26"
|
|
497
460
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
498
461
|
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
|
499
462
|
|
463
|
+
[[package]]
|
464
|
+
name = "portable-atomic"
|
465
|
+
version = "1.4.3"
|
466
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
467
|
+
checksum = "31114a898e107c51bb1609ffaf55a0e011cf6a4d7f1170d0015a165082c0338b"
|
468
|
+
|
500
469
|
[[package]]
|
501
470
|
name = "ppv-lite86"
|
502
471
|
version = "0.2.17"
|
@@ -505,18 +474,18 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
|
505
474
|
|
506
475
|
[[package]]
|
507
476
|
name = "proc-macro2"
|
508
|
-
version = "1.0.
|
477
|
+
version = "1.0.68"
|
509
478
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
510
|
-
checksum = "
|
479
|
+
checksum = "5b1106fec09662ec6dd98ccac0f81cef56984d0b49f75c92d8cbad76e20c005c"
|
511
480
|
dependencies = [
|
512
481
|
"unicode-ident",
|
513
482
|
]
|
514
483
|
|
515
484
|
[[package]]
|
516
485
|
name = "quote"
|
517
|
-
version = "1.0.
|
486
|
+
version = "1.0.33"
|
518
487
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
519
|
-
checksum = "
|
488
|
+
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
520
489
|
dependencies = [
|
521
490
|
"proc-macro2",
|
522
491
|
]
|
@@ -553,9 +522,9 @@ dependencies = [
|
|
553
522
|
|
554
523
|
[[package]]
|
555
524
|
name = "rayon"
|
556
|
-
version = "1.
|
525
|
+
version = "1.8.0"
|
557
526
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
558
|
-
checksum = "
|
527
|
+
checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
|
559
528
|
dependencies = [
|
560
529
|
"either",
|
561
530
|
"rayon-core",
|
@@ -563,25 +532,23 @@ dependencies = [
|
|
563
532
|
|
564
533
|
[[package]]
|
565
534
|
name = "rayon-cond"
|
566
|
-
version = "0.
|
535
|
+
version = "0.3.0"
|
567
536
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
568
|
-
checksum = "
|
537
|
+
checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
|
569
538
|
dependencies = [
|
570
539
|
"either",
|
571
|
-
"itertools
|
540
|
+
"itertools",
|
572
541
|
"rayon",
|
573
542
|
]
|
574
543
|
|
575
544
|
[[package]]
|
576
545
|
name = "rayon-core"
|
577
|
-
version = "1.
|
546
|
+
version = "1.12.0"
|
578
547
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
579
|
-
checksum = "
|
548
|
+
checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
|
580
549
|
dependencies = [
|
581
|
-
"crossbeam-channel",
|
582
550
|
"crossbeam-deque",
|
583
551
|
"crossbeam-utils",
|
584
|
-
"num_cpus",
|
585
552
|
]
|
586
553
|
|
587
554
|
[[package]]
|
@@ -620,7 +587,7 @@ version = "1.9.5"
|
|
620
587
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
621
588
|
checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47"
|
622
589
|
dependencies = [
|
623
|
-
"aho-corasick
|
590
|
+
"aho-corasick",
|
624
591
|
"memchr",
|
625
592
|
"regex-automata",
|
626
593
|
"regex-syntax",
|
@@ -632,7 +599,7 @@ version = "0.3.8"
|
|
632
599
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
633
600
|
checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
|
634
601
|
dependencies = [
|
635
|
-
"aho-corasick
|
602
|
+
"aho-corasick",
|
636
603
|
"memchr",
|
637
604
|
"regex-syntax",
|
638
605
|
]
|
@@ -669,22 +636,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
|
669
636
|
|
670
637
|
[[package]]
|
671
638
|
name = "serde"
|
672
|
-
version = "1.0.
|
639
|
+
version = "1.0.188"
|
673
640
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
674
|
-
checksum = "
|
641
|
+
checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
|
675
642
|
dependencies = [
|
676
643
|
"serde_derive",
|
677
644
|
]
|
678
645
|
|
679
646
|
[[package]]
|
680
647
|
name = "serde_derive"
|
681
|
-
version = "1.0.
|
648
|
+
version = "1.0.188"
|
682
649
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
683
|
-
checksum = "
|
650
|
+
checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
|
684
651
|
dependencies = [
|
685
652
|
"proc-macro2",
|
686
653
|
"quote",
|
687
|
-
"syn 2.0.
|
654
|
+
"syn 2.0.38",
|
688
655
|
]
|
689
656
|
|
690
657
|
[[package]]
|
@@ -747,9 +714,9 @@ dependencies = [
|
|
747
714
|
|
748
715
|
[[package]]
|
749
716
|
name = "syn"
|
750
|
-
version = "2.0.
|
717
|
+
version = "2.0.38"
|
751
718
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
752
|
-
checksum = "
|
719
|
+
checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b"
|
753
720
|
dependencies = [
|
754
721
|
"proc-macro2",
|
755
722
|
"quote",
|
@@ -758,46 +725,46 @@ dependencies = [
|
|
758
725
|
|
759
726
|
[[package]]
|
760
727
|
name = "thiserror"
|
761
|
-
version = "1.0.
|
728
|
+
version = "1.0.49"
|
762
729
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
763
|
-
checksum = "
|
730
|
+
checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4"
|
764
731
|
dependencies = [
|
765
732
|
"thiserror-impl",
|
766
733
|
]
|
767
734
|
|
768
735
|
[[package]]
|
769
736
|
name = "thiserror-impl"
|
770
|
-
version = "1.0.
|
737
|
+
version = "1.0.49"
|
771
738
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
772
|
-
checksum = "
|
739
|
+
checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
|
773
740
|
dependencies = [
|
774
741
|
"proc-macro2",
|
775
742
|
"quote",
|
776
|
-
"syn 2.0.
|
743
|
+
"syn 2.0.38",
|
777
744
|
]
|
778
745
|
|
779
746
|
[[package]]
|
780
747
|
name = "tokenizers"
|
781
|
-
version = "0.4.
|
748
|
+
version = "0.4.2"
|
782
749
|
dependencies = [
|
783
750
|
"magnus",
|
784
751
|
"onig",
|
785
752
|
"serde",
|
786
|
-
"tokenizers 0.
|
753
|
+
"tokenizers 0.15.0",
|
787
754
|
]
|
788
755
|
|
789
756
|
[[package]]
|
790
757
|
name = "tokenizers"
|
791
|
-
version = "0.
|
758
|
+
version = "0.15.0"
|
792
759
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
793
|
-
checksum = "
|
760
|
+
checksum = "062b8a9613d6017633b80fb55fbb33f1aff006c36225a3025630753398034b3c"
|
794
761
|
dependencies = [
|
795
|
-
"aho-corasick
|
762
|
+
"aho-corasick",
|
796
763
|
"derive_builder",
|
797
764
|
"esaxx-rs",
|
798
765
|
"getrandom",
|
799
766
|
"indicatif",
|
800
|
-
"itertools
|
767
|
+
"itertools",
|
801
768
|
"lazy_static",
|
802
769
|
"log",
|
803
770
|
"macro_rules_attribute",
|