tokenizers 0.4.0-arm64-darwin → 0.4.2-arm64-darwin
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -1
- data/Cargo.lock +70 -103
- data/Cargo.toml +1 -0
- data/LICENSE-THIRD-PARTY.txt +358 -1526
- data/lib/tokenizers/3.0/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.1/tokenizers.bundle +0 -0
- data/lib/tokenizers/3.2/tokenizers.bundle +0 -0
- data/lib/tokenizers/from_pretrained.rb +23 -17
- data/lib/tokenizers/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fae6fe42900355b029ab2d895401ee41b2edec183f7c89996597b61a6b453a38
|
4
|
+
data.tar.gz: ed48f8cfb614ffd228a4978e01e216a1712f737997f995ebe7cdf3c1ae420e03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dbdb6a09ad3ea6d4b5c9199f6dfaaa42534da5eed691197f196e6005757fd76796c1d387c57dc69a82b1407cd5f11c0ca1170454c3e2e3029d8d67a4a93d8ef7
|
7
|
+
data.tar.gz: 1d5bffa2e6483268bb2ea395b3a5206c96dd560ed31fe42ad19018ff6cf3113b2dafb41f6f7c4032c2688a2752876118fa94e1da4ecf6d07000a3e183385605b
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,13 @@
|
|
1
|
-
## 0.4.
|
1
|
+
## 0.4.2 (2023-11-16)
|
2
|
+
|
3
|
+
- Updated Tokenizers to 0.15.0
|
4
|
+
- Fixed issue with download caching
|
5
|
+
|
6
|
+
## 0.4.1 (2023-10-05)
|
7
|
+
|
8
|
+
- Fixed error loading gem
|
9
|
+
|
10
|
+
## 0.4.0 (2023-09-20)
|
2
11
|
|
3
12
|
- Updated Tokenizers to 0.14.0
|
4
13
|
- Dropped support for Ruby < 3
|
data/Cargo.lock
CHANGED
@@ -4,18 +4,9 @@ version = 3
|
|
4
4
|
|
5
5
|
[[package]]
|
6
6
|
name = "aho-corasick"
|
7
|
-
version = "
|
7
|
+
version = "1.1.1"
|
8
8
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
|
-
checksum = "
|
10
|
-
dependencies = [
|
11
|
-
"memchr",
|
12
|
-
]
|
13
|
-
|
14
|
-
[[package]]
|
15
|
-
name = "aho-corasick"
|
16
|
-
version = "1.0.5"
|
17
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
18
|
-
checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783"
|
9
|
+
checksum = "ea5d730647d4fadd988536d06fecce94b7b4f2a7efdae548f1cf4b63205518ab"
|
19
10
|
dependencies = [
|
20
11
|
"memchr",
|
21
12
|
]
|
@@ -103,16 +94,6 @@ dependencies = [
|
|
103
94
|
"windows-sys",
|
104
95
|
]
|
105
96
|
|
106
|
-
[[package]]
|
107
|
-
name = "crossbeam-channel"
|
108
|
-
version = "0.5.8"
|
109
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
110
|
-
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
|
111
|
-
dependencies = [
|
112
|
-
"cfg-if",
|
113
|
-
"crossbeam-utils",
|
114
|
-
]
|
115
|
-
|
116
97
|
[[package]]
|
117
98
|
name = "crossbeam-deque"
|
118
99
|
version = "0.8.3"
|
@@ -226,9 +207,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
|
226
207
|
|
227
208
|
[[package]]
|
228
209
|
name = "esaxx-rs"
|
229
|
-
version = "0.1.
|
210
|
+
version = "0.1.10"
|
230
211
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
231
|
-
checksum = "
|
212
|
+
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
|
232
213
|
dependencies = [
|
233
214
|
"cc",
|
234
215
|
]
|
@@ -241,9 +222,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
|
241
222
|
|
242
223
|
[[package]]
|
243
224
|
name = "getrandom"
|
244
|
-
version = "0.2.
|
225
|
+
version = "0.2.10"
|
245
226
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
246
|
-
checksum = "
|
227
|
+
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
|
247
228
|
dependencies = [
|
248
229
|
"cfg-if",
|
249
230
|
"libc",
|
@@ -256,15 +237,6 @@ version = "0.3.1"
|
|
256
237
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
257
238
|
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
258
239
|
|
259
|
-
[[package]]
|
260
|
-
name = "hermit-abi"
|
261
|
-
version = "0.2.6"
|
262
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
263
|
-
checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
|
264
|
-
dependencies = [
|
265
|
-
"libc",
|
266
|
-
]
|
267
|
-
|
268
240
|
[[package]]
|
269
241
|
name = "ident_case"
|
270
242
|
version = "1.0.1"
|
@@ -273,30 +245,31 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
|
|
273
245
|
|
274
246
|
[[package]]
|
275
247
|
name = "indicatif"
|
276
|
-
version = "0.
|
248
|
+
version = "0.17.7"
|
277
249
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
278
|
-
checksum = "
|
250
|
+
checksum = "fb28741c9db9a713d93deb3bb9515c20788cef5815265bee4980e87bde7e0f25"
|
279
251
|
dependencies = [
|
280
252
|
"console",
|
281
|
-
"
|
253
|
+
"instant",
|
282
254
|
"number_prefix",
|
283
|
-
"
|
255
|
+
"portable-atomic",
|
256
|
+
"unicode-width",
|
284
257
|
]
|
285
258
|
|
286
259
|
[[package]]
|
287
|
-
name = "
|
288
|
-
version = "0.
|
260
|
+
name = "instant"
|
261
|
+
version = "0.1.12"
|
289
262
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
290
|
-
checksum = "
|
263
|
+
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
291
264
|
dependencies = [
|
292
|
-
"
|
265
|
+
"cfg-if",
|
293
266
|
]
|
294
267
|
|
295
268
|
[[package]]
|
296
269
|
name = "itertools"
|
297
|
-
version = "0.
|
270
|
+
version = "0.11.0"
|
298
271
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
299
|
-
checksum = "
|
272
|
+
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
|
300
273
|
dependencies = [
|
301
274
|
"either",
|
302
275
|
]
|
@@ -321,9 +294,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
|
321
294
|
|
322
295
|
[[package]]
|
323
296
|
name = "libc"
|
324
|
-
version = "0.2.
|
297
|
+
version = "0.2.149"
|
325
298
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
326
|
-
checksum = "
|
299
|
+
checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
|
327
300
|
|
328
301
|
[[package]]
|
329
302
|
name = "libloading"
|
@@ -346,9 +319,9 @@ dependencies = [
|
|
346
319
|
|
347
320
|
[[package]]
|
348
321
|
name = "macro_rules_attribute"
|
349
|
-
version = "0.
|
322
|
+
version = "0.2.0"
|
350
323
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
351
|
-
checksum = "
|
324
|
+
checksum = "8a82271f7bc033d84bbca59a3ce3e4159938cb08a9c3aebbe54d215131518a13"
|
352
325
|
dependencies = [
|
353
326
|
"macro_rules_attribute-proc_macro",
|
354
327
|
"paste",
|
@@ -356,9 +329,9 @@ dependencies = [
|
|
356
329
|
|
357
330
|
[[package]]
|
358
331
|
name = "macro_rules_attribute-proc_macro"
|
359
|
-
version = "0.
|
332
|
+
version = "0.2.0"
|
360
333
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
361
|
-
checksum = "
|
334
|
+
checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568"
|
362
335
|
|
363
336
|
[[package]]
|
364
337
|
name = "magnus"
|
@@ -380,7 +353,7 @@ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
|
|
380
353
|
dependencies = [
|
381
354
|
"proc-macro2",
|
382
355
|
"quote",
|
383
|
-
"syn 2.0.
|
356
|
+
"syn 2.0.38",
|
384
357
|
]
|
385
358
|
|
386
359
|
[[package]]
|
@@ -406,9 +379,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
|
406
379
|
|
407
380
|
[[package]]
|
408
381
|
name = "monostate"
|
409
|
-
version = "0.1.
|
382
|
+
version = "0.1.9"
|
410
383
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
411
|
-
checksum = "
|
384
|
+
checksum = "15f370ae88093ec6b11a710dec51321a61d420fafd1bad6e30d01bd9c920e8ee"
|
412
385
|
dependencies = [
|
413
386
|
"monostate-impl",
|
414
387
|
"serde",
|
@@ -416,13 +389,13 @@ dependencies = [
|
|
416
389
|
|
417
390
|
[[package]]
|
418
391
|
name = "monostate-impl"
|
419
|
-
version = "0.1.
|
392
|
+
version = "0.1.9"
|
420
393
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
421
|
-
checksum = "
|
394
|
+
checksum = "371717c0a5543d6a800cac822eac735aa7d2d2fbb41002e9856a4089532dbdce"
|
422
395
|
dependencies = [
|
423
396
|
"proc-macro2",
|
424
397
|
"quote",
|
425
|
-
"syn 2.0.
|
398
|
+
"syn 2.0.38",
|
426
399
|
]
|
427
400
|
|
428
401
|
[[package]]
|
@@ -435,21 +408,11 @@ dependencies = [
|
|
435
408
|
"minimal-lexical",
|
436
409
|
]
|
437
410
|
|
438
|
-
[[package]]
|
439
|
-
name = "num_cpus"
|
440
|
-
version = "1.15.0"
|
441
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
442
|
-
checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
|
443
|
-
dependencies = [
|
444
|
-
"hermit-abi",
|
445
|
-
"libc",
|
446
|
-
]
|
447
|
-
|
448
411
|
[[package]]
|
449
412
|
name = "number_prefix"
|
450
|
-
version = "0.
|
413
|
+
version = "0.4.0"
|
451
414
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
452
|
-
checksum = "
|
415
|
+
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
453
416
|
|
454
417
|
[[package]]
|
455
418
|
name = "once_cell"
|
@@ -481,9 +444,9 @@ dependencies = [
|
|
481
444
|
|
482
445
|
[[package]]
|
483
446
|
name = "paste"
|
484
|
-
version = "1.0.
|
447
|
+
version = "1.0.14"
|
485
448
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
486
|
-
checksum = "
|
449
|
+
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
|
487
450
|
|
488
451
|
[[package]]
|
489
452
|
name = "peeking_take_while"
|
@@ -497,6 +460,12 @@ version = "0.3.26"
|
|
497
460
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
498
461
|
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
|
499
462
|
|
463
|
+
[[package]]
|
464
|
+
name = "portable-atomic"
|
465
|
+
version = "1.4.3"
|
466
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
467
|
+
checksum = "31114a898e107c51bb1609ffaf55a0e011cf6a4d7f1170d0015a165082c0338b"
|
468
|
+
|
500
469
|
[[package]]
|
501
470
|
name = "ppv-lite86"
|
502
471
|
version = "0.2.17"
|
@@ -505,18 +474,18 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
|
505
474
|
|
506
475
|
[[package]]
|
507
476
|
name = "proc-macro2"
|
508
|
-
version = "1.0.
|
477
|
+
version = "1.0.68"
|
509
478
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
510
|
-
checksum = "
|
479
|
+
checksum = "5b1106fec09662ec6dd98ccac0f81cef56984d0b49f75c92d8cbad76e20c005c"
|
511
480
|
dependencies = [
|
512
481
|
"unicode-ident",
|
513
482
|
]
|
514
483
|
|
515
484
|
[[package]]
|
516
485
|
name = "quote"
|
517
|
-
version = "1.0.
|
486
|
+
version = "1.0.33"
|
518
487
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
519
|
-
checksum = "
|
488
|
+
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
520
489
|
dependencies = [
|
521
490
|
"proc-macro2",
|
522
491
|
]
|
@@ -553,9 +522,9 @@ dependencies = [
|
|
553
522
|
|
554
523
|
[[package]]
|
555
524
|
name = "rayon"
|
556
|
-
version = "1.
|
525
|
+
version = "1.8.0"
|
557
526
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
558
|
-
checksum = "
|
527
|
+
checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
|
559
528
|
dependencies = [
|
560
529
|
"either",
|
561
530
|
"rayon-core",
|
@@ -563,25 +532,23 @@ dependencies = [
|
|
563
532
|
|
564
533
|
[[package]]
|
565
534
|
name = "rayon-cond"
|
566
|
-
version = "0.
|
535
|
+
version = "0.3.0"
|
567
536
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
568
|
-
checksum = "
|
537
|
+
checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
|
569
538
|
dependencies = [
|
570
539
|
"either",
|
571
|
-
"itertools
|
540
|
+
"itertools",
|
572
541
|
"rayon",
|
573
542
|
]
|
574
543
|
|
575
544
|
[[package]]
|
576
545
|
name = "rayon-core"
|
577
|
-
version = "1.
|
546
|
+
version = "1.12.0"
|
578
547
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
579
|
-
checksum = "
|
548
|
+
checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
|
580
549
|
dependencies = [
|
581
|
-
"crossbeam-channel",
|
582
550
|
"crossbeam-deque",
|
583
551
|
"crossbeam-utils",
|
584
|
-
"num_cpus",
|
585
552
|
]
|
586
553
|
|
587
554
|
[[package]]
|
@@ -620,7 +587,7 @@ version = "1.9.5"
|
|
620
587
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
621
588
|
checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47"
|
622
589
|
dependencies = [
|
623
|
-
"aho-corasick
|
590
|
+
"aho-corasick",
|
624
591
|
"memchr",
|
625
592
|
"regex-automata",
|
626
593
|
"regex-syntax",
|
@@ -632,7 +599,7 @@ version = "0.3.8"
|
|
632
599
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
633
600
|
checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
|
634
601
|
dependencies = [
|
635
|
-
"aho-corasick
|
602
|
+
"aho-corasick",
|
636
603
|
"memchr",
|
637
604
|
"regex-syntax",
|
638
605
|
]
|
@@ -669,22 +636,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
|
669
636
|
|
670
637
|
[[package]]
|
671
638
|
name = "serde"
|
672
|
-
version = "1.0.
|
639
|
+
version = "1.0.188"
|
673
640
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
674
|
-
checksum = "
|
641
|
+
checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
|
675
642
|
dependencies = [
|
676
643
|
"serde_derive",
|
677
644
|
]
|
678
645
|
|
679
646
|
[[package]]
|
680
647
|
name = "serde_derive"
|
681
|
-
version = "1.0.
|
648
|
+
version = "1.0.188"
|
682
649
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
683
|
-
checksum = "
|
650
|
+
checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
|
684
651
|
dependencies = [
|
685
652
|
"proc-macro2",
|
686
653
|
"quote",
|
687
|
-
"syn 2.0.
|
654
|
+
"syn 2.0.38",
|
688
655
|
]
|
689
656
|
|
690
657
|
[[package]]
|
@@ -747,9 +714,9 @@ dependencies = [
|
|
747
714
|
|
748
715
|
[[package]]
|
749
716
|
name = "syn"
|
750
|
-
version = "2.0.
|
717
|
+
version = "2.0.38"
|
751
718
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
752
|
-
checksum = "
|
719
|
+
checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b"
|
753
720
|
dependencies = [
|
754
721
|
"proc-macro2",
|
755
722
|
"quote",
|
@@ -758,46 +725,46 @@ dependencies = [
|
|
758
725
|
|
759
726
|
[[package]]
|
760
727
|
name = "thiserror"
|
761
|
-
version = "1.0.
|
728
|
+
version = "1.0.49"
|
762
729
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
763
|
-
checksum = "
|
730
|
+
checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4"
|
764
731
|
dependencies = [
|
765
732
|
"thiserror-impl",
|
766
733
|
]
|
767
734
|
|
768
735
|
[[package]]
|
769
736
|
name = "thiserror-impl"
|
770
|
-
version = "1.0.
|
737
|
+
version = "1.0.49"
|
771
738
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
772
|
-
checksum = "
|
739
|
+
checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
|
773
740
|
dependencies = [
|
774
741
|
"proc-macro2",
|
775
742
|
"quote",
|
776
|
-
"syn 2.0.
|
743
|
+
"syn 2.0.38",
|
777
744
|
]
|
778
745
|
|
779
746
|
[[package]]
|
780
747
|
name = "tokenizers"
|
781
|
-
version = "0.4.
|
748
|
+
version = "0.4.2"
|
782
749
|
dependencies = [
|
783
750
|
"magnus",
|
784
751
|
"onig",
|
785
752
|
"serde",
|
786
|
-
"tokenizers 0.
|
753
|
+
"tokenizers 0.15.0",
|
787
754
|
]
|
788
755
|
|
789
756
|
[[package]]
|
790
757
|
name = "tokenizers"
|
791
|
-
version = "0.
|
758
|
+
version = "0.15.0"
|
792
759
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
793
|
-
checksum = "
|
760
|
+
checksum = "062b8a9613d6017633b80fb55fbb33f1aff006c36225a3025630753398034b3c"
|
794
761
|
dependencies = [
|
795
|
-
"aho-corasick
|
762
|
+
"aho-corasick",
|
796
763
|
"derive_builder",
|
797
764
|
"esaxx-rs",
|
798
765
|
"getrandom",
|
799
766
|
"indicatif",
|
800
|
-
"itertools
|
767
|
+
"itertools",
|
801
768
|
"lazy_static",
|
802
769
|
"log",
|
803
770
|
"macro_rules_attribute",
|