tokenizers 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 31ba3313f98f5360a6e9b0434c674d113622cf0421c6e7610ba250a8a9c79402
4
- data.tar.gz: ba7c1913bdafa2b58835ac3689a34c37206b1a567c634f81aedaea0ca21a20cf
3
+ metadata.gz: fe2e8b2ec97ac4dabb4e401cb426079c065be420bf8da41c9484b3d38290589c
4
+ data.tar.gz: bba3b9d9d94a278e3f5189fda48b3b3a7f1ff369b66ae8b85945e92a2d8b90ed
5
5
  SHA512:
6
- metadata.gz: 7cac28260eea675e5cea80324fa755681e9a8a06ce38fc501c57da69056d36d0485576dac1237da21a7d23ca24dacc46b6b1a92d4c7b5f91644fda37b5550ada
7
- data.tar.gz: 84c361eadb625a96234b454f91b7f9a847010e42927042978003e3219e1b28d1da8d7665bc9cda7aa820b6053e5c6be1458cdaee6fe1d4709df632fb744155b4
6
+ metadata.gz: fd5876cc95c22b917f1a5715f9b9651f714291f017c00f83316ad2c3cc48c8819d4de70288b4067570c9f729cca9d06cbfa8562996a61ac694e202268a58cdea
7
+ data.tar.gz: cb793a73aabafe933ef8d7259b7a36848b1f38cacdf20ef3f6f8554671b7de6ece53b3c14f46f368f540ecbab5dfca087f730e99c22caf84c7b35d539b07b633
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.4.4 (2024-02-27)
2
+
3
+ - Updated Tokenizers to 0.15.2
4
+
1
5
  ## 0.4.3 (2024-01-03)
2
6
 
3
7
  - Added support for Ruby 3.3
data/Cargo.lock CHANGED
@@ -280,6 +280,15 @@ dependencies = [
280
280
  "either",
281
281
  ]
282
282
 
283
+ [[package]]
284
+ name = "itertools"
285
+ version = "0.12.1"
286
+ source = "registry+https://github.com/rust-lang/crates.io-index"
287
+ checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
288
+ dependencies = [
289
+ "either",
290
+ ]
291
+
283
292
  [[package]]
284
293
  name = "itoa"
285
294
  version = "1.0.6"
@@ -543,7 +552,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
543
552
  checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
544
553
  dependencies = [
545
554
  "either",
546
- "itertools",
555
+ "itertools 0.11.0",
547
556
  "rayon",
548
557
  ]
549
558
 
@@ -559,18 +568,18 @@ dependencies = [
559
568
 
560
569
  [[package]]
561
570
  name = "rb-sys"
562
- version = "0.9.86"
571
+ version = "0.9.89"
563
572
  source = "registry+https://github.com/rust-lang/crates.io-index"
564
- checksum = "7285f2a7b92f58ab198e3fd59a71d2861478f9c4642f41e83582385818941697"
573
+ checksum = "0d197f2c03751ef006f29d593d22aa9068c9c358e04ca503afea0329c366147c"
565
574
  dependencies = [
566
575
  "rb-sys-build",
567
576
  ]
568
577
 
569
578
  [[package]]
570
579
  name = "rb-sys-build"
571
- version = "0.9.86"
580
+ version = "0.9.89"
572
581
  source = "registry+https://github.com/rust-lang/crates.io-index"
573
- checksum = "71583945f94dabb6c0dfa63f1b71e929c1901e1e288ef3739ab8bed3b7069550"
582
+ checksum = "2b50caf8fd028f12abe00d6debe2ae2adf6202c9ca3caa59487eda710d90fa28"
574
583
  dependencies = [
575
584
  "bindgen",
576
585
  "lazy_static",
@@ -596,7 +605,7 @@ dependencies = [
596
605
  "aho-corasick",
597
606
  "memchr",
598
607
  "regex-automata",
599
- "regex-syntax",
608
+ "regex-syntax 0.7.5",
600
609
  ]
601
610
 
602
611
  [[package]]
@@ -607,7 +616,7 @@ checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
607
616
  dependencies = [
608
617
  "aho-corasick",
609
618
  "memchr",
610
- "regex-syntax",
619
+ "regex-syntax 0.7.5",
611
620
  ]
612
621
 
613
622
  [[package]]
@@ -616,6 +625,12 @@ version = "0.7.5"
616
625
  source = "registry+https://github.com/rust-lang/crates.io-index"
617
626
  checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
618
627
 
628
+ [[package]]
629
+ name = "regex-syntax"
630
+ version = "0.8.2"
631
+ source = "registry+https://github.com/rust-lang/crates.io-index"
632
+ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
633
+
619
634
  [[package]]
620
635
  name = "rustc-hash"
621
636
  version = "1.1.0"
@@ -751,26 +766,26 @@ dependencies = [
751
766
 
752
767
  [[package]]
753
768
  name = "tokenizers"
754
- version = "0.4.3"
769
+ version = "0.4.4"
755
770
  dependencies = [
756
771
  "magnus",
757
772
  "onig",
758
773
  "serde",
759
- "tokenizers 0.15.0",
774
+ "tokenizers 0.15.2",
760
775
  ]
761
776
 
762
777
  [[package]]
763
778
  name = "tokenizers"
764
- version = "0.15.0"
779
+ version = "0.15.2"
765
780
  source = "registry+https://github.com/rust-lang/crates.io-index"
766
- checksum = "062b8a9613d6017633b80fb55fbb33f1aff006c36225a3025630753398034b3c"
781
+ checksum = "3dd47962b0ba36e7fd33518fbf1754d136fd1474000162bbf2a8b5fcb2d3654d"
767
782
  dependencies = [
768
783
  "aho-corasick",
769
784
  "derive_builder",
770
785
  "esaxx-rs",
771
786
  "getrandom",
772
787
  "indicatif",
773
- "itertools",
788
+ "itertools 0.12.1",
774
789
  "lazy_static",
775
790
  "log",
776
791
  "macro_rules_attribute",
@@ -781,7 +796,7 @@ dependencies = [
781
796
  "rayon",
782
797
  "rayon-cond",
783
798
  "regex",
784
- "regex-syntax",
799
+ "regex-syntax 0.8.2",
785
800
  "serde",
786
801
  "serde_json",
787
802
  "spm_precompiled",
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  :slightly_smiling_face: Fast state-of-the-art [tokenizers](https://github.com/huggingface/tokenizers) for Ruby
4
4
 
5
- [![Build Status](https://github.com/ankane/tokenizers-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/tokenizers-ruby/actions)
5
+ [![Build Status](https://github.com/ankane/tokenizers-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/tokenizers-ruby/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "tokenizers"
3
- version = "0.4.3"
3
+ version = "0.4.4"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -16,6 +16,6 @@ onig = { version = "6", default-features = false }
16
16
  serde = { version = "1", features = ["rc", "derive"] }
17
17
 
18
18
  [dependencies.tokenizers]
19
- version = "=0.15.0" # also update in from_pretrained.rb
19
+ version = "=0.15.2" # also update in from_pretrained.rb
20
20
  default-features = false
21
21
  features = ["progressbar", "onig", "esaxx_fast"]
@@ -77,11 +77,11 @@ impl RbTrainer {
77
77
  setter!(self, BpeTrainer, vocab_size, vocab_size);
78
78
  }
79
79
 
80
- fn bpe_trainer_min_frequency(&self) -> u32 {
80
+ fn bpe_trainer_min_frequency(&self) -> u64 {
81
81
  getter!(self, BpeTrainer, min_frequency)
82
82
  }
83
83
 
84
- fn bpe_trainer_set_min_frequency(&self, freq: u32) {
84
+ fn bpe_trainer_set_min_frequency(&self, freq: u64) {
85
85
  setter!(self, BpeTrainer, min_frequency, freq);
86
86
  }
87
87
 
@@ -235,11 +235,11 @@ impl RbTrainer {
235
235
  setter!(self, WordLevelTrainer, vocab_size, vocab_size);
236
236
  }
237
237
 
238
- fn word_level_trainer_min_frequency(&self) -> u32 {
238
+ fn word_level_trainer_min_frequency(&self) -> u64 {
239
239
  getter!(self, WordLevelTrainer, min_frequency)
240
240
  }
241
241
 
242
- fn word_level_trainer_set_min_frequency(&self, freq: u32) {
242
+ fn word_level_trainer_set_min_frequency(&self, freq: u64) {
243
243
  setter!(self, WordLevelTrainer, min_frequency, freq);
244
244
  }
245
245
 
@@ -289,11 +289,11 @@ impl RbTrainer {
289
289
  setter!(self, WordPieceTrainer, @set_vocab_size, vocab_size);
290
290
  }
291
291
 
292
- fn word_piece_trainer_min_frequency(&self) -> u32 {
292
+ fn word_piece_trainer_min_frequency(&self) -> u64 {
293
293
  getter!(self, WordPieceTrainer, min_frequency())
294
294
  }
295
295
 
296
- fn word_piece_trainer_set_min_frequency(&self, freq: u32) {
296
+ fn word_piece_trainer_set_min_frequency(&self, freq: u64) {
297
297
  setter!(self, WordPieceTrainer, @set_min_frequency, freq);
298
298
  }
299
299
 
@@ -1,7 +1,7 @@
1
1
  module Tokenizers
2
2
  module FromPretrained
3
3
  # for user agent
4
- TOKENIZERS_VERSION = "0.15.0"
4
+ TOKENIZERS_VERSION = "0.15.2"
5
5
 
6
6
  # use Ruby for downloads
7
7
  # this avoids the need to vendor OpenSSL on Linux
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.4.3"
2
+ VERSION = "0.4.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-04 00:00:00.000000000 Z
11
+ date: 2024-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys