tokenizers 0.3.0 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ff4d1ad7b56010f603ead7a4794c003c5294e50f1b33de62c8089ddf150d5ad
4
- data.tar.gz: 295aaabb720971f2ddcc832ab0d5deedf1e0ed8dab03aca96ac1d396b5723de7
3
+ metadata.gz: 11fdc53989bc2285096bc5eb4a971426e153375ada43bc8c51d2a191b42fe02d
4
+ data.tar.gz: 92d4d8bef2c4013d5cf0d55bff30bc070ca6e59430f87b96f9147075216a1c1d
5
5
  SHA512:
6
- metadata.gz: e14207004cddeef40590229ea2c8a9bf54e5c5b75cdbcdd32cd6f23c24feb8544fcabe86fa9bced32cb41f2581ee0df4d36ed2b6a58ef2fc668aa33c270659df
7
- data.tar.gz: b2bb202c8c37bdd0d14ca64be147e99b224128c1461f56761f1d58d9326b40768e7b903bbbb4c2a0363bd4b1c9ef5a66be53210ad801d5e45c7d86dd0945bd82
6
+ metadata.gz: 7c91c33078b6c5b23a6080908fa184e2222922f35bd6e1c439329525f4554f939cf98ca4dcd3017d858cdb6ec2a96f6671ce5dc425ca643d482b49b04af00f4c
7
+ data.tar.gz: 74aad6c458792570ace93107aacccb8a9df7b61228dd062bd7bcaf015a53d4f6b4b1fcace09c69bc959bd9b04598d315344d4edc7e6abbe1603f3b92d29ab711
data/CHANGELOG.md CHANGED
@@ -1,4 +1,12 @@
1
- ## 0.3.0 (2022-02-07)
1
+ ## 0.3.2 (2023-03-06)
2
+
3
+ - Added precompiled gem for Linux x86-64 MUSL
4
+
5
+ ## 0.3.1 (2023-02-08)
6
+
7
+ - Fixed error with Ruby 2.7
8
+
9
+ ## 0.3.0 (2023-02-07)
2
10
 
3
11
  - Added support for training tokenizers
4
12
  - Added more methods to `Tokenizer`
@@ -7,20 +15,20 @@
7
15
  - Changed `encode` method to include special tokens by default
8
16
  - Changed how offsets are calculated for strings with multibyte characters
9
17
 
10
- ## 0.2.3 (2022-01-22)
18
+ ## 0.2.3 (2023-01-22)
11
19
 
12
20
  - Added `add_special_tokens` option to `encode` method
13
21
  - Added warning about `encode` method including special tokens by default in 0.3.0
14
22
  - Added more methods to `Encoding`
15
23
  - Fixed error with precompiled gem on Mac ARM
16
24
 
17
- ## 0.2.2 (2022-01-15)
25
+ ## 0.2.2 (2023-01-15)
18
26
 
19
27
  - Added precompiled gem for Linux ARM
20
28
  - Added `from_file` method
21
29
  - Fixed error with precompiled gem on Linux x86-64
22
30
 
23
- ## 0.2.1 (2022-01-12)
31
+ ## 0.2.1 (2023-01-12)
24
32
 
25
33
  - Added support for Ruby 3.2
26
34
 
data/Cargo.lock CHANGED
@@ -353,7 +353,8 @@ checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d"
353
353
  [[package]]
354
354
  name = "magnus"
355
355
  version = "0.5.0"
356
- source = "git+https://github.com/matsadler/magnus#eda735faa7e03da2443eaf2c4058a184917d6b87"
356
+ source = "registry+https://github.com/rust-lang/crates.io-index"
357
+ checksum = "af37419a942477f606d227d0e6e92f3b68458bfc68fec3bc2629df6a2c1ccdf9"
357
358
  dependencies = [
358
359
  "magnus-macros",
359
360
  "rb-sys",
@@ -362,8 +363,9 @@ dependencies = [
362
363
 
363
364
  [[package]]
364
365
  name = "magnus-macros"
365
- version = "0.3.0"
366
- source = "git+https://github.com/matsadler/magnus#eda735faa7e03da2443eaf2c4058a184917d6b87"
366
+ version = "0.4.0"
367
+ source = "registry+https://github.com/rust-lang/crates.io-index"
368
+ checksum = "85aa71c9891b2732ff1157e1860a1ee578459fd25811fd3d72cc6e32b3fbdfea"
367
369
  dependencies = [
368
370
  "proc-macro2",
369
371
  "quote",
@@ -552,22 +554,25 @@ dependencies = [
552
554
 
553
555
  [[package]]
554
556
  name = "rb-sys"
555
- version = "0.9.64"
557
+ version = "0.9.65"
556
558
  source = "registry+https://github.com/rust-lang/crates.io-index"
557
- checksum = "cc8945662df8083245deda89e236647173cc7ad750f481ddcd7bbfd3afe3fa5e"
559
+ checksum = "e8fe617bad8e88fd7e5d6f432e35f09e5f94144dfb8e8ee4adde82fb920dc59b"
558
560
  dependencies = [
559
561
  "rb-sys-build",
560
562
  ]
561
563
 
562
564
  [[package]]
563
565
  name = "rb-sys-build"
564
- version = "0.9.64"
566
+ version = "0.9.65"
565
567
  source = "registry+https://github.com/rust-lang/crates.io-index"
566
- checksum = "ae8c3cdf9edc3908ee1555b7a1bca58ee1b499439b32cd1c1ec3e66736a8df48"
568
+ checksum = "007e63597f91c711cbb299e60fecbdb6f5ad4a066d6a20c81943893f1584c895"
567
569
  dependencies = [
568
570
  "bindgen",
571
+ "lazy_static",
572
+ "quote",
569
573
  "regex",
570
574
  "shell-words",
575
+ "syn",
571
576
  ]
572
577
 
573
578
  [[package]]
@@ -711,7 +716,7 @@ dependencies = [
711
716
 
712
717
  [[package]]
713
718
  name = "tokenizers"
714
- version = "0.2.3"
719
+ version = "0.3.1"
715
720
  dependencies = [
716
721
  "magnus",
717
722
  "onig",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "tokenizers"
3
- version = "0.2.3"
3
+ version = "0.3.1"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -10,7 +10,7 @@ publish = false
10
10
  crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
- magnus = { git = "https://github.com/matsadler/magnus" }
13
+ magnus = "0.5"
14
14
  onig = { version = "6.0", default-features = false }
15
15
  serde = { version = "1.0", features = ["rc", "derive"] }
16
16
 
@@ -44,7 +44,7 @@ module Tokenizers
44
44
  def cached_path(cache_dir, url, options)
45
45
  fsum = Digest::SHA256.hexdigest(url)
46
46
  meta_paths = Dir[File.join(cache_dir, "#{fsum}.*.meta")]
47
- meta = meta_paths.map { |f| JSON.load_file(f) }.max_by { |m| m["creation_time"] }
47
+ meta = meta_paths.map { |f| JSON.parse(File.read(f)) }.max_by { |m| m["creation_time"] }
48
48
  etag = meta["etag"] if meta
49
49
 
50
50
  if etag
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-08 00:00:00.000000000 Z
11
+ date: 2023-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys