tokenizers 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ff4d1ad7b56010f603ead7a4794c003c5294e50f1b33de62c8089ddf150d5ad
4
- data.tar.gz: 295aaabb720971f2ddcc832ab0d5deedf1e0ed8dab03aca96ac1d396b5723de7
3
+ metadata.gz: 11fdc53989bc2285096bc5eb4a971426e153375ada43bc8c51d2a191b42fe02d
4
+ data.tar.gz: 92d4d8bef2c4013d5cf0d55bff30bc070ca6e59430f87b96f9147075216a1c1d
5
5
  SHA512:
6
- metadata.gz: e14207004cddeef40590229ea2c8a9bf54e5c5b75cdbcdd32cd6f23c24feb8544fcabe86fa9bced32cb41f2581ee0df4d36ed2b6a58ef2fc668aa33c270659df
7
- data.tar.gz: b2bb202c8c37bdd0d14ca64be147e99b224128c1461f56761f1d58d9326b40768e7b903bbbb4c2a0363bd4b1c9ef5a66be53210ad801d5e45c7d86dd0945bd82
6
+ metadata.gz: 7c91c33078b6c5b23a6080908fa184e2222922f35bd6e1c439329525f4554f939cf98ca4dcd3017d858cdb6ec2a96f6671ce5dc425ca643d482b49b04af00f4c
7
+ data.tar.gz: 74aad6c458792570ace93107aacccb8a9df7b61228dd062bd7bcaf015a53d4f6b4b1fcace09c69bc959bd9b04598d315344d4edc7e6abbe1603f3b92d29ab711
data/CHANGELOG.md CHANGED
@@ -1,4 +1,12 @@
1
- ## 0.3.0 (2022-02-07)
1
+ ## 0.3.2 (2023-03-06)
2
+
3
+ - Added precompiled gem for Linux x86-64 MUSL
4
+
5
+ ## 0.3.1 (2023-02-08)
6
+
7
+ - Fixed error with Ruby 2.7
8
+
9
+ ## 0.3.0 (2023-02-07)
2
10
 
3
11
  - Added support for training tokenizers
4
12
  - Added more methods to `Tokenizer`
@@ -7,20 +15,20 @@
7
15
  - Changed `encode` method to include special tokens by default
8
16
  - Changed how offsets are calculated for strings with multibyte characters
9
17
 
10
- ## 0.2.3 (2022-01-22)
18
+ ## 0.2.3 (2023-01-22)
11
19
 
12
20
  - Added `add_special_tokens` option to `encode` method
13
21
  - Added warning about `encode` method including special tokens by default in 0.3.0
14
22
  - Added more methods to `Encoding`
15
23
  - Fixed error with precompiled gem on Mac ARM
16
24
 
17
- ## 0.2.2 (2022-01-15)
25
+ ## 0.2.2 (2023-01-15)
18
26
 
19
27
  - Added precompiled gem for Linux ARM
20
28
  - Added `from_file` method
21
29
  - Fixed error with precompiled gem on Linux x86-64
22
30
 
23
- ## 0.2.1 (2022-01-12)
31
+ ## 0.2.1 (2023-01-12)
24
32
 
25
33
  - Added support for Ruby 3.2
26
34
 
data/Cargo.lock CHANGED
@@ -353,7 +353,8 @@ checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d"
353
353
  [[package]]
354
354
  name = "magnus"
355
355
  version = "0.5.0"
356
- source = "git+https://github.com/matsadler/magnus#eda735faa7e03da2443eaf2c4058a184917d6b87"
356
+ source = "registry+https://github.com/rust-lang/crates.io-index"
357
+ checksum = "af37419a942477f606d227d0e6e92f3b68458bfc68fec3bc2629df6a2c1ccdf9"
357
358
  dependencies = [
358
359
  "magnus-macros",
359
360
  "rb-sys",
@@ -362,8 +363,9 @@ dependencies = [
362
363
 
363
364
  [[package]]
364
365
  name = "magnus-macros"
365
- version = "0.3.0"
366
- source = "git+https://github.com/matsadler/magnus#eda735faa7e03da2443eaf2c4058a184917d6b87"
366
+ version = "0.4.0"
367
+ source = "registry+https://github.com/rust-lang/crates.io-index"
368
+ checksum = "85aa71c9891b2732ff1157e1860a1ee578459fd25811fd3d72cc6e32b3fbdfea"
367
369
  dependencies = [
368
370
  "proc-macro2",
369
371
  "quote",
@@ -552,22 +554,25 @@ dependencies = [
552
554
 
553
555
  [[package]]
554
556
  name = "rb-sys"
555
- version = "0.9.64"
557
+ version = "0.9.65"
556
558
  source = "registry+https://github.com/rust-lang/crates.io-index"
557
- checksum = "cc8945662df8083245deda89e236647173cc7ad750f481ddcd7bbfd3afe3fa5e"
559
+ checksum = "e8fe617bad8e88fd7e5d6f432e35f09e5f94144dfb8e8ee4adde82fb920dc59b"
558
560
  dependencies = [
559
561
  "rb-sys-build",
560
562
  ]
561
563
 
562
564
  [[package]]
563
565
  name = "rb-sys-build"
564
- version = "0.9.64"
566
+ version = "0.9.65"
565
567
  source = "registry+https://github.com/rust-lang/crates.io-index"
566
- checksum = "ae8c3cdf9edc3908ee1555b7a1bca58ee1b499439b32cd1c1ec3e66736a8df48"
568
+ checksum = "007e63597f91c711cbb299e60fecbdb6f5ad4a066d6a20c81943893f1584c895"
567
569
  dependencies = [
568
570
  "bindgen",
571
+ "lazy_static",
572
+ "quote",
569
573
  "regex",
570
574
  "shell-words",
575
+ "syn",
571
576
  ]
572
577
 
573
578
  [[package]]
@@ -711,7 +716,7 @@ dependencies = [
711
716
 
712
717
  [[package]]
713
718
  name = "tokenizers"
714
- version = "0.2.3"
719
+ version = "0.3.1"
715
720
  dependencies = [
716
721
  "magnus",
717
722
  "onig",
data/ext/tokenizers/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "tokenizers"
3
- version = "0.2.3"
3
+ version = "0.3.1"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -10,7 +10,7 @@ publish = false
10
10
  crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
- magnus = { git = "https://github.com/matsadler/magnus" }
13
+ magnus = "0.5"
14
14
  onig = { version = "6.0", default-features = false }
15
15
  serde = { version = "1.0", features = ["rc", "derive"] }
16
16
 
data/lib/tokenizers/from_pretrained.rb CHANGED
@@ -44,7 +44,7 @@ module Tokenizers
44
44
  def cached_path(cache_dir, url, options)
45
45
  fsum = Digest::SHA256.hexdigest(url)
46
46
  meta_paths = Dir[File.join(cache_dir, "#{fsum}.*.meta")]
47
- meta = meta_paths.map { |f| JSON.load_file(f) }.max_by { |m| m["creation_time"] }
47
+ meta = meta_paths.map { |f| JSON.parse(File.read(f)) }.max_by { |m| m["creation_time"] }
48
48
  etag = meta["etag"] if meta
49
49
 
50
50
  if etag
data/lib/tokenizers/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-08 00:00:00.000000000 Z
11
+ date: 2023-03-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys