tokenizers 0.6.1 → 0.6.2

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aa9c5791104b7c5df1a6a2afe9c071ad0c03308e3393cbb1f6d2be87385b03d5
4
- data.tar.gz: 65fdbe5dff6259dfe13c9f3a0ee9a2330e1b4b11b872fdf98b8189f4c87232c0
3
+ metadata.gz: caf36f6de3318df84c66a3ff99963e1b57aeca78736892ad143f86c82bfde737
4
+ data.tar.gz: 1e8689c437f736aeb85e2163f5b4eb7a30b1b01ba3ec6c472afde16a7dd25040
5
5
  SHA512:
6
- metadata.gz: 2460479cc807467ff4c7b185aa2f49386a2465e2a8fb430acd6062210d522e550cd1b1aa401a9b902315cf51f0dcf0041c8e66cf3907b549a27bff7d8864228d
7
- data.tar.gz: c908e1daf079a35ac2e9da93fd43c7a02373f3aeefa06335830bec223984ed44e9c2748c15d5637e0c4e122a20d0f9f891c86244ea6c1050bbc60ac29762a3a9
6
+ metadata.gz: 88a48300b336c3afaf6ba2119835d548945419133e3f194b3ec8ed78c9a6b477389a4f60325c3233358e5f52b2e7b3ad79550a8e5b2828436757cad6796093ab
7
+ data.tar.gz: af05786f53957827b7094bbf464dd281c44839de13ecbb3811e2897a95c5fd42fe27573259b8d337b1c975746c20ea9051b128bf7a048784654bba6f5fa003e9
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.6.2 (2025-11-24)
2
+
3
+ - Updated Tokenizers to 0.22.1
4
+ - Fixed caching on Windows
5
+
1
6
  ## 0.6.1 (2025-09-17)
2
7
 
3
8
  - Fixed return type of `encode_batch` method
data/Cargo.lock CHANGED
@@ -385,9 +385,9 @@ checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568"
385
385
 
386
386
  [[package]]
387
387
  name = "magnus"
388
- version = "0.8.1"
388
+ version = "0.8.2"
389
389
  source = "registry+https://github.com/rust-lang/crates.io-index"
390
- checksum = "bd2ac6e71886be00ac34db92aa732c793c5107c95191805b9a1c7e70e6d342e0"
390
+ checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
391
391
  dependencies = [
392
392
  "magnus-macros",
393
393
  "rb-sys",
@@ -784,20 +784,20 @@ dependencies = [
784
784
 
785
785
  [[package]]
786
786
  name = "tokenizers"
787
- version = "0.6.1"
787
+ version = "0.6.2"
788
788
  dependencies = [
789
789
  "ahash",
790
790
  "magnus",
791
791
  "onig",
792
792
  "serde",
793
- "tokenizers 0.22.0",
793
+ "tokenizers 0.22.1",
794
794
  ]
795
795
 
796
796
  [[package]]
797
797
  name = "tokenizers"
798
- version = "0.22.0"
798
+ version = "0.22.1"
799
799
  source = "registry+https://github.com/rust-lang/crates.io-index"
800
- checksum = "af10f51be57162b69d90a15cb226eef12c9e4faecbd5e3ea98a86bfb920b3d71"
800
+ checksum = "6475a27088c98ea96d00b39a9ddfb63780d1ad4cceb6f48374349a96ab2b7842"
801
801
  dependencies = [
802
802
  "ahash",
803
803
  "aho-corasick",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "tokenizers"
3
- version = "0.6.1"
3
+ version = "0.6.2"
4
4
  license = "Apache-2.0"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -17,6 +17,6 @@ onig = { version = "6", default-features = false }
17
17
  serde = { version = "1", features = ["rc", "derive"] }
18
18
 
19
19
  [dependencies.tokenizers]
20
- version = "=0.22.0" # also update in from_pretrained.rb
20
+ version = "=0.22.1" # also update in from_pretrained.rb
21
21
  default-features = false
22
22
  features = ["progressbar", "onig", "esaxx_fast"]
@@ -1,7 +1,7 @@
1
1
  module Tokenizers
2
2
  module FromPretrained
3
3
  # for user agent
4
- TOKENIZERS_VERSION = "0.22.0"
4
+ TOKENIZERS_VERSION = "0.22.1"
5
5
 
6
6
  # use Ruby for downloads
7
7
  # this avoids the need to vendor OpenSSL on Linux
@@ -94,21 +94,29 @@ module Tokenizers
94
94
  end
95
95
 
96
96
  def cache_dir
97
- if ENV["TOKENIZERS_CACHE"]
98
- ENV["TOKENIZERS_CACHE"]
99
- else
100
- # use same directory as Rust version
101
- # https://docs.rs/dirs/latest/dirs/fn.cache_dir.html
102
- dir =
103
- if Gem.win_platform?
104
- ENV.fetch("LOCALAPPDATA")
105
- elsif mac?
106
- File.join(ENV.fetch("HOME"), "Library", "Caches")
107
- else
108
- ENV["XDG_CACHE_HOME"] || File.join(ENV.fetch("HOME"), ".cache")
109
- end
97
+ cache_dir =
98
+ if ENV["TOKENIZERS_CACHE"]
99
+ ENV["TOKENIZERS_CACHE"]
100
+ else
101
+ # use same directory as Rust version
102
+ # https://docs.rs/dirs/latest/dirs/fn.cache_dir.html
103
+ dir =
104
+ if Gem.win_platform?
105
+ ENV.fetch("LOCALAPPDATA")
106
+ elsif mac?
107
+ File.join(ENV.fetch("HOME"), "Library", "Caches")
108
+ else
109
+ ENV["XDG_CACHE_HOME"] || File.join(ENV.fetch("HOME"), ".cache")
110
+ end
111
+
112
+ File.join(dir, "huggingface", "tokenizers")
113
+ end
110
114
 
111
- File.join(dir, "huggingface", "tokenizers")
115
+ if Gem.win_platform?
116
+ # cannot use backslash for glob on Windows
117
+ cache_dir.gsub("\\", "/")
118
+ else
119
+ cache_dir
112
120
  end
113
121
  end
114
122
 
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.6.1"
2
+ VERSION = "0.6.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane