tokenizers 0.6.1-arm64-darwin → 0.6.2-arm64-darwin

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c4029dd67103ec66ee5450b0432a72b8be3ee81d9b1f5c694c0f2328f7b4f6d
4
- data.tar.gz: 81d9f62024ecab72f733775212bbc9895815107ee2d525914d40ea566b9ae2b7
3
+ metadata.gz: 3f3a073c8f69107784a3d85486add4dcfef820b47e7eda88192e9d1f978ff18b
4
+ data.tar.gz: 62114f4f5db440c3c548fc6feb24378d61478409c0e790315098251410ea70ba
5
5
  SHA512:
6
- metadata.gz: d6b8eb85583aaac0749b6863e1e76ceb7dcc32352077692ff63255a7aa22784781babfe2c341f6b04ad3d6c20d47adf5b82e13489cc4ac3aa11ba053423b3b90
7
- data.tar.gz: fcfb3464228edb76632894500a94e56d90a71d921955ae9f8b9fe0a891f2ef16a36bdb071ee1a6e031dea9a3348fb195eb4d5a8b9408078237b1c813be247e5e
6
+ metadata.gz: 47d565695d6b23a1edbe46453ca95a036c6313965c8b333b0243b8942f2a651ac2874bc34ab2d401ca4ba89614bb94557a9afa2f92a32c0c5f7c39db6c0f3bdc
7
+ data.tar.gz: e492b08a77138cfe1e8f1ddf74101ce046e230abbfdbf5a3543c316f92146b6bc3d3a06383bc1d2b08e34bf90e72b4f9c04ddebfff51aabdfef22fe561fcb2bf
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.6.2 (2025-11-24)
2
+
3
+ - Updated Tokenizers to 0.22.1
4
+ - Fixed caching on Windows
5
+
1
6
  ## 0.6.1 (2025-09-17)
2
7
 
3
8
  - Fixed return type of `encode_batch` method
data/Cargo.lock CHANGED
@@ -385,9 +385,9 @@ checksum = "b8dd856d451cc0da70e2ef2ce95a18e39a93b7558bedf10201ad28503f918568"
385
385
 
386
386
  [[package]]
387
387
  name = "magnus"
388
- version = "0.8.1"
388
+ version = "0.8.2"
389
389
  source = "registry+https://github.com/rust-lang/crates.io-index"
390
- checksum = "bd2ac6e71886be00ac34db92aa732c793c5107c95191805b9a1c7e70e6d342e0"
390
+ checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
391
391
  dependencies = [
392
392
  "magnus-macros",
393
393
  "rb-sys",
@@ -784,20 +784,20 @@ dependencies = [
784
784
 
785
785
  [[package]]
786
786
  name = "tokenizers"
787
- version = "0.6.1"
787
+ version = "0.6.2"
788
788
  dependencies = [
789
789
  "ahash",
790
790
  "magnus",
791
791
  "onig",
792
792
  "serde",
793
- "tokenizers 0.22.0",
793
+ "tokenizers 0.22.1",
794
794
  ]
795
795
 
796
796
  [[package]]
797
797
  name = "tokenizers"
798
- version = "0.22.0"
798
+ version = "0.22.1"
799
799
  source = "registry+https://github.com/rust-lang/crates.io-index"
800
- checksum = "af10f51be57162b69d90a15cb226eef12c9e4faecbd5e3ea98a86bfb920b3d71"
800
+ checksum = "6475a27088c98ea96d00b39a9ddfb63780d1ad4cceb6f48374349a96ab2b7842"
801
801
  dependencies = [
802
802
  "ahash",
803
803
  "aho-corasick",
@@ -158,7 +158,7 @@ macro_rules_attribute-proc_macro v0.2.0
158
158
  https://github.com/danielhenrymantilla/macro_rules_attribute-rs
159
159
  MIT
160
160
 
161
- magnus v0.8.1
161
+ magnus v0.8.2
162
162
  https://github.com/matsadler/magnus
163
163
  MIT
164
164
 
@@ -338,7 +338,7 @@ thiserror-impl v2.0.12
338
338
  https://github.com/dtolnay/thiserror
339
339
  MIT OR Apache-2.0
340
340
 
341
- tokenizers v0.22.0
341
+ tokenizers v0.22.1
342
342
  https://github.com/huggingface/tokenizers
343
343
  Apache-2.0
344
344
 
Binary file
Binary file
Binary file
@@ -1,7 +1,7 @@
1
1
  module Tokenizers
2
2
  module FromPretrained
3
3
  # for user agent
4
- TOKENIZERS_VERSION = "0.22.0"
4
+ TOKENIZERS_VERSION = "0.22.1"
5
5
 
6
6
  # use Ruby for downloads
7
7
  # this avoids the need to vendor OpenSSL on Linux
@@ -94,21 +94,29 @@ module Tokenizers
94
94
  end
95
95
 
96
96
  def cache_dir
97
- if ENV["TOKENIZERS_CACHE"]
98
- ENV["TOKENIZERS_CACHE"]
99
- else
100
- # use same directory as Rust version
101
- # https://docs.rs/dirs/latest/dirs/fn.cache_dir.html
102
- dir =
103
- if Gem.win_platform?
104
- ENV.fetch("LOCALAPPDATA")
105
- elsif mac?
106
- File.join(ENV.fetch("HOME"), "Library", "Caches")
107
- else
108
- ENV["XDG_CACHE_HOME"] || File.join(ENV.fetch("HOME"), ".cache")
109
- end
97
+ cache_dir =
98
+ if ENV["TOKENIZERS_CACHE"]
99
+ ENV["TOKENIZERS_CACHE"]
100
+ else
101
+ # use same directory as Rust version
102
+ # https://docs.rs/dirs/latest/dirs/fn.cache_dir.html
103
+ dir =
104
+ if Gem.win_platform?
105
+ ENV.fetch("LOCALAPPDATA")
106
+ elsif mac?
107
+ File.join(ENV.fetch("HOME"), "Library", "Caches")
108
+ else
109
+ ENV["XDG_CACHE_HOME"] || File.join(ENV.fetch("HOME"), ".cache")
110
+ end
111
+
112
+ File.join(dir, "huggingface", "tokenizers")
113
+ end
110
114
 
111
- File.join(dir, "huggingface", "tokenizers")
115
+ if Gem.win_platform?
116
+ # cannot use backslash for glob on Windows
117
+ cache_dir.gsub("\\", "/")
118
+ else
119
+ cache_dir
112
120
  end
113
121
  end
114
122
 
@@ -1,3 +1,3 @@
1
1
  module Tokenizers
2
- VERSION = "0.6.1"
2
+ VERSION = "0.6.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tokenizers
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-09-17 00:00:00.000000000 Z
11
+ date: 2025-11-25 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org