durable_huggingface_hub 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +29 -0
- data/.rubocop.yml +108 -0
- data/CHANGELOG.md +127 -0
- data/README.md +547 -0
- data/Rakefile +106 -0
- data/devenv.lock +171 -0
- data/devenv.nix +15 -0
- data/devenv.yaml +8 -0
- data/huggingface_hub.gemspec +63 -0
- data/lib/durable_huggingface_hub/authentication.rb +245 -0
- data/lib/durable_huggingface_hub/cache.rb +508 -0
- data/lib/durable_huggingface_hub/configuration.rb +191 -0
- data/lib/durable_huggingface_hub/constants.rb +145 -0
- data/lib/durable_huggingface_hub/errors.rb +412 -0
- data/lib/durable_huggingface_hub/file_download.rb +831 -0
- data/lib/durable_huggingface_hub/hf_api.rb +1278 -0
- data/lib/durable_huggingface_hub/repo_card.rb +430 -0
- data/lib/durable_huggingface_hub/types/cache_info.rb +298 -0
- data/lib/durable_huggingface_hub/types/commit_info.rb +149 -0
- data/lib/durable_huggingface_hub/types/dataset_info.rb +158 -0
- data/lib/durable_huggingface_hub/types/model_info.rb +154 -0
- data/lib/durable_huggingface_hub/types/space_info.rb +158 -0
- data/lib/durable_huggingface_hub/types/user.rb +179 -0
- data/lib/durable_huggingface_hub/types.rb +205 -0
- data/lib/durable_huggingface_hub/utils/auth.rb +174 -0
- data/lib/durable_huggingface_hub/utils/headers.rb +220 -0
- data/lib/durable_huggingface_hub/utils/http.rb +329 -0
- data/lib/durable_huggingface_hub/utils/paths.rb +230 -0
- data/lib/durable_huggingface_hub/utils/progress.rb +217 -0
- data/lib/durable_huggingface_hub/utils/retry.rb +165 -0
- data/lib/durable_huggingface_hub/utils/validators.rb +236 -0
- data/lib/durable_huggingface_hub/version.rb +8 -0
- data/lib/huggingface_hub.rb +205 -0
- metadata +334 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
|
|
6
|
+
module DurableHuggingfaceHub
  module Utils
    # Authentication token management utilities.
    #
    # This module provides functions for retrieving, storing, and managing
    # HuggingFace authentication tokens.
    module Auth
      # File permissions for token storage (owner read/write only)
      TOKEN_FILE_PERMISSIONS = 0o600

      # Retrieves the authentication token from multiple sources.
      #
      # Priority order:
      # 1. Explicitly provided token parameter (returned unchanged)
      # 2. HF_TOKEN environment variable
      # 3. HUGGING_FACE_HUB_TOKEN environment variable
      # 4. Token file (path reported by {get_token_path})
      #
      # Environment values are stripped of surrounding whitespace, and a
      # variable that is empty or whitespace-only is treated as unset.
      #
      # @param token [String, nil] Explicitly provided token
      # @return [String, nil] Authentication token or nil if not found
      #
      # @example Explicit token
      #   Auth.get_token(token: "hf_...")
      #
      # @example From environment or file
      #   Auth.get_token  # Checks ENV then file
      def self.get_token(token: nil)
        # Priority 1: explicit parameter
        return token if token && !token.empty?

        # Priority 2 and 3: environment variables, in order. A trailing
        # newline (common when exporting from a file) must not end up in
        # an HTTP header, so the value is stripped.
        %w[HF_TOKEN HUGGING_FACE_HUB_TOKEN].each do |name|
          value = ENV[name]&.strip
          return value if value && !value.empty?
        end

        # Priority 4: token file
        read_token_from_file
      end

      # Reads the authentication token from the token file.
      #
      # Returns nil when the path does not point at a regular file, when
      # it cannot be read, or when the file holds only whitespace.
      #
      # @return [String, nil] Token from file or nil if not found
      def self.read_token_from_file
        token_path = get_token_path
        # File.file? (rather than File.exist?) keeps a directory at the
        # token path from reaching File.read.
        return nil unless File.file?(token_path)

        token = File.read(token_path).strip
        token.empty? ? nil : token
      rescue Errno::EACCES, Errno::ENOENT, Errno::EISDIR
        # The file may disappear or change between the check and the read.
        nil
      end

      # Writes the authentication token to the token file.
      #
      # Creates the parent directory if it doesn't exist. The token is
      # written to a sibling "<path>.tmp" file that is created with
      # {TOKEN_FILE_PERMISSIONS} and then renamed over the final path.
      #
      # @param token [String] Token to store
      # @return [Boolean] True if successful
      # @raise [IOError] If unable to write token
      #
      # @example
      #   Auth.write_token_to_file("hf_...")
      def self.write_token_to_file(token)
        token_path = Pathname(get_token_path)

        # mkpath is a no-op when the directory already exists
        token_path.dirname.mkpath

        temp_path = Pathname.new("#{token_path}.tmp")
        flags = File::WRONLY | File::CREAT | File::TRUNC
        # Create the file with restrictive permissions from the start so
        # the token is never on disk with umask-default permissions.
        File.open(temp_path, flags, TOKEN_FILE_PERMISSIONS) do |file|
          # The mode passed to open is ignored for a pre-existing
          # (stale) temp file, so tighten it before writing.
          file.chmod(TOKEN_FILE_PERMISSIONS)
          file.write(token)
        end

        # Move into place
        File.rename(temp_path, token_path)

        true
      rescue => e
        # Best-effort cleanup; a cleanup failure must not replace the
        # IOError callers are documented to receive.
        begin
          temp_path&.delete if temp_path&.exist?
        rescue SystemCallError
          nil
        end
        raise IOError, "Failed to write token: #{e.message}"
      end

      # Deletes the token file.
      #
      # @return [Boolean] True if file was deleted, false if it didn't
      #   exist or could not be removed
      def self.delete_token_file
        token_path = Pathname(get_token_path)
        return false unless token_path.exist?

        token_path.delete
        true
      rescue Errno::EACCES, Errno::ENOENT
        false
      end

      # Returns the path to the token file.
      #
      # @return [Pathname] Path to token file, as reported by
      #   Configuration.instance.token_path
      def self.get_token_path
        Configuration.instance.token_path
      end

      # Validates a token format.
      #
      # A token is accepted when it is a String that starts with "hf_"
      # followed by at least 8 characters from A-Z, a-z, 0-9, "_" or "-"
      # (so at least 11 characters in total).
      #
      # @param token [String, nil] Token to validate
      # @return [Boolean] True if token format appears valid
      #
      # @example
      #   Auth.valid_token_format?("hf_abc123def456")  # => true
      #   Auth.valid_token_format?("hf_abc123")        # => false (too short)
      #   Auth.valid_token_format?("invalid")          # => false
      def self.valid_token_format?(token)
        # nil and non-String values are simply "not valid" rather than
        # an error; the empty string fails the pattern below.
        return false unless token.is_a?(String)

        token.match?(/\Ahf_[A-Za-z0-9_-]{8,}\z/)
      end

      # Retrieves a token and raises an error if not found.
      #
      # @param token [String, nil] Explicitly provided token
      # @return [String] Authentication token
      # @raise [LocalTokenNotFoundError] If no token is available
      #
      # @example
      #   token = Auth.get_token!  # Raises if not found
      def self.get_token!(token: nil)
        result = get_token(token: token)
        return result if result

        raise LocalTokenNotFoundError.new
      end

      # Masks a token for display.
      #
      # - nil or empty input yields "".
      # - Tokens longer than 15 characters show the first 7 and the last
      #   4 characters.
      # - Tokens of 12 to 15 characters show the first 4 and the last
      #   character.
      # - NOTE: tokens of 11 characters or fewer are returned unchanged,
      #   i.e. they are NOT masked.
      #
      # @param token [String, nil] Token to mask
      # @return [String] Masked token
      #
      # @example
      #   Auth.mask_token("hf_abc123def456ghi789")
      #   # => "hf_abc1...i789"
      def self.mask_token(token)
        return "" if token.nil? || token.empty?
        return token if token.length <= 11

        if token.length <= 15
          prefix = token[0, 4]
          suffix = token[-1]
        else
          prefix = token[0, 7]
          suffix = token[-4..]
        end

        "#{prefix}...#{suffix}"
      end
    end
  end
end
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "validators"
|
|
4
|
+
|
|
5
|
+
module DurableHuggingfaceHub
  module Utils
    # HTTP header building utilities for HuggingFace Hub API requests.
    #
    # This module provides functions for constructing proper HTTP headers
    # including User-Agent, Authorization, and custom headers, and for
    # reading well-known values back out of response headers.
    module Headers
      # Builds standard headers for HuggingFace Hub API requests.
      #
      # The result always contains "User-Agent". "Authorization" is added
      # only when the token is non-blank (surrounding whitespace is
      # removed). Custom headers are merged last and therefore override
      # the generated ones on key collision.
      #
      # @param token [String, nil] Authentication token
      # @param library_name [String, nil] Name of the library using this client
      # @param library_version [String, nil] Version of the library
      # @param user_agent [String, nil] Custom user agent string
      # @param headers [Hash, nil] Additional custom headers
      # @return [Hash] Complete headers hash
      # @raise [ValidationError] If any parameter has invalid type
      #
      # @example Basic usage
      #   headers = Headers.build_hf_headers(token: "hf_...")
      #
      # @example With custom library info
      #   headers = Headers.build_hf_headers(
      #     token: "hf_...",
      #     library_name: "my_app",
      #     library_version: "1.0.0"
      #   )
      def self.build_hf_headers(token: nil, library_name: nil, library_version: nil, user_agent: nil, headers: nil)
        # Validate parameters (order determines which error surfaces first)
        validate_type("token", token, String, "Token must be a string")
        validate_type("library_name", library_name, String, "Library name must be a string")
        validate_type("library_version", library_version, String, "Library version must be a string")
        validate_type("user_agent", user_agent, String, "User agent must be a string")
        validate_type("headers", headers, Hash, "Custom headers must be a hash")

        result = {
          "User-Agent" => build_user_agent(
            library_name: library_name,
            library_version: library_version,
            custom_agent: user_agent
          )
        }

        # A blank token would produce a malformed "Bearer " header, so it
        # is treated the same as no token.
        if token
          bearer = token.strip
          result["Authorization"] = "Bearer #{bearer}" unless bearer.empty?
        end

        result.merge!(headers) if headers
        result
      end

      # Builds a User-Agent string for HTTP requests.
      #
      # Format: "[custom] [library[/version]] huggingface_hub/VERSION; ruby/RUBY_VERSION"
      #
      # Empty strings are treated like nil. The library version is only
      # used when a non-empty library name is given.
      #
      # @param library_name [String, nil] Name of the calling library
      # @param library_version [String, nil] Version of the calling library
      # @param custom_agent [String, nil] Custom user agent to prepend
      # @return [String] User-Agent string
      # @raise [ValidationError] If any parameter has invalid type
      #
      # @example
      #   Headers.build_user_agent
      #   # => "huggingface_hub/<VERSION>; ruby/<RUBY_VERSION>"
      #
      # @example With library info
      #   Headers.build_user_agent(library_name: "transformers", library_version: "4.0.0")
      #   # => "transformers/4.0.0 huggingface_hub/<VERSION>; ruby/<RUBY_VERSION>"
      def self.build_user_agent(library_name: nil, library_version: nil, custom_agent: nil)
        validate_type("library_name", library_name, String, "Library name must be a string")
        validate_type("library_version", library_version, String, "Library version must be a string")
        validate_type("custom_agent", custom_agent, String, "Custom agent must be a string")

        parts = []

        # Custom agent (skipped when empty to avoid a leading space)
        parts << custom_agent if custom_agent && !custom_agent.empty?

        # Library identification; a version without a name is meaningless
        if library_name && !library_name.empty?
          has_version = library_version && !library_version.empty?
          parts << (has_version ? "#{library_name}/#{library_version}" : library_name)
        end

        # Client and Ruby identification always come last
        parts << "huggingface_hub/#{DurableHuggingfaceHub::VERSION}; ruby/#{RUBY_VERSION}"

        parts.join(" ")
      end

      # Extracts request ID from response headers.
      #
      # Looks for "X-Request-Id" first and "Request-Id" second; header
      # names are matched case-insensitively.
      #
      # @param headers [Hash, nil] Response headers
      # @return [String, nil] Request ID if present
      # @raise [ValidationError] If headers is not a hash
      def self.extract_request_id(headers)
        validate_type("headers", headers, Hash, "Headers must be a hash")
        return nil unless headers

        header_value(headers, "X-Request-Id") || header_value(headers, "Request-Id")
      end

      # Extracts commit SHA from response headers.
      #
      # Reads the header named by Constants::HEADER_X_REPO_COMMIT; the
      # name is matched case-insensitively.
      #
      # @param headers [Hash, nil] Response headers
      # @return [String, nil] Commit SHA if present
      # @raise [ValidationError] If headers is not a hash
      def self.extract_commit_sha(headers)
        validate_type("headers", headers, Hash, "Headers must be a hash")
        return nil unless headers

        header_value(headers, Constants::HEADER_X_REPO_COMMIT)
      end

      # Extracts ETag from response headers.
      #
      # The returned value has surrounding whitespace, a leading weak
      # validator marker ("W/") and one pair of surrounding double
      # quotes removed.
      #
      # @param headers [Hash, nil] Response headers
      # @return [String, nil] Normalized ETag value
      # @raise [ValidationError] If headers is not a hash
      #
      # @example
      #   Headers.extract_etag("ETag" => "W/\"abc\"")  # => "abc"
      def self.extract_etag(headers)
        validate_type("headers", headers, Hash, "Headers must be a hash")
        return nil unless headers

        etag = header_value(headers, "ETag")
        return nil unless etag

        etag.to_s.strip
            .sub(%r{\AW/}, "")
            .delete_prefix("\"")
            .delete_suffix("\"")
      end

      # Extracts linked file size from response headers.
      #
      # Reads the header named by Constants::HEADER_X_LINKED_SIZE (name
      # matched case-insensitively) and parses it as a base-10 integer.
      #
      # @param headers [Hash, nil] Response headers
      # @return [Integer, nil] File size in bytes, or nil when the header
      #   is absent or not a valid integer
      # @raise [ValidationError] If headers is not a hash
      def self.extract_linked_size(headers)
        validate_type("headers", headers, Hash, "Headers must be a hash")
        return nil unless headers

        size = header_value(headers, Constants::HEADER_X_LINKED_SIZE)
        return nil if size.nil?
        return size if size.is_a?(Integer)

        # Explicit base 10 so "010" is ten, not octal eight; nil instead
        # of a silent 0 for garbage such as "abc".
        Integer(size.to_s.strip, 10, exception: false)
      end

      # Checks if response indicates the file is stored in LFS.
      #
      # The check is the presence of the header named by
      # Constants::HEADER_X_LINKED_ETAG (name matched case-insensitively).
      #
      # @param headers [Hash, nil] Response headers
      # @return [Boolean] True if the linked-ETag header is present
      # @raise [ValidationError] If headers is not a hash
      def self.lfs_file?(headers)
        validate_type("headers", headers, Hash, "Headers must be a hash")
        return false unless headers

        !header_value(headers, Constants::HEADER_X_LINKED_ETAG).nil?
      end

      # Raises ValidationError unless +value+ is falsy or a +klass+.
      #
      # @param field [String] Field name reported in the error
      # @param value [Object] Value to check
      # @param klass [Class] Expected class
      # @param message [String] Error message
      # @return [nil]
      # @raise [ValidationError] If value is truthy and not a +klass+
      def self.validate_type(field, value, klass, message)
        return if !value || value.is_a?(klass)

        raise ValidationError.new(field, message)
      end
      private_class_method :validate_type

      # Looks up a header value, ignoring the case of the header name.
      #
      # An exact key match wins; otherwise the first key (in hash order)
      # whose string form equals +name+ case-insensitively and whose
      # value is not nil is used.
      #
      # @param headers [Hash] Headers to search
      # @param name [String] Header name
      # @return [Object, nil] Header value or nil when absent
      def self.header_value(headers, name)
        exact = headers[name]
        return exact unless exact.nil?

        headers.each do |key, value|
          return value if !value.nil? && key.to_s.casecmp?(name)
        end
        nil
      end
      private_class_method :header_value
    end
  end
end
|