durable_huggingface_hub 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +29 -0
  3. data/.rubocop.yml +108 -0
  4. data/CHANGELOG.md +127 -0
  5. data/README.md +547 -0
  6. data/Rakefile +106 -0
  7. data/devenv.lock +171 -0
  8. data/devenv.nix +15 -0
  9. data/devenv.yaml +8 -0
  10. data/huggingface_hub.gemspec +63 -0
  11. data/lib/durable_huggingface_hub/authentication.rb +245 -0
  12. data/lib/durable_huggingface_hub/cache.rb +508 -0
  13. data/lib/durable_huggingface_hub/configuration.rb +191 -0
  14. data/lib/durable_huggingface_hub/constants.rb +145 -0
  15. data/lib/durable_huggingface_hub/errors.rb +412 -0
  16. data/lib/durable_huggingface_hub/file_download.rb +831 -0
  17. data/lib/durable_huggingface_hub/hf_api.rb +1278 -0
  18. data/lib/durable_huggingface_hub/repo_card.rb +430 -0
  19. data/lib/durable_huggingface_hub/types/cache_info.rb +298 -0
  20. data/lib/durable_huggingface_hub/types/commit_info.rb +149 -0
  21. data/lib/durable_huggingface_hub/types/dataset_info.rb +158 -0
  22. data/lib/durable_huggingface_hub/types/model_info.rb +154 -0
  23. data/lib/durable_huggingface_hub/types/space_info.rb +158 -0
  24. data/lib/durable_huggingface_hub/types/user.rb +179 -0
  25. data/lib/durable_huggingface_hub/types.rb +205 -0
  26. data/lib/durable_huggingface_hub/utils/auth.rb +174 -0
  27. data/lib/durable_huggingface_hub/utils/headers.rb +220 -0
  28. data/lib/durable_huggingface_hub/utils/http.rb +329 -0
  29. data/lib/durable_huggingface_hub/utils/paths.rb +230 -0
  30. data/lib/durable_huggingface_hub/utils/progress.rb +217 -0
  31. data/lib/durable_huggingface_hub/utils/retry.rb +165 -0
  32. data/lib/durable_huggingface_hub/utils/validators.rb +236 -0
  33. data/lib/durable_huggingface_hub/version.rb +8 -0
  34. data/lib/huggingface_hub.rb +205 -0
  35. metadata +334 -0
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "fileutils"
5
+
6
module DurableHuggingfaceHub
  module Utils
    # Authentication token management utilities.
    #
    # This module provides functions for retrieving, storing, and managing
    # HuggingFace authentication tokens.
    module Auth
      # File permissions for token storage (owner read/write only)
      TOKEN_FILE_PERMISSIONS = 0o600

      # Placeholder returned by {mask_token} for tokens that are too short to
      # reveal any part of without exposing most of the secret.
      FULLY_MASKED_TOKEN = "***"

      # Retrieves the authentication token from multiple sources.
      #
      # Priority order:
      # 1. Explicitly provided token parameter
      # 2. HF_TOKEN environment variable
      # 3. HUGGING_FACE_HUB_TOKEN environment variable
      # 4. Token file (location given by {get_token_path})
      #
      # Empty strings are treated as "not provided" at every step.
      #
      # @param token [String, nil] Explicitly provided token
      # @return [String, nil] Authentication token or nil if not found
      #
      # @example Explicit token
      #   Auth.get_token(token: "hf_...")
      #
      # @example From environment or file
      #   Auth.get_token # Checks ENV then file
      def self.get_token(token: nil)
        # Priority 1: Explicit parameter
        return token if token && !token.empty?

        # Priority 2: HF_TOKEN environment variable
        env_token = ENV["HF_TOKEN"]
        return env_token if env_token && !env_token.empty?

        # Priority 3: HUGGING_FACE_HUB_TOKEN environment variable
        legacy_token = ENV["HUGGING_FACE_HUB_TOKEN"]
        return legacy_token if legacy_token && !legacy_token.empty?

        # Priority 4: Token file
        read_token_from_file
      end

      # Reads the authentication token from the token file.
      #
      # Surrounding whitespace (including a trailing newline) is stripped.
      # A missing or unreadable file is treated the same as "no token".
      #
      # @return [String, nil] Token from file or nil if not found
      def self.read_token_from_file
        token_path = get_token_path
        return nil unless File.exist?(token_path)

        token = File.read(token_path).strip
        token.empty? ? nil : token
      rescue Errno::EACCES, Errno::ENOENT
        # ENOENT can still happen if the file disappears between the
        # existence check and the read.
        nil
      end

      # Writes the authentication token to the token file.
      #
      # Creates the parent directory if it doesn't exist. The token is first
      # written to a sibling "<token_path>.tmp" file that is restricted to
      # {TOKEN_FILE_PERMISSIONS} *before* any secret bytes are written, and is
      # then renamed over the final path.
      #
      # @param token [String] Token to store
      # @return [Boolean] True if successful
      # @raise [IOError] If unable to write token
      #
      # @example
      #   Auth.write_token_to_file("hf_...")
      def self.write_token_to_file(token)
        token_path = Pathname.new(get_token_path)
        temp_path = Pathname.new("#{token_path}.tmp")

        # Ensure cache directory exists (mkpath is a no-op when it already does)
        token_path.dirname.mkpath

        # Create the temp file with restrictive permissions from the start so
        # the secret is never on disk with umask-default permissions.
        flags = File::WRONLY | File::CREAT | File::TRUNC
        File.open(temp_path, flags, TOKEN_FILE_PERMISSIONS) do |file|
          # The creation mode is ignored when a stale temp file already
          # exists, so enforce the permissions on the open handle as well.
          file.chmod(TOKEN_FILE_PERMISSIONS)
          file.write(token)
        end

        # Move into place
        File.rename(temp_path, token_path)

        true
      rescue StandardError => e
        # Best-effort cleanup; rm_f never raises, so it cannot hide the
        # IOError reported below.
        FileUtils.rm_f(temp_path) if temp_path
        raise IOError, "Failed to write token: #{e.message}"
      end

      # Deletes the token file.
      #
      # @return [Boolean] True if file was deleted, false if it didn't exist
      #   or could not be removed because of permissions
      def self.delete_token_file
        token_path = get_token_path
        return false unless token_path.exist?

        token_path.delete
        true
      rescue Errno::EACCES, Errno::ENOENT
        false
      end

      # Returns the path to the token file.
      #
      # The value is read from +Configuration.instance.token_path+.
      #
      # @return [Pathname] Path to token file
      def self.get_token_path
        Configuration.instance.token_path
      end

      # Validates a token format.
      #
      # A token is accepted when it is a String that starts with "hf_" and is
      # followed by at least 8 characters from the set A-Z, a-z, 0-9, "_"
      # and "-". Anything else (including nil and non-String values) is
      # rejected.
      #
      # @param token [String, nil] Token to validate
      # @return [Boolean] True if token format appears valid
      #
      # @example
      #   Auth.valid_token_format?("hf_abcd1234") # => true
      #   Auth.valid_token_format?("hf_abc123")   # => false (too short)
      #   Auth.valid_token_format?("invalid")     # => false
      def self.valid_token_format?(token)
        # Non-String input can never be a valid token; answer false instead
        # of failing with NoMethodError.
        return false unless token.is_a?(String)
        return false if token.empty?

        # \A and \z anchor the whole string, so embedded newlines are rejected.
        token.match?(/\Ahf_[A-Za-z0-9_-]{8,}\z/)
      end

      # Retrieves a token and raises an error if not found.
      #
      # @param token [String, nil] Explicitly provided token
      # @return [String] Authentication token
      # @raise [LocalTokenNotFoundError] If no token is available
      #
      # @example
      #   token = Auth.get_token! # Raises if not found
      def self.get_token!(token: nil)
        result = get_token(token: token)
        return result if result

        raise LocalTokenNotFoundError.new
      end

      # Masks a token for safe display.
      #
      # * nil or empty token: returns "".
      # * Up to 11 characters: returns {FULLY_MASKED_TOKEN}; the token is too
      #   short to show any part of it safely.
      # * 12 to 15 characters: shows the first 4 and the last 1 character.
      # * 16 characters or more: shows the first 7 and the last 4 characters.
      #
      # @param token [String, nil] Token to mask
      # @return [String] Masked token
      #
      # @example
      #   Auth.mask_token("hf_abc123def456ghi789")
      #   # => "hf_abc1...i789"
      #
      # @example Short token
      #   Auth.mask_token("hf_short")
      #   # => "***"
      def self.mask_token(token)
        return "" if token.nil? || token.empty?

        # Never hand back the raw secret: short tokens are hidden entirely.
        return FULLY_MASKED_TOKEN if token.length <= 11

        if token.length <= 15
          "#{token[0, 4]}...#{token[-1]}"
        else
          "#{token[0, 7]}...#{token[-4..]}"
        end
      end
    end
  end
end
@@ -0,0 +1,220 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "validators"
4
+
5
module DurableHuggingfaceHub
  module Utils
    # HTTP header building utilities for HuggingFace Hub API requests.
    #
    # This module provides functions for constructing proper HTTP headers
    # including User-Agent, Authorization, and custom headers, and for
    # reading well-known values back out of response headers.
    module Headers
      # Builds standard headers for HuggingFace Hub API requests.
      #
      # The result always contains "User-Agent". "Authorization" is added
      # only for a non-empty token, so an empty string never produces a
      # malformed "Bearer " header. Custom headers are merged last and
      # therefore override the generated ones on key collision.
      #
      # @param token [String, nil] Authentication token
      # @param library_name [String, nil] Name of the library using this client
      # @param library_version [String, nil] Version of the library
      # @param user_agent [String, nil] Custom user agent string
      # @param headers [Hash, nil] Additional custom headers
      # @return [Hash] Complete headers hash
      # @raise [ValidationError] If any parameter has invalid type
      #
      # @example Basic usage
      #   headers = Headers.build_hf_headers(token: "hf_...")
      #
      # @example With custom library info
      #   headers = Headers.build_hf_headers(
      #     token: "hf_...",
      #     library_name: "my_app",
      #     library_version: "1.0.0"
      #   )
      def self.build_hf_headers(token: nil, library_name: nil, library_version: nil, user_agent: nil, headers: nil)
        # Validate parameters
        ensure_type!(token, String, "token", "Token must be a string")
        ensure_type!(library_name, String, "library_name", "Library name must be a string")
        ensure_type!(library_version, String, "library_version", "Library version must be a string")
        ensure_type!(user_agent, String, "user_agent", "User agent must be a string")
        ensure_type!(headers, Hash, "headers", "Custom headers must be a hash")

        result = {
          "User-Agent" => build_user_agent(
            library_name: library_name,
            library_version: library_version,
            custom_agent: user_agent
          )
        }

        # Authorization header (skipped for nil and empty tokens)
        result["Authorization"] = "Bearer #{token}" if token && !token.empty?

        # Merge custom headers
        result.merge!(headers) if headers

        result
      end

      # Builds a User-Agent string for HTTP requests.
      #
      # Format: "[custom] [library/version] huggingface_hub/version; ruby/version"
      #
      # Empty strings are treated as absent. The library version is only
      # emitted together with a non-empty library name.
      #
      # @param library_name [String, nil] Name of the calling library
      # @param library_version [String, nil] Version of the calling library
      # @param custom_agent [String, nil] Custom user agent to prepend
      # @return [String] User-Agent string
      # @raise [ValidationError] If any parameter has invalid type
      #
      # @example
      #   Headers.build_user_agent
      #   # => "huggingface_hub/0.2.0; ruby/3.3.0"
      #
      # @example With library info
      #   Headers.build_user_agent(library_name: "transformers", library_version: "4.0.0")
      #   # => "transformers/4.0.0 huggingface_hub/0.2.0; ruby/3.3.0"
      def self.build_user_agent(library_name: nil, library_version: nil, custom_agent: nil)
        # Validate parameters
        ensure_type!(library_name, String, "library_name", "Library name must be a string")
        ensure_type!(library_version, String, "library_version", "Library version must be a string")
        ensure_type!(custom_agent, String, "custom_agent", "Custom agent must be a string")

        parts = []

        # Custom agent
        parts << custom_agent if custom_agent && !custom_agent.empty?

        # Library identification
        has_name = library_name && !library_name.empty?
        has_version = library_version && !library_version.empty?
        if has_name && has_version
          parts << "#{library_name}/#{library_version}"
        elsif has_name
          parts << library_name
        end

        # HuggingFace Hub client identification followed by the Ruby version
        parts << "huggingface_hub/#{DurableHuggingfaceHub::VERSION}; ruby/#{RUBY_VERSION}"

        parts.join(" ")
      end

      # Extracts request ID from response headers.
      #
      # Looks for "X-Request-Id" first and "Request-Id" second.
      #
      # @param headers [Hash, nil] Response headers
      # @return [String, nil] Request ID if present
      # @raise [ValidationError] If headers is not a hash
      def self.extract_request_id(headers)
        ensure_type!(headers, Hash, "headers", "Headers must be a hash")
        return nil unless headers

        lookup_header(headers, "X-Request-Id", "Request-Id")
      end

      # Extracts commit SHA from response headers.
      #
      # Reads the header named by +Constants::HEADER_X_REPO_COMMIT+.
      #
      # @param headers [Hash, nil] Response headers
      # @return [String, nil] Commit SHA if present
      # @raise [ValidationError] If headers is not a hash
      def self.extract_commit_sha(headers)
        ensure_type!(headers, Hash, "headers", "Headers must be a hash")
        return nil unless headers

        lookup_header(headers, Constants::HEADER_X_REPO_COMMIT)
      end

      # Extracts ETag from response headers.
      #
      # A weak-validator prefix ("W/") and the surrounding double quotes are
      # removed, so both +"abc"+ and +W/"abc"+ yield +abc+.
      #
      # @param headers [Hash, nil] Response headers
      # @return [String, nil] ETag value (weak prefix and quotes removed)
      # @raise [ValidationError] If headers is not a hash
      def self.extract_etag(headers)
        ensure_type!(headers, Hash, "headers", "Headers must be a hash")
        return nil unless headers

        etag = lookup_header(headers, "ETag")
        return nil unless etag

        # Strip only at the very start/end of the value (not per line).
        etag.delete_prefix("W/").delete_prefix('"').delete_suffix('"')
      end

      # Extracts linked file size from response headers.
      #
      # Reads the header named by +Constants::HEADER_X_LINKED_SIZE+. A value
      # that is not a plain non-negative decimal number yields nil instead of
      # being silently coerced to 0.
      #
      # @param headers [Hash, nil] Response headers
      # @return [Integer, nil] File size in bytes
      # @raise [ValidationError] If headers is not a hash
      def self.extract_linked_size(headers)
        ensure_type!(headers, Hash, "headers", "Headers must be a hash")
        return nil unless headers

        size = lookup_header(headers, Constants::HEADER_X_LINKED_SIZE)
        return nil unless size

        digits = size.to_s.strip
        digits.match?(/\A\d+\z/) ? digits.to_i : nil
      end

      # Checks if response indicates the file is stored in LFS.
      #
      # The check is purely the presence of the header named by
      # +Constants::HEADER_X_LINKED_ETAG+.
      #
      # @param headers [Hash, nil] Response headers
      # @return [Boolean] True if file is in LFS
      # @raise [ValidationError] If headers is not a hash
      def self.lfs_file?(headers)
        ensure_type!(headers, Hash, "headers", "Headers must be a hash")
        return false unless headers

        !lookup_header(headers, Constants::HEADER_X_LINKED_ETAG).nil?
      end

      # Raises ValidationError unless +value+ is falsy or an instance of +klass+.
      #
      # @param value [Object, nil] Value to check
      # @param klass [Class] Expected class
      # @param field [String] Field name reported in the error
      # @param message [String] Error message
      # @return [void]
      # @raise [ValidationError] If value is present but of the wrong type
      def self.ensure_type!(value, klass, field, message)
        return unless value
        return if value.is_a?(klass)

        raise ValidationError.new(field, message)
      end
      private_class_method :ensure_type!

      # Finds the first present header among +names+.
      #
      # Pass 1 tries each name exactly as given and lower-cased, in order.
      # Pass 2 only runs when pass 1 finds nothing and compares keys
      # case-insensitively, so variants such as "Etag" or "X-REQUEST-ID" in a
      # plain Hash are still found.
      #
      # @param headers [Hash] Response headers
      # @param names [Array<String>] Header names in priority order
      # @return [Object, nil] Header value, or nil when none is present
      def self.lookup_header(headers, *names)
        names.each do |name|
          value = headers[name] || headers[name.downcase]
          return value if value
        end

        names.each do |name|
          wanted = name.downcase
          headers.each do |key, value|
            return value if value && key.to_s.downcase == wanted
          end
        end

        nil
      end
      private_class_method :lookup_header
    end
  end
end
+ end