durable_huggingface_hub 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +29 -0
  3. data/.rubocop.yml +108 -0
  4. data/CHANGELOG.md +127 -0
  5. data/README.md +547 -0
  6. data/Rakefile +106 -0
  7. data/devenv.lock +171 -0
  8. data/devenv.nix +15 -0
  9. data/devenv.yaml +8 -0
  10. data/huggingface_hub.gemspec +63 -0
  11. data/lib/durable_huggingface_hub/authentication.rb +245 -0
  12. data/lib/durable_huggingface_hub/cache.rb +508 -0
  13. data/lib/durable_huggingface_hub/configuration.rb +191 -0
  14. data/lib/durable_huggingface_hub/constants.rb +145 -0
  15. data/lib/durable_huggingface_hub/errors.rb +412 -0
  16. data/lib/durable_huggingface_hub/file_download.rb +831 -0
  17. data/lib/durable_huggingface_hub/hf_api.rb +1278 -0
  18. data/lib/durable_huggingface_hub/repo_card.rb +430 -0
  19. data/lib/durable_huggingface_hub/types/cache_info.rb +298 -0
  20. data/lib/durable_huggingface_hub/types/commit_info.rb +149 -0
  21. data/lib/durable_huggingface_hub/types/dataset_info.rb +158 -0
  22. data/lib/durable_huggingface_hub/types/model_info.rb +154 -0
  23. data/lib/durable_huggingface_hub/types/space_info.rb +158 -0
  24. data/lib/durable_huggingface_hub/types/user.rb +179 -0
  25. data/lib/durable_huggingface_hub/types.rb +205 -0
  26. data/lib/durable_huggingface_hub/utils/auth.rb +174 -0
  27. data/lib/durable_huggingface_hub/utils/headers.rb +220 -0
  28. data/lib/durable_huggingface_hub/utils/http.rb +329 -0
  29. data/lib/durable_huggingface_hub/utils/paths.rb +230 -0
  30. data/lib/durable_huggingface_hub/utils/progress.rb +217 -0
  31. data/lib/durable_huggingface_hub/utils/retry.rb +165 -0
  32. data/lib/durable_huggingface_hub/utils/validators.rb +236 -0
  33. data/lib/durable_huggingface_hub/version.rb +8 -0
  34. data/lib/huggingface_hub.rb +205 -0
  35. metadata +334 -0
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
module DurableHuggingfaceHub
  # Central registry of literal values shared across the Hub client.
  #
  # Covers endpoint URLs, well-known weight/config file names, timeouts,
  # size limits, repository-type tables, cache directory names, HTTP header
  # names and the user-agent string.
  module Constants
    # --- Endpoints ---------------------------------------------------------

    # Base URL of the Hub.
    ENDPOINT = "https://huggingface.co"

    # Hub home page URL (note the trailing slash).
    HUGGINGFACE_CO_URL_HOME = "https://huggingface.co/"

    # File-resolution URL with {repo_id}, {revision} and {filename} placeholders.
    HUGGINGFACE_CO_URL_TEMPLATE = "https://huggingface.co/{repo_id}/resolve/{revision}/{filename}"

    # Base URL of the default inference API.
    INFERENCE_ENDPOINT = "https://api-inference.huggingface.co"

    # Base URL of the Inference Endpoints management API.
    INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2"

    # URL of the inference catalog.
    INFERENCE_CATALOG_ENDPOINT = "https://endpoints.huggingface.co/api/catalog"

    # Inference proxy URL with a {provider} placeholder.
    INFERENCE_PROXY_TEMPLATE = "https://router.huggingface.co/{provider}"

    # Recognised image keys for inference endpoints.
    INFERENCE_ENDPOINT_IMAGE_KEYS = [
      "custom", "huggingface", "huggingfaceNeuron", "llamacpp", "tei", "tgi", "tgiNeuron"
    ].freeze

    # --- Repository identifiers and revisions ------------------------------

    # Character separating the parts of a repository ID (e.g. organization/model-name).
    REPO_ID_SEPARATOR = "/"

    # Revision used when none is given.
    DEFAULT_REVISION = "main"

    # Whole-string match for exactly 40 hexadecimal characters, case-insensitive
    # (the shape of a Git SHA-1 commit OID).
    REGEX_COMMIT_OID = /\A[0-9a-f]{40}\z/i

    # --- Well-known file names ---------------------------------------------

    # PyTorch weights; {suffix} is a placeholder in the pattern.
    PYTORCH_WEIGHTS_NAME = "pytorch_model.bin"
    PYTORCH_WEIGHTS_INDEX_NAME = "pytorch_model.bin.index.json"
    PYTORCH_WEIGHTS_FILE_PATTERN = "pytorch_model{suffix}.bin"

    # TensorFlow weights.
    TF2_WEIGHTS_NAME = "tf_model.h5"
    TF_WEIGHTS_NAME = "model.ckpt"
    TF2_WEIGHTS_FILE_PATTERN = "tf_model{suffix}.h5"

    # Flax weights.
    FLAX_WEIGHTS_NAME = "flax_model.msgpack"

    # SafeTensors weights: a glob pattern, a {suffix} pattern, the single-file
    # name, the index file name and an upper bound on the header length
    # (unit not stated here; presumably bytes, to be confirmed against its user).
    SAFETENSORS_WEIGHTS_FILE_PATTERN = "model*.safetensors"
    SAFETENSORS_WEIGHTS_FILE_PATTERN_SUFFIX = "model{suffix}.safetensors"
    SAFETENSORS_SINGLE_FILE = "model.safetensors"
    SAFETENSORS_INDEX_FILE = "model.safetensors.index.json"
    SAFETENSORS_MAX_HEADER_LENGTH = 25_000_000

    # Model configuration file and repository card file.
    CONFIG_NAME = "config.json"
    REPOCARD_NAME = "README.md"

    # --- Timeouts (seconds) ------------------------------------------------

    # ETag validation requests.
    DEFAULT_ETAG_TIMEOUT = 10

    # File downloads (600 s = 10 minutes).
    DEFAULT_DOWNLOAD_TIMEOUT = 600

    # General API requests.
    DEFAULT_REQUEST_TIMEOUT = 10

    # --- Sizes (bytes) -----------------------------------------------------

    # Chunk size for streaming downloads: 10 MiB.
    DOWNLOAD_CHUNK_SIZE = 10 * 1024**2

    # Upper bound for HTTP downloads: 50 GiB.
    MAX_HTTP_DOWNLOAD_SIZE = 50 * 1024**3

    # LFS (Large File Storage) threshold: 10 MiB.
    LFS_THRESHOLD = 10 * 1024**2

    # Interval, in seconds, between file-lock log messages.
    FILELOCK_LOG_EVERY_SECONDS = 10

    # --- Repository types --------------------------------------------------

    REPO_TYPE_MODEL = "model"
    REPO_TYPE_DATASET = "dataset"
    REPO_TYPE_SPACE = "space"

    # Accepted repository types; nil is included for backward compatibility.
    REPO_TYPES = [
      nil,
      REPO_TYPE_MODEL,
      REPO_TYPE_DATASET,
      REPO_TYPE_SPACE
    ].freeze

    # Separator used when serializing repository IDs.
    REPO_ID_SERIALIZATION_SEPARATOR = "--"

    # Known Space SDK names.
    SPACES_SDK_TYPES = [
      "gradio",
      "streamlit",
      "docker",
      "static"
    ].freeze

    # URL path prefix per repository type; there is no entry for models.
    REPO_TYPES_URL_PREFIXES = { REPO_TYPE_DATASET => "datasets/", REPO_TYPE_SPACE => "spaces/" }.freeze

    # Plural name to repository type.
    REPO_TYPES_MAPPING = {
      "datasets" => REPO_TYPE_DATASET,
      "spaces" => REPO_TYPE_SPACE,
      "models" => REPO_TYPE_MODEL
    }.freeze

    # --- Cache layout ------------------------------------------------------

    HF_CACHE_SUBDIR = "hub"
    MODELS_CACHE_SUBDIR = "models"

    # --- HTTP header names -------------------------------------------------

    HEADER_X_REPO_COMMIT = "X-Repo-Commit"
    HEADER_X_LINKED_SIZE = "X-Linked-Size"
    HEADER_X_LINKED_ETAG = "X-Linked-Etag"
    HEADER_X_BILL_TO = "X-HF-Bill-To"
    HEADER_X_XET_ENDPOINT = "X-Xet-Cas-Url"
    HEADER_X_XET_ACCESS_TOKEN = "X-Xet-Access-Token"
    HEADER_X_XET_EXPIRATION = "X-Xet-Token-Expiration"
    HEADER_X_XET_HASH = "X-Xet-Hash"
    HEADER_X_XET_REFRESH_ROUTE = "X-Xet-Refresh-Route"

    # User agent sent with API requests. VERSION is not defined in this module,
    # so it must already be loaded when this file is evaluated (presumably from
    # version.rb; confirm the require order).
    USER_AGENT = "huggingface_hub/#{VERSION}; ruby/#{RUBY_VERSION}"
  end
end
@@ -0,0 +1,412 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module DurableHuggingfaceHub
6
+ # Base error class for all HuggingFace Hub errors.
7
+ #
8
+ # All exceptions raised by this library inherit from this class,
9
+ # allowing users to rescue all library-specific errors with a single rescue clause.
10
+ #
11
+ # @example Catching all library errors
12
+ # begin
13
+ # # HuggingFace Hub operations
14
+ # rescue DurableHuggingfaceHub::DurableHuggingfaceHubError => e
15
+ # puts "HuggingFace Hub error: #{e.message}"
16
+ # end
17
class DurableHuggingfaceHubError < StandardError
end

# Base class for HTTP-related errors from the HuggingFace Hub API.
#
# Captures the HTTP status code, the raw response body, the request ID and a
# server message extracted from the body.
#
# @example Accessing error details
#   begin
#     # API call
#   rescue DurableHuggingfaceHub::HfHubHTTPError => e
#     puts "Status: #{e.status_code}"
#     puts "Message: #{e.server_message}"
#     puts "Request ID: #{e.request_id}"
#   end
class HfHubHTTPError < DurableHuggingfaceHubError
  # Longest raw-body excerpt used as a fallback server message.
  MAX_RAW_MESSAGE_LENGTH = 200
  private_constant :MAX_RAW_MESSAGE_LENGTH

  # @return [Integer, nil] HTTP status code, nil when none was given
  attr_reader :status_code

  # @return [String, nil] Response body from the server
  attr_reader :response_body

  # @return [String, nil] Request ID for tracking purposes
  attr_reader :request_id

  # @return [Object, nil] Value of the "error" (or "message") key of a JSON
  #   object body, otherwise the raw body truncated to 200 characters; nil
  #   when there is no body
  attr_reader :server_message

  # Creates a new HTTP error.
  #
  # @param message [String] Error message
  # @param status_code [Integer, nil] HTTP status code
  # @param response_body [String, nil] Response body from server
  # @param request_id [String, nil] Request ID for tracking
  def initialize(message, status_code: nil, response_body: nil, request_id: nil)
    super(message)
    @status_code = status_code
    @response_body = response_body
    @request_id = request_id
    @server_message = parse_server_message(response_body)
  end

  private

  # Extracts the server message from a response body.
  #
  # A JSON object yields its "error" value, or its "message" value when
  # "error" is absent. Anything else (invalid JSON, or valid JSON that is
  # not an object, such as an array, number or null) falls back to the raw
  # body, truncated.
  #
  # @param body [String, nil] Response body
  # @return [Object, nil] Parsed message, truncated raw body, or nil for a
  #   nil/empty body
  def parse_server_message(body)
    return nil if body.nil? || body.empty?

    parsed = JSON.parse(body)
    # Only a Hash can be indexed by "error"/"message"; indexing an Array,
    # Integer or nil would raise from inside this constructor and hide the
    # HTTP error being reported.
    return truncate_body(body) unless parsed.is_a?(Hash)

    parsed["error"] || parsed["message"]
  rescue JSON::ParserError
    truncate_body(body)
  end

  # Shortens +body+ to MAX_RAW_MESSAGE_LENGTH characters, appending "..."
  # when something was cut off.
  #
  # @param body [String] Raw response body
  # @return [String] The body itself or its truncated form
  def truncate_body(body)
    return body if body.length <= MAX_RAW_MESSAGE_LENGTH

    "#{body[0...MAX_RAW_MESSAGE_LENGTH]}..."
  end
end
79
+
80
+ # MARK: - Repository Errors
81
+
82
+ # Error raised when a repository is not found on HuggingFace Hub.
83
+ #
84
+ # This error occurs when attempting to access a repository that doesn't exist
85
+ # or when the user doesn't have permission to access it.
86
+ #
87
+ # @example
88
+ # # Raised when repository doesn't exist
89
+ # raise RepositoryNotFoundError.new("organization/nonexistent-model")
90
class RepositoryNotFoundError < HfHubHTTPError
  # @return [String] The repository ID that was not found
  attr_reader :repo_id

  # Creates a new repository not found error with status code 404.
  #
  # @param repo_id [String] Repository ID
  # @param message [String, nil] Custom error message; defaults to
  #   "Repository not found: <repo_id>"
  # @param response_body [String, nil] Raw response body, forwarded to the
  #   parent class
  # @param request_id [String, nil] Request ID, forwarded to the parent class
  def initialize(repo_id, message: nil, response_body: nil, request_id: nil)
    @repo_id = repo_id
    super(
      message || "Repository not found: #{repo_id}",
      status_code: 404,
      response_body: response_body,
      request_id: request_id
    )
  end
end
104
+
105
+ # Error raised when a specific revision is not found in a repository.
106
+ #
107
+ # @example
108
+ # raise RevisionNotFoundError.new("main", repo_id: "bert-base")
109
class RevisionNotFoundError < HfHubHTTPError
  # @return [String] The revision that was not found
  attr_reader :revision

  # @return [String, nil] The repository ID, when one was given
  attr_reader :repo_id

  # Creates a new revision not found error with status code 404.
  #
  # @param revision [String] Revision (branch, tag, or commit)
  # @param repo_id [String, nil] Repository ID
  # @param message [String, nil] Custom error message; a default is built
  #   from the revision and repository when omitted
  # @param response_body [String, nil] Raw response body, forwarded to the
  #   parent class
  # @param request_id [String, nil] Request ID, forwarded to the parent class
  def initialize(revision, repo_id: nil, message: nil, response_body: nil, request_id: nil)
    @revision = revision
    @repo_id = repo_id
    super(
      message || build_message,
      status_code: 404,
      response_body: response_body,
      request_id: request_id
    )
  end

  private

  # Default message; mentions the repository only when it is known.
  #
  # @return [String]
  def build_message
    return "Revision not found: #{revision}" unless repo_id

    "Revision '#{revision}' not found in repository '#{repo_id}'"
  end
end
138
+
139
+ # Error raised when a specific file or entry is not found in a repository.
140
+ #
141
+ # @example
142
+ # raise EntryNotFoundError.new("config.json", repo_id: "bert-base")
143
class EntryNotFoundError < HfHubHTTPError
  # @return [String] The file path that was not found
  attr_reader :path

  # @return [String, nil] The repository ID
  attr_reader :repo_id

  # @return [String, nil] The revision that was searched, when one was given
  attr_reader :revision

  # Creates a new entry not found error with status code 404.
  #
  # @param path [String] File path in repository
  # @param repo_id [String, nil] Repository ID
  # @param revision [String, nil] Revision
  # @param message [String, nil] Custom error message; a default is built
  #   from the path, repository and revision when omitted
  # @param response_body [String, nil] Raw response body, forwarded to the
  #   parent class
  # @param request_id [String, nil] Request ID, forwarded to the parent class
  def initialize(path, repo_id: nil, revision: nil, message: nil, response_body: nil, request_id: nil)
    @path = path
    @repo_id = repo_id
    @revision = revision
    super(
      message || build_message,
      status_code: 404,
      response_body: response_body,
      request_id: request_id
    )
  end

  private

  # Default message; the repository and revision clauses are appended only
  # when those values are present.
  #
  # @return [String]
  def build_message
    parts = ["Entry not found: #{path}"]
    parts << "in repository '#{repo_id}'" if repo_id
    parts << "at revision '#{revision}'" if revision
    parts.join(" ")
  end
end
173
+
174
+ # Error raised when a file is not found in the local cache.
175
+ #
176
+ # This error occurs when local_files_only mode is enabled and the requested
177
+ # file is not available in the local cache.
178
+ #
179
+ # @example
180
+ # raise LocalEntryNotFoundError.new("File not found in cache")
181
class LocalEntryNotFoundError < DurableHuggingfaceHubError
  # Builds the error; the message argument is mandatory.
  #
  # @param message [String] Error message
  def initialize(message)
    super # bare super forwards +message+ unchanged
  end
end
189
+
190
+ # Error raised when attempting to access a gated repository without proper access.
191
+ #
192
+ # Gated repositories require users to accept terms or have special permissions.
193
+ #
194
+ # @example
195
+ # raise GatedRepoError.new("meta-llama/Llama-2-7b")
196
class GatedRepoError < HfHubHTTPError
  # @return [String] The gated repository ID
  attr_reader :repo_id

  # Creates a new gated repository error with status code 403.
  #
  # @param repo_id [String] Repository ID
  # @param message [String, nil] Custom error message; a default naming the
  #   repository is used when omitted
  # @param response_body [String, nil] Raw response body, forwarded to the
  #   parent class
  # @param request_id [String, nil] Request ID, forwarded to the parent class
  def initialize(repo_id, message: nil, response_body: nil, request_id: nil)
    @repo_id = repo_id
    super(
      message || "Repository '#{repo_id}' is gated. You must be authenticated and have access.",
      status_code: 403,
      response_body: response_body,
      request_id: request_id
    )
  end
end
210
+
211
+ # Error raised when attempting to access a disabled repository.
212
+ #
213
+ # Repositories may be disabled due to policy violations or other reasons.
214
+ #
215
+ # @example
216
+ # raise DisabledRepoError.new("disabled/repo")
217
class DisabledRepoError < HfHubHTTPError
  # @return [String] The disabled repository ID
  attr_reader :repo_id

  # Creates a new disabled repository error with status code 403.
  #
  # @param repo_id [String] Repository ID
  # @param message [String, nil] Custom error message; a default naming the
  #   repository is used when omitted
  # @param response_body [String, nil] Raw response body, forwarded to the
  #   parent class
  # @param request_id [String, nil] Request ID, forwarded to the parent class
  def initialize(repo_id, message: nil, response_body: nil, request_id: nil)
    @repo_id = repo_id
    super(
      message || "Repository '#{repo_id}' has been disabled.",
      status_code: 403,
      response_body: response_body,
      request_id: request_id
    )
  end
end
231
+
232
+ # MARK: - Authentication Errors
233
+
234
+ # Error raised when a request fails due to bad request parameters.
235
+ #
236
+ # @example
237
+ # raise BadRequestError.new("Invalid repository ID format")
238
class BadRequestError < HfHubHTTPError
  # Creates a new bad request error with status code 400.
  #
  # @param message [String] Error message
  # @param response_body [String, nil] Response body
  # @param request_id [String, nil] Request ID, forwarded to the parent class
  def initialize(message, response_body: nil, request_id: nil)
    super(message, status_code: 400, response_body: response_body, request_id: request_id)
  end
end
247
+
248
+ # Error raised when no local authentication token is found.
249
+ #
250
+ # This error occurs when an operation requires authentication but no token
251
+ # is available in environment variables or the token file.
252
+ #
253
+ # @example
254
+ # raise LocalTokenNotFoundError.new
255
class LocalTokenNotFoundError < DurableHuggingfaceHubError
  # Builds the error, substituting a login hint when no message is supplied.
  #
  # @param message [String, nil] Custom error message
  def initialize(message: nil)
    fallback = "No HuggingFace token found. " \
               "Please login using DurableHuggingfaceHub.login or set the HF_TOKEN environment variable."
    super(message || fallback)
  end
end
265
+
266
+ # MARK: - File Operation Errors
267
+
268
+ # Error raised when file metadata cannot be retrieved or is invalid.
269
+ #
270
+ # @example
271
+ # raise FileMetadataError.new("config.json", "Missing ETag header")
272
class FileMetadataError < DurableHuggingfaceHubError
  # @return [String] The file path
  attr_reader :path

  # Builds the error; the final message names the file and then the reason.
  #
  # @param path [String] File path
  # @param message [String] Error message
  def initialize(path, message)
    description = "File metadata error for '#{path}': #{message}"
    super(description)
    @path = path
  end
end
285
+
286
+ # Error raised when the cache directory or cached files are not found.
287
+ #
288
+ # @example
289
+ # raise CacheNotFoundError.new("/path/to/cache")
290
class CacheNotFoundError < DurableHuggingfaceHubError
  # @return [String] The cache path
  attr_reader :cache_path

  # Builds the error, defaulting the message to one that names the path.
  #
  # @param cache_path [String] Path to cache directory or file
  # @param message [String, nil] Custom error message
  def initialize(cache_path, message: nil)
    super(message || "Cache not found at: #{cache_path}")
    @cache_path = cache_path
  end
end
304
+
305
+ # Error raised when cached files are corrupted or invalid.
306
+ #
307
+ # @example
308
+ # raise CorruptedCacheError.new("/path/to/file", "Checksum mismatch")
309
class CorruptedCacheError < DurableHuggingfaceHubError
  # @return [String] The corrupted file path
  attr_reader :path

  # Builds the error; the final message names the file and then the reason.
  #
  # @param path [String] Path to corrupted file
  # @param reason [String] Reason for corruption
  def initialize(path, reason)
    description = "Corrupted cache file at '#{path}': #{reason}"
    super(description)
    @path = path
  end
end
322
+
323
+ # MARK: - Inference Errors
324
+
325
+ # Error raised when an inference request times out.
326
+ #
327
+ # @example
328
+ # raise InferenceTimeoutError.new(task: "text-generation", timeout: 30)
329
class InferenceTimeoutError < DurableHuggingfaceHubError
  # @return [String, nil] The task that timed out
  attr_reader :task

  # @return [Integer, nil] Timeout duration in seconds
  attr_reader :timeout

  # Builds the error. All arguments are keywords; when no message is given
  # one is assembled from whichever of task and timeout are present.
  #
  # @param task [String, nil] Inference task type
  # @param timeout [Integer, nil] Timeout value in seconds
  # @param message [String, nil] Custom error message
  def initialize(task: nil, timeout: nil, message: nil)
    @task = task
    @timeout = timeout
    super(message || build_message)
  end

  private

  # Joins the fixed prefix with the optional task and timeout clauses.
  #
  # @return [String]
  def build_message
    [
      "Inference request timed out",
      ("for task '#{task}'" if task),
      ("after #{timeout} seconds" if timeout)
    ].compact.join(" ")
  end
end
357
+
358
+ # Error raised when an inference endpoint returns an error.
359
+ #
360
+ # @example
361
+ # raise InferenceEndpointError.new("Model not loaded", status_code: 503)
362
class InferenceEndpointError < HfHubHTTPError
  # Creates a new inference endpoint error.
  #
  # @param message [String] Error message
  # @param status_code [Integer, nil] HTTP status code
  # @param response_body [String, nil] Response body
  # @param request_id [String, nil] Request ID, forwarded to the parent class
  def initialize(message, status_code: nil, response_body: nil, request_id: nil)
    super(message, status_code: status_code, response_body: response_body, request_id: request_id)
  end
end
372
+
373
+ # MARK: - Validation Errors
374
+
375
+ # Error raised when input validation fails.
376
+ #
377
+ # @example
378
+ # raise ValidationError.new("repo_id", "Invalid format")
379
class ValidationError < DurableHuggingfaceHubError
  # @return [String, nil] The field that failed validation
  attr_reader :field

  # Builds the error; the message is prefixed with the field name when one
  # is given.
  #
  # @param field [String, nil] Field name
  # @param message [String] Error message
  def initialize(field, message)
    @field = field
    prefix = field ? "Validation error for '#{field}'" : "Validation error"
    super("#{prefix}: #{message}")
  end
end
393
+
394
+ # Error raised when LFS (Large File Storage) operations fail.
395
+ #
396
+ # @example
397
+ # raise LFSError.new("Upload failed", file: "large_model.bin")
398
class LFSError < DurableHuggingfaceHubError
  # @return [String, nil] The file involved in the LFS operation
  attr_reader :file

  # Builds the error; the message is prefixed with the file name when one
  # is given.
  #
  # @param message [String] Error message
  # @param file [String, nil] File path
  def initialize(message, file: nil)
    @file = file
    prefix = file ? "LFS error for '#{file}'" : "LFS error"
    super("#{prefix}: #{message}")
  end
end
412
+ end