durable_huggingface_hub 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +29 -0
  3. data/.rubocop.yml +108 -0
  4. data/CHANGELOG.md +127 -0
  5. data/README.md +547 -0
  6. data/Rakefile +106 -0
  7. data/devenv.lock +171 -0
  8. data/devenv.nix +15 -0
  9. data/devenv.yaml +8 -0
  10. data/huggingface_hub.gemspec +63 -0
  11. data/lib/durable_huggingface_hub/authentication.rb +245 -0
  12. data/lib/durable_huggingface_hub/cache.rb +508 -0
  13. data/lib/durable_huggingface_hub/configuration.rb +191 -0
  14. data/lib/durable_huggingface_hub/constants.rb +145 -0
  15. data/lib/durable_huggingface_hub/errors.rb +412 -0
  16. data/lib/durable_huggingface_hub/file_download.rb +831 -0
  17. data/lib/durable_huggingface_hub/hf_api.rb +1278 -0
  18. data/lib/durable_huggingface_hub/repo_card.rb +430 -0
  19. data/lib/durable_huggingface_hub/types/cache_info.rb +298 -0
  20. data/lib/durable_huggingface_hub/types/commit_info.rb +149 -0
  21. data/lib/durable_huggingface_hub/types/dataset_info.rb +158 -0
  22. data/lib/durable_huggingface_hub/types/model_info.rb +154 -0
  23. data/lib/durable_huggingface_hub/types/space_info.rb +158 -0
  24. data/lib/durable_huggingface_hub/types/user.rb +179 -0
  25. data/lib/durable_huggingface_hub/types.rb +205 -0
  26. data/lib/durable_huggingface_hub/utils/auth.rb +174 -0
  27. data/lib/durable_huggingface_hub/utils/headers.rb +220 -0
  28. data/lib/durable_huggingface_hub/utils/http.rb +329 -0
  29. data/lib/durable_huggingface_hub/utils/paths.rb +230 -0
  30. data/lib/durable_huggingface_hub/utils/progress.rb +217 -0
  31. data/lib/durable_huggingface_hub/utils/retry.rb +165 -0
  32. data/lib/durable_huggingface_hub/utils/validators.rb +236 -0
  33. data/lib/durable_huggingface_hub/version.rb +8 -0
  34. data/lib/huggingface_hub.rb +205 -0
  35. metadata +334 -0
@@ -0,0 +1,508 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "fileutils"
5
+ require_relative "types"
6
+ require_relative "file_download"
7
+
8
+ module DurableHuggingfaceHub
9
+ module Cache
10
# Walks the cache directory and summarizes every cached repository found in it.
#
# Each immediate subdirectory is handed to scan_repository; entries for which
# it returns nil are left out. A cache directory that does not exist yields an
# empty summary (no repositories, size 0) instead of an error.
#
# @param cache_dir [String, Pathname, nil] Cache directory to scan. The value
#   is passed through FileDownload.resolve_cache_dir, which decides what nil
#   resolves to.
#
# @return [DurableHuggingfaceHub::Types::HFCacheInfo] Summary carrying the
#   resolved cache directory, the repositories found and their combined size
#
# @raise [ArgumentError] Presumably raised by FileDownload.resolve_cache_dir
#   for an invalid cache_dir (that method is not visible here -- confirm)
#
# @example Scan the default cache directory
#   cache_info = DurableHuggingfaceHub.scan_cache_dir
#
# @example Scan a custom cache directory
#   cache_info = DurableHuggingfaceHub.scan_cache_dir(cache_dir: "/custom/cache")
def self.scan_cache_dir(cache_dir: nil)
  root = FileDownload.resolve_cache_dir(cache_dir)

  found =
    if root.exist?
      # Only directories can be repositories; plain files in the cache root are ignored.
      root.children
          .select(&:directory?)
          .map { |entry| scan_repository(entry) }
          .compact
    else
      []
    end

  DurableHuggingfaceHub::Types::HFCacheInfo.new(
    cache_dir: root,
    repos: found,
    size: found.sum(&:size)
  )
end
59
+
60
# Builds the cache summary for one repository directory.
#
# The directory name is expected to look like `{repo_type}s--{namespace}--{name}`
# or `{repo_type}s--{name}`; every "--" after the type prefix is turned back
# into "/" to recover the repo id. Each directory under `snapshots/` is
# scanned with scan_revision.
#
# @param repo_dir [Pathname] Repository directory to scan
# @return [DurableHuggingfaceHub::Types::CachedRepoInfo, nil] Repository info,
#   or nil when the directory name does not match the expected format or no
#   revision could be scanned
def self.scan_repository(repo_dir)
  parsed = repo_dir.basename.to_s.match(/^(\w+)s--(.+)$/)
  return nil unless parsed

  repo_type = parsed[1] # "model", "dataset", or "space"
  # gsub leaves a name without "--" untouched, so no separate branch is needed.
  repo_id = parsed[2].gsub("--", "/")

  snapshots_root = repo_dir.join("snapshots")
  revisions =
    if snapshots_root.exist?
      snapshots_root.children
                    .select(&:directory?)
                    .map { |revision_dir| scan_revision(repo_dir, revision_dir, repo_type) }
                    .compact
    else
      []
    end
  return nil if revisions.empty?

  # Repository-level timestamps are the most recent ones seen across all
  # revisions (modification) and all of their files (access).
  newest_modification = revisions.map(&:last_modified).compact.max
  newest_access = revisions.flat_map(&:files).map(&:last_accessed).compact.max

  DurableHuggingfaceHub::Types::CachedRepoInfo.new(
    repo_id: repo_id,
    repo_type: repo_type,
    revisions: revisions,
    size: revisions.sum(&:size),
    last_accessed: newest_access,
    last_modified: newest_modification
  )
end
122
+
123
# Scans one revision (snapshot) directory and summarizes the files in it.
#
# Every non-directory entry below the revision directory is scanned with
# scan_file, including dotfiles such as `.gitattributes`. Files whose scan
# raises are skipped so that one unreadable entry does not abort the scan.
#
# @param repo_dir [Pathname] Repository directory (used to look up refs)
# @param revision_dir [Pathname] Revision directory to scan; its basename is
#   taken as the commit hash
# @param repo_type [String] Type of repository (currently unused by this method)
# @return [DurableHuggingfaceHub::Types::CachedRevisionInfo, nil] Revision
#   info, or nil when no file could be scanned
def self.scan_revision(repo_dir, revision_dir, repo_type)
  commit_hash = revision_dir.basename.to_s
  refs = get_refs_for_commit(repo_dir, commit_hash)

  files = []
  # FNM_DOTMATCH: without it the glob silently skips hidden files, which
  # would under-report both the file list and the revision size.
  revision_dir.glob("**/*", File::FNM_DOTMATCH) do |file_path|
    next if file_path.directory?

    begin
      files << scan_file(file_path, commit_hash)
    rescue StandardError
      # Best effort: skip files that can't be analyzed.
      next
    end
  end

  return nil if files.empty?

  DurableHuggingfaceHub::Types::CachedRevisionInfo.new(
    commit_hash: commit_hash,
    refs: refs,
    files: files,
    size: files.sum(&:size),
    last_modified: files.map(&:last_modified).compact.max
  )
end
166
+
167
# Collects size, timestamps and (when derivable) the ETag of one cached file.
#
# For symlinks, the basename of the link target is used as the ETag when it
# looks like a hash (40 or more lowercase hex characters). Both relative and
# absolute link targets are handled. Regular files get no ETag.
#
# @param file_path [Pathname] Path to the file
# @param commit_hash [String] Commit hash this file belongs to
# @return [DurableHuggingfaceHub::Types::CachedFileInfo] File information
def self.scan_file(file_path, commit_hash)
  stat = begin
    file_path.stat
  rescue Errno::ENOENT
    # stat follows symlinks and fails on a dangling one; fall back to the
    # metadata of the link itself.
    file_path.lstat
  end

  etag = nil
  if file_path.symlink?
    begin
      # Only the target's basename matters, so a relative target is as good
      # as an absolute one.
      blob_name = file_path.readlink.basename.to_s
      etag = blob_name if blob_name.match?(/\A[a-f0-9]{40,}\z/) # SHA-like hash
    rescue Errno::ENOENT
      # Link vanished between the check and the read; no ETag available.
      etag = nil
    end
  end

  # Passed as a single hash, as before, so the constructor sees the same
  # argument shape.
  attrs = {
    file_path: file_path,
    size: stat.size,
    etag: etag,
    commit_hash: commit_hash,
    last_accessed: stat.atime,
    last_modified: stat.mtime
  }

  DurableHuggingfaceHub::Types::CachedFileInfo.new(attrs)
end
212
+
213
# Lists the refs whose stored commit equals the given commit hash.
#
# Every file below `refs/` is read; a ref matches when its stripped content
# equals commit_hash. Files that cannot be read are ignored.
#
# @param repo_dir [Pathname] Repository directory
# @param commit_hash [String] Commit hash to find refs for
# @return [Array<String>] Matching ref names, as paths relative to `refs/`
#   (empty when there is no `refs/` directory)
def self.get_refs_for_commit(repo_dir, commit_hash)
  refs_root = repo_dir.join("refs")
  return [] unless refs_root.exist?

  refs_root.glob("**/*").each_with_object([]) do |entry, names|
    next if entry.directory?

    begin
      next unless entry.read.strip == commit_hash

      names << entry.relative_path_from(refs_root).to_s
    rescue
      # Skip unreadable ref files
      next
    end
  end
end
242
+
243
# Locates the cache directory that holds a repository's files.
#
# The directory name is derived from the repo type and id
# (`{repo_type}s--{namespace}--{name}` or `{repo_type}s--{name}`) and looked
# up inside the resolved cache directory.
#
# @param repo_id [String] Repository ID
# @param repo_type [String] Type of repository ("model", "dataset", or "space")
# @param cache_dir [String, Pathname, nil] Custom cache directory
# @return [Pathname, nil] Path to the repository's cache directory, or nil if
#   that directory does not exist
#
# @example Get cache path for a model
#   cache_path = DurableHuggingfaceHub::Cache.cached_assets_path(
#     repo_id: "bert-base-uncased",
#     repo_type: "model"
#   )
#   puts cache_path # /home/user/.cache/huggingface/hub/models--bert-base-uncased
def self.cached_assets_path(repo_id:, repo_type: "model", cache_dir: nil)
  validators = DurableHuggingfaceHub::Utils::Validators
  validators.validate_repo_id(repo_id)
  # The validator's return value replaces the caller-supplied type.
  repo_type = validators.validate_repo_type(repo_type)

  hub_root = FileDownload.resolve_cache_dir(cache_dir)

  # "namespace/name" becomes "namespace--name"; any other shape is used as-is.
  segments = repo_id.split("/")
  suffix = segments.length == 2 ? segments.join("--") : repo_id

  candidate = hub_root.join("#{repo_type}s--#{suffix}")
  candidate if candidate.exist?
end
275
+
276
# Plans and executes the removal of cached repositories, revisions and files.
#
# Build a strategy from entries returned by a cache scan, inspect it with
# #preview or #size_to_delete_str, then call #execute to perform the deletion.
#
# Note that #size_to_delete simply adds up the three collections, so an entry
# listed in more than one of them (e.g. a revision of a repository that is
# also listed) is counted once per listing.
#
# @example Delete repositories larger than 1 GB
#   cache_dir = Pathname.new("/custom/cache")
#   cache_info = DurableHuggingfaceHub.scan_cache_dir(cache_dir: cache_dir)
#   big_repos = cache_info.repos.select { |repo| repo.size > 1_000_000_000 }
#   strategy = DeleteCacheStrategy.new(cache_dir: cache_dir, repos: big_repos)
#   puts "Will delete #{strategy.size_to_delete_str}"
#   strategy.execute
#
# @example Delete revisions not modified in the last 30 days
#   cutoff = Time.now - (30 * 24 * 60 * 60)
#   old_revisions = cache_info.repos.flat_map do |repo|
#     repo.revisions.select { |rev| rev.last_modified && rev.last_modified < cutoff }
#   end
#   strategy = DeleteCacheStrategy.new(cache_dir: cache_dir, revisions: old_revisions)
#   strategy.execute
class DeleteCacheStrategy
  # Units used by #size_to_delete_str, smallest first.
  SIZE_UNITS = %w[B KB MB GB TB].freeze

  # @return [Array<DurableHuggingfaceHub::Types::CachedRepoInfo>] Repositories to delete
  attr_reader :repos

  # @return [Array<DurableHuggingfaceHub::Types::CachedRevisionInfo>] Revisions to delete
  attr_reader :revisions

  # @return [Array<DurableHuggingfaceHub::Types::CachedFileInfo>] Individual files to delete
  attr_reader :files

  # Initialize a new delete strategy.
  #
  # @param cache_dir [Pathname, String] The cache directory that contains the
  #   repository directories; strings are converted to Pathname
  # @param repos [Array<DurableHuggingfaceHub::Types::CachedRepoInfo>] Repositories to delete
  # @param revisions [Array<DurableHuggingfaceHub::Types::CachedRevisionInfo>] Revisions to delete
  # @param files [Array<DurableHuggingfaceHub::Types::CachedFileInfo>] Individual files to delete
  def initialize(cache_dir:, repos: [], revisions: [], files: [])
    # Coerce so that a String cache_dir works with #join / #each_child below.
    @cache_dir = cache_dir.nil? ? nil : Pathname(cache_dir)
    @repos = repos
    @revisions = revisions
    @files = files
  end

  # Total size that will be deleted in bytes.
  #
  # @return [Integer] Sum of the sizes of all listed repositories, revisions and files
  def size_to_delete
    @repos.sum(&:size) + @revisions.sum(&:size) + @files.sum(&:size)
  end

  # Human-readable size string for what will be deleted.
  #
  # @return [String] Size with one decimal and a unit from B to TB (1024-based)
  def size_to_delete_str
    amount = size_to_delete.to_f
    unit_index = 0

    while amount >= 1024 && unit_index < SIZE_UNITS.length - 1
      amount /= 1024.0
      unit_index += 1
    end

    format("%.1f %s", amount, SIZE_UNITS[unit_index])
  end

  # Number of repositories that will be deleted.
  #
  # @return [Integer] Repository count
  def repo_count
    @repos.length
  end

  # Number of revisions that will be deleted.
  #
  # @return [Integer] Revision count
  def revision_count
    @revisions.length
  end

  # Number of files that will be deleted.
  #
  # @return [Integer] File count
  def file_count
    @files.length
  end

  # Preview what will be deleted.
  #
  # @return [String] Human-readable summary; empty when nothing is listed
  def preview
    return "" unless repo_count.positive? || revision_count.positive? || file_count.positive?

    summary = ["Will delete:"]
    summary << " #{repo_count} repositories" if repo_count.positive?
    summary << " #{revision_count} revisions" if revision_count.positive?
    summary << " #{file_count} files" if file_count.positive?
    summary << "Total size: #{size_to_delete_str}"

    if repo_count.positive?
      summary << ""
      summary << "Repositories:"
      @repos.each { |repo| summary << " #{repo.repo_id} (#{repo.size_str})" }
    end

    summary.join("\n")
  end

  # Execute the deletion strategy.
  #
  # Deletes the listed files first, then revisions, then whole repositories.
  # Individual failures are reported with `warn` and do not stop the run.
  # Use with caution - deletions are permanent.
  #
  # @return [Boolean] Always true
  def execute
    @files.each { |file_info| delete_file_safely(file_info.file_path) }
    @revisions.each { |revision_info| delete_revision_safely(revision_info) }
    @repos.each { |repo_info| delete_repository_safely(repo_info) }

    true
  end

  private

  # Deletes a single file or symlink, warning instead of raising on failure.
  #
  # @param file_path [Pathname, String] Path to file to delete
  def delete_file_safely(file_path)
    path = Pathname(file_path)
    # Pathname#exist? follows symlinks and reports false for a dangling link,
    # so check symlink? too; otherwise broken links would never be removed.
    return unless path.symlink? || path.exist?

    # For a symlink this removes the link only, not its target.
    path.unlink
  rescue StandardError => e
    # Log error but continue with other deletions
    warn "Failed to delete #{file_path}: #{e.message}"
  end

  # Deletes a revision's snapshot directory and the refs pointing at it,
  # warning instead of raising on failure.
  #
  # @param revision_info [DurableHuggingfaceHub::Types::CachedRevisionInfo] Revision to delete
  def delete_revision_safely(revision_info)
    commit_hash = revision_info.commit_hash.to_s
    # An empty hash would make the target the snapshots directory itself and
    # wipe every revision of the repository.
    return if commit_hash.empty?

    repo_dir = find_repo_dir_for_revision(revision_info)
    return unless repo_dir

    revision_dir = repo_dir.join("snapshots", commit_hash)
    return unless revision_dir.exist?

    FileUtils.rm_rf(revision_dir)

    # Refs that pointed to this revision would now dangle.
    cleanup_refs_for_revision(repo_dir, commit_hash)
  rescue StandardError => e
    warn "Failed to delete revision #{revision_info.commit_hash}: #{e.message}"
  end

  # Deletes a repository's whole cache directory, warning instead of raising
  # on failure.
  #
  # @param repo_info [DurableHuggingfaceHub::Types::CachedRepoInfo] Repository to delete
  def delete_repository_safely(repo_info)
    repo_dir_name = "#{repo_info.repo_type}s--#{repo_info.repo_id.gsub('/', '--')}"
    repo_dir = @cache_dir.join(repo_dir_name)
    return unless repo_dir.exist?

    FileUtils.rm_rf(repo_dir)
  rescue StandardError => e
    warn "Failed to delete repository #{repo_info.repo_id}: #{e.message}"
  end

  # Finds the repository directory that contains a snapshot for the revision.
  #
  # This is a simplified lookup: the revision does not record its repository,
  # so the first repository directory with a matching snapshot wins.
  #
  # @param revision_info [DurableHuggingfaceHub::Types::CachedRevisionInfo] Revision info
  # @return [Pathname, nil] Repository directory or nil if not found
  def find_repo_dir_for_revision(revision_info)
    @cache_dir.children.find do |repo_dir|
      repo_dir.directory? && repo_dir.join("snapshots", revision_info.commit_hash).exist?
    end
  end

  # Removes ref files whose content equals the deleted commit hash.
  #
  # @param repo_dir [Pathname] Repository directory
  # @param commit_hash [String] Commit hash that was deleted
  def cleanup_refs_for_revision(repo_dir, commit_hash)
    refs_dir = repo_dir.join("refs")
    return unless refs_dir.exist?

    refs_dir.glob("**/*") do |ref_file|
      next if ref_file.directory?

      begin
        ref_file.unlink if ref_file.read.strip == commit_hash
      rescue StandardError
        # Skip unreadable ref files
        next
      end
    end
  end
end
507
+ end
508
+ end
@@ -0,0 +1,191 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ module DurableHuggingfaceHub
6
# Settings for the HuggingFace Hub client.
#
# A single shared instance is available through Configuration.instance.
# Initial values come from environment variables, with fallbacks to built-in
# defaults, and every setting can be overridden programmatically afterwards.
#
# @example Accessing the configuration
#   DurableHuggingfaceHub::Configuration.instance.token
#
# @example Configuring programmatically
#   DurableHuggingfaceHub.configure do |config|
#     config.token = "hf_your_token_here"
#     config.cache_dir = "/custom/cache/path"
#   end
class Configuration
  # @return [String, nil] HuggingFace API token
  attr_accessor :token

  # @return [String] Base cache directory for HuggingFace Hub files
  attr_accessor :cache_dir

  # @return [String] HuggingFace Hub endpoint URL
  attr_accessor :endpoint

  # @return [Boolean] Whether to operate in offline mode
  attr_accessor :offline

  # @return [Boolean] Whether to disable progress bars during downloads
  attr_accessor :disable_progress_bars

  # @return [Boolean] Whether to disable telemetry
  attr_accessor :disable_telemetry

  # @return [Integer] Default timeout for API requests
  attr_accessor :request_timeout

  # @return [Integer] Default timeout for downloads
  attr_accessor :download_timeout

  # Returns the shared configuration, creating it on first use.
  #
  # @return [Configuration] The singleton configuration object
  def self.instance
    @instance ||= new
  end

  # Replaces the shared configuration with a freshly initialized one.
  # Primarily used for testing.
  #
  # @return [Configuration] The new configuration instance
  def self.reset!
    @instance = new
  end

  # Populates every setting from the environment, falling back to defaults.
  #
  # Empty environment variables are treated as unset. HF_TOKEN takes
  # precedence over HUGGING_FACE_HUB_TOKEN.
  def initialize
    @token = env_var("HF_TOKEN") || env_var("HUGGING_FACE_HUB_TOKEN")
    @cache_dir = determine_cache_dir
    @endpoint = env_var("HF_ENDPOINT") || Constants::ENDPOINT

    @offline = parse_boolean(env_var("HF_HUB_OFFLINE"), default: false)
    @disable_progress_bars = parse_boolean(env_var("HF_HUB_DISABLE_PROGRESS_BARS"), default: false)
    @disable_telemetry = parse_boolean(env_var("HF_HUB_DISABLE_TELEMETRY"), default: true)

    @request_timeout = parse_integer(
      env_var("HF_HUB_REQUEST_TIMEOUT"),
      default: Constants::DEFAULT_REQUEST_TIMEOUT
    )
    @download_timeout = parse_integer(
      env_var("HF_HUB_DOWNLOAD_TIMEOUT"),
      default: Constants::DEFAULT_DOWNLOAD_TIMEOUT
    )
  end

  # Returns the Hub cache directory below #cache_dir, creating it when it
  # does not exist yet.
  #
  # @return [Pathname] Path to the HuggingFace Hub cache
  def hub_cache_dir
    hub_dir = Pathname.new(cache_dir).join(Constants::HF_CACHE_SUBDIR)
    hub_dir.mkpath unless hub_dir.exist?
    hub_dir
  end

  # Returns the location of the token file inside #cache_dir.
  #
  # @return [Pathname] Path to the token storage file
  def token_path
    Pathname.new(cache_dir).join("token")
  end

  private

  # Reads an environment variable, treating an empty value as unset.
  #
  # @param key [String] The environment variable name
  # @return [String, nil] The value, or nil if unset or empty
  def env_var(key)
    raw = ENV[key]
    return nil if raw.nil? || raw.empty?

    raw
  end

  # Interprets a string as a boolean.
  #
  # Matching ignores case and surrounding whitespace:
  # - true: "1", "true", "yes", "on"
  # - false: "0", "false", "no", "off"
  #
  # @param value [String, nil] The string value to parse
  # @param default [Boolean] Result for nil or unrecognized input
  # @return [Boolean] The parsed boolean value
  def parse_boolean(value, default: false)
    return default if value.nil?

    normalized = value.downcase.strip
    return true if %w[1 true yes on].include?(normalized)
    return false if %w[0 false no off].include?(normalized)

    default
  end

  # Interprets a string as an integer.
  #
  # @param value [String, nil] The string value to parse
  # @param default [Integer] Result for nil or unparsable input
  # @return [Integer] The parsed integer value
  def parse_integer(value, default:)
    value.nil? ? default : Integer(value)
  rescue ArgumentError
    default
  end

  # Picks the base cache directory.
  #
  # Priority order:
  # 1. HF_HOME
  # 2. XDG_CACHE_HOME/huggingface
  # 3. ~/.cache/huggingface (Linux/Mac)
  # 4. ~/AppData/Local/huggingface (Windows)
  #
  # @return [String] Path to the cache directory
  def determine_cache_dir
    explicit_home = env_var("HF_HOME")
    return explicit_home if explicit_home

    xdg_root = env_var("XDG_CACHE_HOME")
    return Pathname.new(xdg_root).join("huggingface").to_s if xdg_root

    # No override present: use the per-platform default below the home directory.
    user_home = Dir.home
    segments = Gem.win_platform? ? ["AppData", "Local", "huggingface"] : [".cache", "huggingface"]
    Pathname.new(user_home).join(*segments).to_s
  end
end
168
+
169
# Entry point for adjusting the library's settings.
#
# When a block is given it receives the shared configuration object so that
# settings can be assigned on it; without a block the call just returns the
# shared configuration.
#
# @example
#   DurableHuggingfaceHub.configure do |config|
#     config.token = "hf_your_token"
#     config.cache_dir = "/tmp/hf_cache"
#   end
#
# @yield [config] Yields the configuration object for modification
# @yieldparam config [Configuration] The configuration object
# @return [Configuration] The configuration object
def self.configure
  if block_given?
    yield Configuration.instance
  end

  # Looked up again rather than reused, so the value returned is whatever
  # Configuration.instance reports after the block has run.
  Configuration.instance
end
184
+
185
# Read access to the library's shared settings.
#
# @return [Configuration] The object returned by Configuration.instance
def self.config
  Configuration.instance
end
191
+ end