durable_huggingface_hub 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +29 -0
- data/.rubocop.yml +108 -0
- data/CHANGELOG.md +127 -0
- data/README.md +547 -0
- data/Rakefile +106 -0
- data/devenv.lock +171 -0
- data/devenv.nix +15 -0
- data/devenv.yaml +8 -0
- data/huggingface_hub.gemspec +63 -0
- data/lib/durable_huggingface_hub/authentication.rb +245 -0
- data/lib/durable_huggingface_hub/cache.rb +508 -0
- data/lib/durable_huggingface_hub/configuration.rb +191 -0
- data/lib/durable_huggingface_hub/constants.rb +145 -0
- data/lib/durable_huggingface_hub/errors.rb +412 -0
- data/lib/durable_huggingface_hub/file_download.rb +831 -0
- data/lib/durable_huggingface_hub/hf_api.rb +1278 -0
- data/lib/durable_huggingface_hub/repo_card.rb +430 -0
- data/lib/durable_huggingface_hub/types/cache_info.rb +298 -0
- data/lib/durable_huggingface_hub/types/commit_info.rb +149 -0
- data/lib/durable_huggingface_hub/types/dataset_info.rb +158 -0
- data/lib/durable_huggingface_hub/types/model_info.rb +154 -0
- data/lib/durable_huggingface_hub/types/space_info.rb +158 -0
- data/lib/durable_huggingface_hub/types/user.rb +179 -0
- data/lib/durable_huggingface_hub/types.rb +205 -0
- data/lib/durable_huggingface_hub/utils/auth.rb +174 -0
- data/lib/durable_huggingface_hub/utils/headers.rb +220 -0
- data/lib/durable_huggingface_hub/utils/http.rb +329 -0
- data/lib/durable_huggingface_hub/utils/paths.rb +230 -0
- data/lib/durable_huggingface_hub/utils/progress.rb +217 -0
- data/lib/durable_huggingface_hub/utils/retry.rb +165 -0
- data/lib/durable_huggingface_hub/utils/validators.rb +236 -0
- data/lib/durable_huggingface_hub/version.rb +8 -0
- data/lib/huggingface_hub.rb +205 -0
- metadata +334 -0
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require_relative "types"
|
|
6
|
+
require_relative "file_download"
|
|
7
|
+
|
|
8
|
+
module DurableHuggingfaceHub
|
|
9
|
+
module Cache
|
|
10
|
+
# Walks the cache directory and summarises every cached repository found in it.
#
# Each immediate sub-directory is handed to scan_repository; entries for which
# it returns nil are left out. A cache directory that does not exist produces
# an empty summary (no repositories, size 0) rather than an error.
#
# @param cache_dir [String, Pathname, nil] Custom cache directory path. The
#   value is passed through FileDownload.resolve_cache_dir before use.
#
# @return [DurableHuggingfaceHub::Types::HFCacheInfo] Summary holding the
#   resolved cache directory, the repositories found, and their combined size
#
# @raise [ArgumentError] If cache_dir is invalid (presumably raised by
#   FileDownload.resolve_cache_dir; that helper is not visible here)
#
# @example Scan default cache directory
#   cache_info = DurableHuggingfaceHub.scan_cache_dir
#
# @example Scan custom cache directory
#   cache_info = DurableHuggingfaceHub.scan_cache_dir(cache_dir: "/custom/cache")
def self.scan_cache_dir(cache_dir: nil)
  cache_dir = FileDownload.resolve_cache_dir(cache_dir)

  found = []
  if cache_dir.exist?
    cache_dir.each_child do |entry|
      next unless entry.directory?

      summary = scan_repository(entry)
      found << summary if summary
    end
  end

  # With no repositories the sum is 0, which matches the "missing directory" result.
  DurableHuggingfaceHub::Types::HFCacheInfo.new(
    cache_dir: cache_dir,
    repos: found,
    size: found.sum(&:size)
  )
end
|
|
59
|
+
|
|
60
|
+
# Builds the summary for one repository directory of the cache.
#
# The directory name must look like "{repo_type}s--{namespace}--{name}" or
# "{repo_type}s--{name}"; every "--" after the type prefix is turned back into
# "/" to recover the repo id. Revisions are read from the "snapshots"
# sub-directory via scan_revision.
#
# @param repo_dir [Pathname] Repository directory to scan
# @return [DurableHuggingfaceHub::Types::CachedRepoInfo, nil] Repository info,
#   or nil when the name does not match or no revision could be scanned
def self.scan_repository(repo_dir)
  parsed = repo_dir.basename.to_s.match(/^(\w+)s--(.+)$/)
  return nil unless parsed

  repo_type = parsed[1] # "model", "dataset", or "space"
  # gsub leaves the string unchanged when there is no namespace separator.
  repo_id = parsed[2].gsub("--", "/")

  snapshots_root = repo_dir.join("snapshots")
  revision_dirs = snapshots_root.exist? ? snapshots_root.children.select(&:directory?) : []

  revisions = revision_dirs.map { |dir| scan_revision(repo_dir, dir, repo_type) }.compact
  return nil if revisions.empty?

  # Repository timestamps are the newest values seen across all revisions/files.
  newest_change = revisions.map(&:last_modified).compact.max
  newest_access = revisions.flat_map(&:files).map(&:last_accessed).compact.max

  DurableHuggingfaceHub::Types::CachedRepoInfo.new(
    repo_id: repo_id,
    repo_type: repo_type,
    revisions: revisions,
    size: revisions.sum(&:size),
    last_accessed: newest_access,
    last_modified: newest_change
  )
end
|
|
122
|
+
|
|
123
|
+
# Builds the summary for one snapshot (revision) directory.
#
# The directory name is taken as the commit hash. Every non-directory entry
# below it is passed to scan_file; entries whose scan raises are skipped.
#
# @param repo_dir [Pathname] Repository directory
# @param revision_dir [Pathname] Revision directory to scan
# @param repo_type [String] Type of repository (not used by this method)
# @return [DurableHuggingfaceHub::Types::CachedRevisionInfo, nil] Revision
#   info, or nil when no file could be scanned
def self.scan_revision(repo_dir, revision_dir, repo_type)
  commit_hash = revision_dir.basename.to_s
  scanned = []
  byte_count = 0
  newest_change = nil

  # Branches/tags whose ref file contains this commit hash.
  refs = get_refs_for_commit(repo_dir, commit_hash)

  revision_dir.glob("**/*").reject(&:directory?).each do |entry|
    begin
      info = scan_file(entry, commit_hash)
      scanned << info
      byte_count += info.size
      newest_change = [newest_change, info.last_modified].compact.max if info.last_modified
    rescue StandardError
      # Best effort: an entry that cannot be analysed is left out.
      next
    end
  end

  return nil if scanned.empty?

  DurableHuggingfaceHub::Types::CachedRevisionInfo.new(
    commit_hash: commit_hash,
    refs: refs,
    files: scanned,
    size: byte_count,
    last_modified: newest_change
  )
end
|
|
166
|
+
|
|
167
|
+
# Builds the cache record for one file inside a snapshot directory.
#
# Size and timestamps come from the file the path resolves to; for a dangling
# symlink they come from the link itself. When the path is a symlink, the file
# name of its target is used as the ETag if it looks like a hex digest (40 or
# more lowercase hex characters). This applies to absolute and relative link
# targets alike, since only the target's base name is inspected.
#
# @param file_path [Pathname] Path to the file
# @param commit_hash [String] Commit hash this file belongs to
# @return [DurableHuggingfaceHub::Types::CachedFileInfo] File information
def self.scan_file(file_path, commit_hash)
  # stat follows symlinks and raises ENOENT for a dangling link; lstat
  # describes the link itself and therefore still works.
  stat = begin
    file_path.stat
  rescue Errno::ENOENT
    file_path.lstat
  end

  etag = nil
  if file_path.symlink?
    begin
      blob_name = file_path.readlink.basename.to_s
      # \A/\z anchor the whole name so an embedded newline cannot satisfy the check.
      etag = blob_name if blob_name.match?(/\A[a-f0-9]{40,}\z/) # SHA-like hash
    rescue Errno::ENOENT
      # Link vanished between the checks; no ETag available.
      etag = nil
    end
  end

  attrs = {
    file_path: file_path,
    size: stat.size,
    etag: etag,
    commit_hash: commit_hash,
    last_accessed: stat.atime,
    last_modified: stat.mtime
  }

  DurableHuggingfaceHub::Types::CachedFileInfo.new(attrs)
end
|
|
212
|
+
|
|
213
|
+
# Lists the refs (branches/tags) whose ref file names the given commit.
#
# Every file below "<repo_dir>/refs" is read; a ref matches when its content,
# stripped of surrounding whitespace, equals commit_hash. Refs are reported as
# paths relative to the refs directory. Files that cannot be read are skipped.
#
# @param repo_dir [Pathname] Repository directory
# @param commit_hash [String] Commit hash to find refs for
# @return [Array<String>] List of refs pointing to this commit (empty when the
#   repository has no refs directory)
def self.get_refs_for_commit(repo_dir, commit_hash)
  refs_root = repo_dir.join("refs")
  return [] unless refs_root.exist?

  refs_root.glob("**/*").each_with_object([]) do |candidate, matching|
    next if candidate.directory?

    begin
      next unless candidate.read.strip == commit_hash

      matching << candidate.relative_path_from(refs_root).to_s
    rescue StandardError
      # Best effort: an unreadable ref file is ignored.
      next
    end
  end
end
|
|
242
|
+
|
|
243
|
+
# Locates the cache directory that holds a repository's files.
#
# The expected folder name is "{repo_type}s--{namespace}--{name}" for ids of
# the form "namespace/name" and "{repo_type}s--{repo_id}" otherwise. Nothing is
# created: the path is only returned when it already exists.
#
# @param repo_id [String] Repository ID
# @param repo_type [String] Type of repository ("model", "dataset", or "space")
# @param cache_dir [String, Pathname, nil] Custom cache directory
# @return [Pathname, nil] Path to the repository's cache directory, or nil if not found
#
# @example Get cache path for a model
#   cache_path = DurableHuggingfaceHub::Cache.cached_assets_path(
#     repo_id: "bert-base-uncased",
#     repo_type: "model"
#   )
#   puts cache_path # /home/user/.cache/huggingface/hub/models--bert-base-uncased
def self.cached_assets_path(repo_id:, repo_type: "model", cache_dir: nil)
  validators = DurableHuggingfaceHub::Utils::Validators
  validators.validate_repo_id(repo_id)
  # The validator's return value replaces the caller-supplied type.
  repo_type = validators.validate_repo_type(repo_type)

  cache_dir = FileDownload.resolve_cache_dir(cache_dir)

  segments = repo_id.split("/")
  id_part = segments.length == 2 ? segments.join("--") : repo_id

  candidate = cache_dir.join("#{repo_type}s--#{id_part}")
  candidate if candidate.exist?
end
|
|
275
|
+
|
|
276
|
+
# Strategy for deleting cache entries.
#
# Collects the repositories, revisions, and individual files that should be
# removed, lets the caller inspect the plan (#preview, #size_to_delete_str),
# and performs the removal on #execute.
#
# @example Delete specific repositories
#   cache_info = DurableHuggingfaceHub.scan_cache_dir
#   repos_to_delete = cache_info.repos.select { |repo| repo.size > 1_000_000_000 } # > 1GB
#   strategy = DeleteCacheStrategy.new(cache_dir: cache_info.cache_dir, repos: repos_to_delete)
#   puts "Will delete #{strategy.size_to_delete_str}"
#   strategy.execute
#
# @example Delete old revisions
#   cutoff = Time.now - (30 * 24 * 60 * 60) # 30 days ago
#   old_revisions = cache_info.repos.flat_map do |repo|
#     repo.revisions.select { |rev| rev.last_modified < cutoff }
#   end
#   strategy = DeleteCacheStrategy.new(cache_dir: cache_info.cache_dir, revisions: old_revisions)
#   strategy.execute
class DeleteCacheStrategy
  # Units used by #size_to_delete_str, smallest first.
  SIZE_UNITS = %w[B KB MB GB TB].freeze

  # @return [Array<DurableHuggingfaceHub::Types::CachedRepoInfo>] Repositories to delete
  attr_reader :repos

  # @return [Array<DurableHuggingfaceHub::Types::CachedRevisionInfo>] Revisions to delete
  attr_reader :revisions

  # @return [Array<DurableHuggingfaceHub::Types::CachedFileInfo>] Individual files to delete
  attr_reader :files

  # Initialize a new delete strategy.
  #
  # @param cache_dir [Pathname, String] The cache directory that contains the
  #   repository folders
  # @param repos [Array<DurableHuggingfaceHub::Types::CachedRepoInfo>] Repositories to delete
  # @param revisions [Array<DurableHuggingfaceHub::Types::CachedRevisionInfo>] Revisions to delete
  # @param files [Array<DurableHuggingfaceHub::Types::CachedFileInfo>] Individual files to delete
  def initialize(cache_dir:, repos: [], revisions: [], files: [])
    # Repository and revision lookups call Pathname methods on the cache
    # directory, so a String is converted up front.
    @cache_dir = cache_dir && Pathname(cache_dir)
    @repos = repos
    @revisions = revisions
    @files = files
  end

  # Total size that will be deleted in bytes.
  #
  # This is the plain sum of the sizes of all listed repositories, revisions,
  # and files; entries listed more than once are counted more than once.
  #
  # @return [Integer] Size in bytes
  def size_to_delete
    @repos.sum(&:size) + @revisions.sum(&:size) + @files.sum(&:size)
  end

  # Human-readable size string for what will be deleted.
  #
  # Uses 1024-based steps and one decimal place, e.g. "1.5 KB".
  #
  # @return [String] Size formatted as human-readable string
  def size_to_delete_str
    size = size_to_delete.to_f
    unit_index = 0

    while size >= 1024 && unit_index < SIZE_UNITS.length - 1
      size /= 1024.0
      unit_index += 1
    end

    format("%.1f %s", size, SIZE_UNITS[unit_index])
  end

  # Number of repositories that will be deleted.
  #
  # @return [Integer] Repository count
  def repo_count
    @repos.length
  end

  # Number of revisions that will be deleted.
  #
  # @return [Integer] Revision count
  def revision_count
    @revisions.length
  end

  # Number of files that will be deleted.
  #
  # @return [Integer] File count
  def file_count
    @files.length
  end

  # Preview what will be deleted.
  #
  # @return [String] Human-readable summary of what will be deleted; an empty
  #   string when nothing is scheduled for deletion
  def preview
    summary = []
    has_items = repo_count.positive? || revision_count.positive? || file_count.positive?

    if has_items
      summary << "Will delete:"
      summary << " #{repo_count} repositories" if repo_count.positive?
      summary << " #{revision_count} revisions" if revision_count.positive?
      summary << " #{file_count} files" if file_count.positive?
      summary << "Total size: #{size_to_delete_str}"

      if repo_count.positive?
        summary << ""
        summary << "Repositories:"
        @repos.each { |repo| summary << " #{repo.repo_id} (#{repo.size_str})" }
      end
    end

    summary.join("\n")
  end

  # Execute the deletion strategy.
  #
  # Deletes the listed files first, then revisions, then whole repositories.
  # Individual failures are reported with +warn+ and do not stop the run.
  # Use with caution - deletions are permanent.
  #
  # @return [Boolean] Always true
  def execute
    @files.each { |file_info| delete_file_safely(file_info.file_path) }
    @revisions.each { |revision_info| delete_revision_safely(revision_info) }
    @repos.each { |repo_info| delete_repository_safely(repo_info) }

    true
  end

  private

  # Safely delete a file or symlink.
  #
  # @param file_path [Pathname] Path to file to delete
  def delete_file_safely(file_path)
    # Pathname#exist? follows symlinks and reports false for a dangling link,
    # so the link itself has to be checked as well or it would never be removed.
    return unless file_path.symlink? || file_path.exist?

    # Unlinking a symlink removes only the link, not the file it points to.
    file_path.unlink
  rescue StandardError => e
    # Report the error but continue with other deletions
    warn "Failed to delete #{file_path}: #{e.message}"
  end

  # Safely delete a revision.
  #
  # Removes the revision's snapshot directory and every ref file that names
  # its commit. Nothing outside the snapshot directory and the refs directory
  # is touched here.
  #
  # @param revision_info [DurableHuggingfaceHub::Types::CachedRevisionInfo] Revision to delete
  def delete_revision_safely(revision_info)
    repo_dir = find_repo_dir_for_revision(revision_info)
    return unless repo_dir

    revision_dir = repo_dir.join("snapshots", revision_info.commit_hash)
    return unless revision_dir.exist?

    FileUtils.rm_rf(revision_dir)

    # Refs that still named the removed commit would now point at nothing.
    cleanup_refs_for_revision(repo_dir, revision_info.commit_hash)
  rescue StandardError => e
    warn "Failed to delete revision #{revision_info.commit_hash}: #{e.message}"
  end

  # Safely delete an entire repository.
  #
  # @param repo_info [DurableHuggingfaceHub::Types::CachedRepoInfo] Repository to delete
  def delete_repository_safely(repo_info)
    # Folder layout: "{repo_type}s--{repo_id with "/" replaced by "--"}"
    repo_dir_name = "#{repo_info.repo_type}s--#{repo_info.repo_id.gsub('/', '--')}"
    repo_dir = @cache_dir.join(repo_dir_name)

    return unless repo_dir.exist?

    FileUtils.rm_rf(repo_dir)
  rescue StandardError => e
    warn "Failed to delete repository #{repo_info.repo_id}: #{e.message}"
  end

  # Find repository directory for a revision.
  #
  # This is a simplified lookup: the revision does not record which
  # repository it belongs to, so the first repository folder that contains a
  # snapshot named after the commit hash is returned.
  #
  # @param revision_info [DurableHuggingfaceHub::Types::CachedRevisionInfo] Revision info
  # @return [Pathname, nil] Repository directory or nil if not found
  def find_repo_dir_for_revision(revision_info)
    @cache_dir.each_child do |repo_dir|
      next unless repo_dir.directory?

      snapshots_dir = repo_dir.join("snapshots")
      next unless snapshots_dir.exist?

      return repo_dir if snapshots_dir.join(revision_info.commit_hash).exist?
    end

    nil
  end

  # Clean up refs that pointed to a deleted revision.
  #
  # @param repo_dir [Pathname] Repository directory
  # @param commit_hash [String] Commit hash that was deleted
  def cleanup_refs_for_revision(repo_dir, commit_hash)
    refs_dir = repo_dir.join("refs")
    return unless refs_dir.exist?

    refs_dir.glob("**/*") do |ref_file|
      next if ref_file.directory?

      begin
        ref_file.unlink if ref_file.read.strip == commit_hash
      rescue StandardError
        # Skip ref files that cannot be read or removed
        next
      end
    end
  end
end
|
|
507
|
+
end
|
|
508
|
+
end
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
module DurableHuggingfaceHub
|
|
6
|
+
# Configuration management for the HuggingFace Hub client.
#
# This class provides a singleton configuration object that can be accessed
# and modified throughout the library. Configuration values are read from
# environment variables when an instance is created and can afterwards be
# changed programmatically.
#
# @example Accessing the configuration
#   DurableHuggingfaceHub::Configuration.instance.token
#
# @example Configuring programmatically
#   DurableHuggingfaceHub.configure do |config|
#     config.token = "hf_your_token_here"
#     config.cache_dir = "/custom/cache/path"
#   end
class Configuration
  # @return [String, nil] HuggingFace API token
  attr_accessor :token

  # @return [String] Base cache directory for HuggingFace Hub files
  attr_accessor :cache_dir

  # @return [String] HuggingFace Hub endpoint URL
  attr_accessor :endpoint

  # @return [Boolean] Whether to operate in offline mode
  attr_accessor :offline

  # @return [Boolean] Whether to disable progress bars during downloads
  attr_accessor :disable_progress_bars

  # @return [Boolean] Whether to disable telemetry
  attr_accessor :disable_telemetry

  # @return [Integer] Default timeout for API requests
  attr_accessor :request_timeout

  # @return [Integer] Default timeout for downloads
  attr_accessor :download_timeout

  # Creates a new Configuration instance with default values.
  #
  # Each value is read from its environment variable if that is set to a
  # non-empty string; otherwise the default is used. HF_TOKEN takes precedence
  # over HUGGING_FACE_HUB_TOKEN.
  def initialize
    @token = env_var("HF_TOKEN") || env_var("HUGGING_FACE_HUB_TOKEN")
    @cache_dir = determine_cache_dir
    @endpoint = env_var("HF_ENDPOINT") || Constants::ENDPOINT
    @offline = parse_boolean(env_var("HF_HUB_OFFLINE"), default: false)
    @disable_progress_bars = parse_boolean(env_var("HF_HUB_DISABLE_PROGRESS_BARS"), default: false)
    @disable_telemetry = parse_boolean(env_var("HF_HUB_DISABLE_TELEMETRY"), default: true)
    @request_timeout = parse_integer(env_var("HF_HUB_REQUEST_TIMEOUT"),
                                     default: Constants::DEFAULT_REQUEST_TIMEOUT)
    @download_timeout = parse_integer(env_var("HF_HUB_DOWNLOAD_TIMEOUT"),
                                      default: Constants::DEFAULT_DOWNLOAD_TIMEOUT)
  end

  # Returns the singleton configuration instance, creating it on first use.
  #
  # @return [Configuration] The singleton configuration object
  def self.instance
    @instance ||= new
  end

  # Replaces the singleton with a freshly initialised configuration.
  # Primarily used for testing.
  #
  # @return [Configuration] A new configuration instance
  def self.reset!
    @instance = new
  end

  # Returns the path to the HuggingFace Hub cache directory.
  #
  # The cache directory is created if it doesn't exist.
  #
  # @return [Pathname] Path to the HuggingFace Hub cache
  def hub_cache_dir
    path = Pathname.new(cache_dir).join(Constants::HF_CACHE_SUBDIR)
    path.mkpath unless path.exist?
    path
  end

  # Returns the path to the token file ("token" inside the cache directory).
  # The file is not created or read here.
  #
  # @return [Pathname] Path to the token storage file
  def token_path
    Pathname.new(cache_dir).join("token")
  end

  private

  # Retrieves an environment variable value, treating an empty string as unset.
  #
  # @param key [String] The environment variable name
  # @return [String, nil] The environment variable value or nil if not set
  def env_var(key)
    value = ENV[key]
    value&.empty? ? nil : value
  end

  # Parses a boolean value from a string.
  #
  # Recognizes common boolean representations, ignoring case and surrounding
  # whitespace:
  # - true: "1", "true", "yes", "on"
  # - false: "0", "false", "no", "off"
  #
  # @param value [String, nil] The string value to parse
  # @param default [Boolean] Value returned for nil or unrecognised input
  # @return [Boolean] The parsed boolean value
  def parse_boolean(value, default: false)
    return default if value.nil?

    case value.downcase.strip
    when "1", "true", "yes", "on"
      true
    when "0", "false", "no", "off"
      false
    else
      default
    end
  end

  # Parses a decimal integer value from a string.
  #
  # @param value [String, nil] The string value to parse
  # @param default [Integer] Value returned for nil or unparsable input
  # @return [Integer] The parsed integer value
  def parse_integer(value, default:)
    return default if value.nil?

    # An explicit base of 10 keeps a leading zero from switching to octal
    # ("010" is 10, not 8) and rejects "0x"/"0b"/"0o" prefixed values.
    Integer(value, 10)
  rescue ArgumentError
    default
  end

  # Determines the cache directory from environment variables or defaults.
  #
  # Priority order:
  # 1. HF_HOME
  # 2. XDG_CACHE_HOME/huggingface
  # 3. ~/.cache/huggingface (Linux/Mac)
  # 4. ~/AppData/Local/huggingface (Windows)
  #
  # @return [String] Path to the cache directory
  def determine_cache_dir
    hf_home = env_var("HF_HOME")
    return hf_home if hf_home

    xdg_cache = env_var("XDG_CACHE_HOME")
    return Pathname.new(xdg_cache).join("huggingface").to_s if xdg_cache

    # Default cache locations by platform
    home = Pathname.new(Dir.home)
    if Gem.win_platform?
      home.join("AppData", "Local", "huggingface").to_s
    else
      home.join(".cache", "huggingface").to_s
    end
  end
end
|
|
168
|
+
|
|
169
|
+
# Entry point for adjusting the library-wide configuration.
#
# When a block is given it receives the shared Configuration instance so that
# settings can be assigned on it. The shared instance is looked up again after
# the block has run and that object is returned.
#
# @example
#   DurableHuggingfaceHub.configure do |config|
#     config.token = "hf_your_token"
#     config.cache_dir = "/tmp/hf_cache"
#   end
#
# @yield [config] Yields the configuration object for modification
# @yieldparam config [Configuration] The configuration object
# @return [Configuration] The configuration object
def self.configure
  return Configuration.instance unless block_given?

  yield Configuration.instance
  Configuration.instance
end
|
|
184
|
+
|
|
185
|
+
# Shorthand reader for the shared configuration object.
#
# Equivalent to calling Configuration.instance directly.
#
# @return [Configuration] The current configuration object
def self.config
  Configuration.instance
end
|
|
191
|
+
end
|