durable_huggingface_hub 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +7 -0
  2. data/.editorconfig +29 -0
  3. data/.rubocop.yml +108 -0
  4. data/CHANGELOG.md +127 -0
  5. data/README.md +547 -0
  6. data/Rakefile +106 -0
  7. data/devenv.lock +171 -0
  8. data/devenv.nix +15 -0
  9. data/devenv.yaml +8 -0
  10. data/huggingface_hub.gemspec +63 -0
  11. data/lib/durable_huggingface_hub/authentication.rb +245 -0
  12. data/lib/durable_huggingface_hub/cache.rb +508 -0
  13. data/lib/durable_huggingface_hub/configuration.rb +191 -0
  14. data/lib/durable_huggingface_hub/constants.rb +145 -0
  15. data/lib/durable_huggingface_hub/errors.rb +412 -0
  16. data/lib/durable_huggingface_hub/file_download.rb +831 -0
  17. data/lib/durable_huggingface_hub/hf_api.rb +1278 -0
  18. data/lib/durable_huggingface_hub/repo_card.rb +430 -0
  19. data/lib/durable_huggingface_hub/types/cache_info.rb +298 -0
  20. data/lib/durable_huggingface_hub/types/commit_info.rb +149 -0
  21. data/lib/durable_huggingface_hub/types/dataset_info.rb +158 -0
  22. data/lib/durable_huggingface_hub/types/model_info.rb +154 -0
  23. data/lib/durable_huggingface_hub/types/space_info.rb +158 -0
  24. data/lib/durable_huggingface_hub/types/user.rb +179 -0
  25. data/lib/durable_huggingface_hub/types.rb +205 -0
  26. data/lib/durable_huggingface_hub/utils/auth.rb +174 -0
  27. data/lib/durable_huggingface_hub/utils/headers.rb +220 -0
  28. data/lib/durable_huggingface_hub/utils/http.rb +329 -0
  29. data/lib/durable_huggingface_hub/utils/paths.rb +230 -0
  30. data/lib/durable_huggingface_hub/utils/progress.rb +217 -0
  31. data/lib/durable_huggingface_hub/utils/retry.rb +165 -0
  32. data/lib/durable_huggingface_hub/utils/validators.rb +236 -0
  33. data/lib/durable_huggingface_hub/version.rb +8 -0
  34. data/lib/huggingface_hub.rb +205 -0
  35. metadata +334 -0
@@ -0,0 +1,298 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../types"
4
+
5
+ module DurableHuggingfaceHub
6
+ module Types
7
# Metadata describing one file stored in the local cache.
#
# Each instance pairs the location of a cached file with its size and a few
# optional bookkeeping fields (ETag, commit hash, access/modification times).
#
# @example
#   cached_file = CachedFileInfo.new(
#     file_path: Pathname.new("/cache/blobs/abc123"),
#     size: 1024,
#     etag: "abc123",
#     commit_hash: "def456",
#     last_accessed: Time.now,
#     last_modified: Time.now
#   )
class CachedFileInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] file_path
  #   Where the cached file lives
  #   @return [Pathname]
  attribute :file_path, Types::PathnameType

  # @!attribute [r] size
  #   File size, expressed in bytes
  #   @return [Integer]
  attribute :size, Types::Integer

  # @!attribute [r] etag
  #   ETag recorded for the file (used for cache validation).
  #   Unlike the other optional attributes, no default is declared here.
  #   @return [String, nil]
  attribute :etag, Types::OptionalString

  # @!attribute [r] commit_hash
  #   Git commit hash the file is associated with
  #   @return [String, nil]
  attribute :commit_hash, Types::OptionalString.default(nil)

  # @!attribute [r] last_accessed
  #   Time of the most recent access to the file
  #   @return [Time, nil]
  attribute :last_accessed, Types::OptionalTimestamp.default(nil)

  # @!attribute [r] last_modified
  #   Time of the most recent modification of the file
  #   @return [Time, nil]
  attribute :last_modified, Types::OptionalTimestamp.default(nil)

  # Renders {#size} as a human-readable string.
  #
  # The byte count is divided by 1024 until it drops below 1024 or the
  # largest unit (TB) is reached, then printed with two decimals.
  #
  # @return [String] e.g. "1.50 KB" for 1536 bytes, "512.00 B" for 512 bytes
  def size_str
    suffixes = ["B", "KB", "MB", "GB", "TB"]
    last_index = suffixes.length - 1
    amount = size.to_f
    position = 0

    # Step up one unit at a time; stop at TB even if the amount is still large.
    until amount < 1024 || position >= last_index
      amount /= 1024.0
      position += 1
    end

    format("%.2f %s", amount, suffixes[position])
  end
end
69
+
70
# Metadata describing one cached revision of a repository.
#
# A revision is identified by its commit hash and carries the refs that
# point at it, the cached files it contains, and an aggregate size.
#
# @example
#   revision = CachedRevisionInfo.new(
#     commit_hash: "abc123",
#     refs: ["main", "v1.0"],
#     files: [cached_file_info1, cached_file_info2],
#     size: 2048,
#     last_modified: Time.now
#   )
class CachedRevisionInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] commit_hash
  #   Git commit hash identifying the revision
  #   @return [String]
  attribute :commit_hash, Types::String

  # @!attribute [r] refs
  #   Branch/tag names that point at this commit
  #   @return [Array<String>]
  attribute :refs, Types::StringArray

  # @!attribute [r] files
  #   Cached files that belong to the revision
  #   @return [Array<CachedFileInfo>]
  attribute :files, Types::Array.of(CachedFileInfo)

  # @!attribute [r] size
  #   Combined size of the revision's files
  #   @return [Integer]
  attribute :size, Types::Integer

  # @!attribute [r] last_modified
  #   Time of the most recent modification of the revision
  #   @return [Time, nil]
  attribute :last_modified, Types::OptionalTimestamp.default(nil)

  # Renders {#size} as a human-readable string.
  #
  # The value is divided by 1024 until it drops below 1024 or the largest
  # unit (TB) is reached, then printed with two decimals.
  #
  # @return [String] e.g. "2.00 KB" for a size of 2048
  def size_str
    suffixes = ["B", "KB", "MB", "GB", "TB"]
    last_index = suffixes.length - 1
    amount = size.to_f
    position = 0

    # Step up one unit at a time; stop at TB even if the amount is still large.
    until amount < 1024 || position >= last_index
      amount /= 1024.0
      position += 1
    end

    format("%.2f %s", amount, suffixes[position])
  end

  # Counts the entries in {#files}.
  #
  # @return [Integer] Number of files in this revision
  def file_count
    files.size
  end
end
133
+
134
# Metadata describing one repository held in the cache.
#
# Groups every cached revision of a repository together with its
# identifier, type, aggregate size and optional timestamps.
#
# @example
#   repo = CachedRepoInfo.new(
#     repo_id: "bert-base-uncased",
#     repo_type: "model",
#     revisions: [revision_info1, revision_info2],
#     size: 1048576,
#     last_accessed: Time.now,
#     last_modified: Time.now
#   )
class CachedRepoInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] repo_id
  #   Identifier of the repository
  #   @return [String]
  attribute :repo_id, Types::String

  # @!attribute [r] repo_type
  #   Kind of repository ("model", "dataset", or "space")
  #   @return [String]
  attribute :repo_type, Types::String

  # @!attribute [r] revisions
  #   Revisions of this repository present in the cache
  #   @return [Array<CachedRevisionInfo>]
  attribute :revisions, Types::Array.of(CachedRevisionInfo)

  # @!attribute [r] size
  #   Combined size of all revisions of the repository
  #   @return [Integer]
  attribute :size, Types::Integer

  # @!attribute [r] last_accessed
  #   Time of the most recent access to the repository
  #   @return [Time, nil]
  attribute :last_accessed, Types::OptionalTimestamp.default(nil)

  # @!attribute [r] last_modified
  #   Time of the most recent modification of the repository
  #   @return [Time, nil]
  attribute :last_modified, Types::OptionalTimestamp.default(nil)

  # Renders {#size} as a human-readable string.
  #
  # The value is divided by 1024 until it drops below 1024 or the largest
  # unit (TB) is reached, then printed with two decimals.
  #
  # @return [String] e.g. "1.00 MB" for a size of 1048576
  def size_str
    suffixes = ["B", "KB", "MB", "GB", "TB"]
    last_index = suffixes.length - 1
    amount = size.to_f
    position = 0

    # Step up one unit at a time; stop at TB even if the amount is still large.
    until amount < 1024 || position >= last_index
      amount /= 1024.0
      position += 1
    end

    format("%.2f %s", amount, suffixes[position])
  end

  # Counts the entries in {#revisions}.
  #
  # @return [Integer] Number of cached revisions
  def revision_count
    revisions.size
  end

  # Adds up the per-revision file counts.
  #
  # Each revision contributes its own count, so the result is a plain sum
  # over {#revisions}.
  #
  # @return [Integer] Total file count across all revisions
  def file_count
    revisions.sum { |revision| revision.file_count }
  end
end
210
+
211
# Summary of an entire cache directory.
#
# Holds the cache location, every cached repository found in it, and the
# overall size, plus helpers for counting and ordering the repositories.
#
# @example
#   cache_info = HFCacheInfo.new(
#     cache_dir: Pathname.new("/cache"),
#     repos: [repo_info1, repo_info2],
#     size: 2097152
#   )
class HFCacheInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] cache_dir
  #   Location of the cache directory
  #   @return [Pathname]
  attribute :cache_dir, Types::PathnameType

  # @!attribute [r] repos
  #   Repositories present in the cache
  #   @return [Array<CachedRepoInfo>]
  attribute :repos, Types::Array.of(CachedRepoInfo)

  # @!attribute [r] size
  #   Overall cache size, expressed in bytes
  #   @return [Integer]
  attribute :size, Types::Integer

  # Renders {#size} as a human-readable string.
  #
  # The byte count is divided by 1024 until it drops below 1024 or the
  # largest unit (TB) is reached, then printed with two decimals.
  #
  # @return [String] e.g. "2.00 MB" for 2097152 bytes
  def size_str
    suffixes = ["B", "KB", "MB", "GB", "TB"]
    last_index = suffixes.length - 1
    amount = size.to_f
    position = 0

    # Step up one unit at a time; stop at TB even if the amount is still large.
    until amount < 1024 || position >= last_index
      amount /= 1024.0
      position += 1
    end

    format("%.2f %s", amount, suffixes[position])
  end

  # Counts the entries in {#repos}.
  #
  # @return [Integer] Number of cached repositories
  def repo_count
    repos.size
  end

  # Adds up the revision counts of every repository.
  #
  # @return [Integer] Total revision count
  def revision_count
    repos.sum { |repo| repo.revision_count }
  end

  # Adds up the file counts of every repository.
  #
  # @return [Integer] Total file count across repositories and revisions
  def file_count
    repos.sum { |repo| repo.file_count }
  end

  # Lists the repositories from largest to smallest.
  #
  # @return [Array<CachedRepoInfo>] New array ordered by descending size
  def repos_by_size
    # Sorting on the negated size yields descending order.
    repos.sort_by { |entry| -entry.size }
  end

  # Lists the repositories from most to least recently accessed.
  #
  # A repository without a `last_accessed` value is ranked as if it had
  # been accessed at `Time.at(0)`.
  #
  # @return [Array<CachedRepoInfo>] New array ordered by descending access time
  def repos_by_last_accessed
    fallback = Time.at(0)
    oldest_first = repos.compact.sort_by { |entry| entry.last_accessed || fallback }
    oldest_first.reverse
  end

  # Lists the repositories from most to least recently modified.
  #
  # A repository without a `last_modified` value is ranked as if it had
  # been modified at `Time.at(0)`.
  #
  # @return [Array<CachedRepoInfo>] New array ordered by descending modification time
  def repos_by_last_modified
    fallback = Time.at(0)
    oldest_first = repos.compact.sort_by { |entry| entry.last_modified || fallback }
    oldest_first.reverse
  end
end
297
+ end
298
+ end
@@ -0,0 +1,149 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../types"
4
+
5
+ module DurableHuggingfaceHub
6
+ module Types
7
# Describes a single Git commit of a HuggingFace Hub repository.
#
# @example Building a CommitInfo from an API response
#   commit_info = CommitInfo.from_hash({
#     "oid" => "a1b2c3d4e5f6...",
#     "title" => "Update model weights",
#     "message" => "Update model weights\n\nImproved accuracy",
#     "date" => "2024-01-15T10:30:00Z"
#   })
#
# @example Reading commit details
#   commit_info.oid       # => "a1b2c3d4e5f6..."
#   commit_info.title     # => "Update model weights"
class CommitInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] oid
  #   @return [String] OID of the commit (Git SHA)
  attribute :oid, Types::String

  # @!attribute [r] title
  #   @return [String] Title of the commit (first line of its message)
  attribute :title, Types::String

  # @!attribute [r] message
  #   @return [String, nil] Complete commit message
  attribute :message, Types::OptionalString.default(nil)

  # @!attribute [r] date
  #   @return [Time, nil] When the commit was made
  attribute :date, Types::OptionalTimestamp.default(nil)

  # @!attribute [r] authors
  #   @return [Array<String>, nil] Authors of the commit
  attribute :authors, Types::OptionalStringArray.default(nil)

  # @!attribute [r] commit_url
  #   @return [String, nil] URL at which the commit can be viewed
  attribute :commit_url, Types::OptionalString.default(nil)

  # @!attribute [r] commit_message
  #   @return [String, nil] Alternate field for the message (API compatibility)
  attribute :commit_message, Types::OptionalString.default(nil)

  # Abbreviates the OID to at most its first 7 characters.
  #
  # @return [String] Short OID
  def short_oid
    oid.slice(0, 7)
  end

  # Picks the commit message, using `message` when it is set and falling
  # back to `commit_message` otherwise.
  #
  # @return [String, nil] Commit message
  def full_message
    return message if message

    commit_message
  end

  # One-line summary in the form "<short oid>: <title>".
  #
  # @return [String] Description string
  def to_s
    abbreviated = short_oid
    "#{abbreviated}: #{title}"
  end

  # Compact debugging representation showing the short OID and at most the
  # first 50 characters of the title.
  #
  # @return [String] Inspection string
  def inspect
    oid_repr = short_oid.inspect
    title_repr = title[0, 50].inspect
    "#<#{self.class.name} oid=#{oid_repr} title=#{title_repr}>"
  end
end
79
+
80
# Describes a Git reference (branch or tag) of a HuggingFace Hub repository.
#
# @example Building a GitRefInfo from an API response
#   ref_info = GitRefInfo.from_hash({
#     "name" => "main",
#     "ref" => "refs/heads/main",
#     "targetCommit" => "a1b2c3d4e5f6..."
#   })
#
# @example Reading ref details
#   ref_info.name            # => "main"
#   ref_info.target_commit   # => "a1b2c3d4e5f6..."
class GitRefInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] name
  #   @return [String] Short name of the reference (e.g., "main", "v1.0.0")
  attribute :name, Types::String

  # @!attribute [r] ref
  #   @return [String] Fully qualified reference path (e.g., "refs/heads/main")
  attribute :ref, Types::String

  # @!attribute [r] target_commit
  #   @return [String, nil] OID of the commit the reference points at
  attribute :target_commit, Types::OptionalString.default(nil)

  # Tells whether {#ref} lives under "refs/heads/".
  #
  # @return [Boolean] True for a branch reference
  def branch?
    ref.start_with?("refs/heads/")
  end

  # Tells whether {#ref} lives under "refs/tags/".
  #
  # @return [Boolean] True for a tag reference
  def tag?
    ref.start_with?("refs/tags/")
  end

  # Classifies the reference; the branch check takes precedence over the
  # tag check.
  #
  # @return [String] "branch", "tag", or "unknown"
  def ref_type
    if branch?
      "branch"
    elsif tag?
      "tag"
    else
      "unknown"
    end
  end

  # One-line summary in the form "<type>: <name>".
  #
  # @return [String] Description string
  def to_s
    kind = ref_type
    "#{kind}: #{name}"
  end

  # Compact debugging representation with the name, the reference type and
  # at most the first 7 characters of the target commit (`nil` when unset).
  #
  # @return [String] Inspection string
  def inspect
    commit_repr = target_commit&.[](0, 7).inspect
    "#<#{self.class.name} name=#{name.inspect} type=#{ref_type} commit=#{commit_repr}>"
  end
end
148
+ end
149
+ end
@@ -0,0 +1,158 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../types"
4
+
5
+ module DurableHuggingfaceHub
6
+ module Types
7
# Describes a dataset repository on HuggingFace Hub.
#
# Carries the dataset's identifier together with optional metadata such as
# tags, file listing, access flags, statistics and card data.
#
# @example Building a DatasetInfo from an API response
#   dataset_info = DatasetInfo.from_hash({
#     "id" => "squad",
#     "sha" => "a1b2c3d4...",
#     "tags" => ["question-answering", "en"],
#     "downloads" => 500000
#   })
#
# @example Reading dataset details
#   dataset_info.id          # => "squad"
#   dataset_info.tags        # => ["question-answering", "en"]
#   dataset_info.downloads   # => 500000
class DatasetInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] id
  #   @return [String] ID of the dataset repository
  attribute :id, Types::RepoId

  # @!attribute [r] sha
  #   @return [String, nil] Git commit SHA of the current revision
  attribute :sha, Types::OptionalString.default(nil)

  # @!attribute [r] last_modified
  #   @return [Time, nil] When the repository was last modified
  attribute :last_modified, Types::OptionalTimestamp.default(nil)

  # @!attribute [r] tags
  #   @return [Array<String>] Tags attached to the dataset (defaults to an empty, frozen array)
  attribute :tags, Types::StringArray.default([].freeze)

  # @!attribute [r] siblings
  #   @return [Array<Hash>, nil] Files contained in the repository
  attribute :siblings, Types::OptionalFileSiblings.default(nil)

  # @!attribute [r] private
  #   @return [Boolean, nil] Whether the repository is private
  attribute :private, Types::OptionalBool.default(nil)

  # @!attribute [r] gated
  #   @return [Boolean, String, nil] Gated-access status
  attribute :gated, Types::OptionalGated.default(nil)

  # @!attribute [r] disabled
  #   @return [Boolean, nil] Whether the repository is disabled
  attribute :disabled, Types::OptionalBool.default(nil)

  # @!attribute [r] downloads
  #   @return [Integer, nil] Number of downloads
  attribute :downloads, Types::OptionalInteger.default(nil)

  # @!attribute [r] likes
  #   @return [Integer, nil] Number of likes/stars
  attribute :likes, Types::OptionalInteger.default(nil)

  # @!attribute [r] author
  #   @return [String, nil] Name of the author or organization
  attribute :author, Types::OptionalString.default(nil)

  # @!attribute [r] created_at
  #   @return [Time, nil] When the repository was created
  attribute :created_at, Types::OptionalTimestamp.default(nil)

  # @!attribute [r] card_data
  #   @return [Hash, nil] Metadata from the dataset card
  attribute :card_data, Types::OptionalHash.default(nil)

  # @!attribute [r] description
  #   @return [String, nil] Description of the dataset
  attribute :description, Types::OptionalString.default(nil)

  # @!attribute [r] citation
  #   @return [String, nil] Citation information
  attribute :citation, Types::OptionalString.default(nil)

  # @!attribute [r] downloads_all_time
  #   @return [Integer, nil] Number of downloads over all time
  attribute :downloads_all_time, Types::OptionalInteger.default(nil)

  # @!attribute [r] paperswithcode_id
  #   @return [String, nil] PapersWithCode identifier
  attribute :paperswithcode_id, Types::OptionalString.default(nil)

  # @!attribute [r] trending_score
  #   @return [Integer, nil] Trending score
  attribute :trending_score, Types::OptionalInteger.default(nil)

  # Collects the file names listed in {#siblings}.
  #
  # Each entry is looked up under the `:rfilename` key first and the
  # `"rfilename"` key second; entries with neither are dropped.
  #
  # @return [Array<String>] File names, or an empty array when siblings is nil
  def file_names
    entries = siblings
    return [] if entries.nil?

    names = entries.map do |entry|
      entry[:rfilename] || entry["rfilename"]
    end
    names.compact
  end

  # Tells whether the given tag appears in {#tags}.
  #
  # @param tag [String] Tag to look for
  # @return [Boolean] True when the tag is present
  def has_tag?(tag)
    tags.include?(tag)
  end

  # Tells whether the repository is public, i.e. the `private` attribute is
  # not truthy. A nil (unset) `private` value therefore counts as public.
  #
  # @return [Boolean] True if public
  def public?
    !private
  end

  # Tells whether access to the repository is gated.
  #
  # Only `true`, "auto" and "manual" count as gated; any other value
  # (including nil and false) does not.
  #
  # @return [Boolean] True if gated
  def gated?
    [true, "auto", "manual"].include?(gated)
  end

  # Tells whether the repository is disabled. Only an explicit `true`
  # counts; nil and false both yield false.
  #
  # @return [Boolean] True if disabled
  def disabled?
    disabled == true
  end

  # Uses the dataset ID as the string form.
  #
  # @return [String] Description string
  def to_s
    id
  end

  # Compact debugging representation with the ID, at most the first 7
  # characters of the SHA (`nil` when unset), the tag count and the file
  # count (0 when siblings is nil).
  #
  # @return [String] Inspection string
  def inspect
    sha_repr = sha&.[](0, 7).inspect
    file_total = siblings&.size || 0
    "#<#{self.class.name} id=#{id.inspect} sha=#{sha_repr} tags=#{tags.size} files=#{file_total}>"
  end
end
157
+ end
158
+ end