durable_huggingface_hub 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +29 -0
- data/.rubocop.yml +108 -0
- data/CHANGELOG.md +127 -0
- data/README.md +547 -0
- data/Rakefile +106 -0
- data/devenv.lock +171 -0
- data/devenv.nix +15 -0
- data/devenv.yaml +8 -0
- data/huggingface_hub.gemspec +63 -0
- data/lib/durable_huggingface_hub/authentication.rb +245 -0
- data/lib/durable_huggingface_hub/cache.rb +508 -0
- data/lib/durable_huggingface_hub/configuration.rb +191 -0
- data/lib/durable_huggingface_hub/constants.rb +145 -0
- data/lib/durable_huggingface_hub/errors.rb +412 -0
- data/lib/durable_huggingface_hub/file_download.rb +831 -0
- data/lib/durable_huggingface_hub/hf_api.rb +1278 -0
- data/lib/durable_huggingface_hub/repo_card.rb +430 -0
- data/lib/durable_huggingface_hub/types/cache_info.rb +298 -0
- data/lib/durable_huggingface_hub/types/commit_info.rb +149 -0
- data/lib/durable_huggingface_hub/types/dataset_info.rb +158 -0
- data/lib/durable_huggingface_hub/types/model_info.rb +154 -0
- data/lib/durable_huggingface_hub/types/space_info.rb +158 -0
- data/lib/durable_huggingface_hub/types/user.rb +179 -0
- data/lib/durable_huggingface_hub/types.rb +205 -0
- data/lib/durable_huggingface_hub/utils/auth.rb +174 -0
- data/lib/durable_huggingface_hub/utils/headers.rb +220 -0
- data/lib/durable_huggingface_hub/utils/http.rb +329 -0
- data/lib/durable_huggingface_hub/utils/paths.rb +230 -0
- data/lib/durable_huggingface_hub/utils/progress.rb +217 -0
- data/lib/durable_huggingface_hub/utils/retry.rb +165 -0
- data/lib/durable_huggingface_hub/utils/validators.rb +236 -0
- data/lib/durable_huggingface_hub/version.rb +8 -0
- data/lib/huggingface_hub.rb +205 -0
- metadata +334 -0
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../types"
|
|
4
|
+
|
|
5
|
+
module DurableHuggingfaceHub
|
|
6
|
+
module Types
|
|
7
|
+
# Metadata record describing one file held in the cache.
#
# Bundles the file's path and byte size with optional validation data
# (ETag, commit hash) and optional access/modification timestamps.
#
# @example
#   cached_file = CachedFileInfo.new(
#     file_path: Pathname.new("/cache/blobs/abc123"),
#     size: 1024,
#     etag: "abc123",
#     commit_hash: "def456",
#     last_accessed: Time.now,
#     last_modified: Time.now
#   )
class CachedFileInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] file_path
  #   Filesystem path of the cached file
  #   @return [Pathname]
  attribute :file_path, Types::PathnameType

  # @!attribute [r] size
  #   File size, expressed in bytes
  #   @return [Integer]
  attribute :size, Types::Integer

  # @!attribute [r] etag
  #   ETag recorded for the file (used for cache validation)
  #   @return [String, nil]
  # NOTE(review): unlike the other optional attributes in this class, no
  # `.default(nil)` is declared here -- confirm whether that is intentional.
  attribute :etag, Types::OptionalString

  # @!attribute [r] commit_hash
  #   Git commit hash the file is associated with
  #   @return [String, nil]
  attribute :commit_hash, Types::OptionalString.default(nil)

  # @!attribute [r] last_accessed
  #   Time of the most recent access to the file
  #   @return [Time, nil]
  attribute :last_accessed, Types::OptionalTimestamp.default(nil)

  # @!attribute [r] last_modified
  #   Time of the most recent modification of the file
  #   @return [Time, nil]
  attribute :last_modified, Types::OptionalTimestamp.default(nil)

  # Formats {#size} for display using 1024-based units (B up to TB)
  # and two decimal places.
  #
  # @return [String] Formatted size, e.g. "1.00 KB" for 1024 bytes
  def size_str
    labels = %w[B KB MB GB TB]
    scaled = size.to_f
    position = 0

    # Move up one unit per division; never go past the largest label.
    until scaled < 1024 || position >= labels.length - 1
      scaled /= 1024.0
      position += 1
    end

    format("%.2f %s", scaled, labels[position])
  end
end
|
|
69
|
+
|
|
70
|
+
# Metadata record describing one cached revision of a repository.
#
# A revision is identified by its commit hash and carries the refs that
# point at it, the cached files it contains, and its total size.
#
# @example
#   revision = CachedRevisionInfo.new(
#     commit_hash: "abc123",
#     refs: ["main", "v1.0"],
#     files: [cached_file_info1, cached_file_info2],
#     size: 2048,
#     last_modified: Time.now
#   )
class CachedRevisionInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] commit_hash
  #   Git commit hash identifying this revision
  #   @return [String]
  attribute :commit_hash, Types::String

  # @!attribute [r] refs
  #   Refs (branches/tags) that point to this commit
  #   @return [Array<String>]
  attribute :refs, Types::StringArray

  # @!attribute [r] files
  #   Cached files belonging to this revision
  #   @return [Array<CachedFileInfo>]
  attribute :files, Types::Array.of(CachedFileInfo)

  # @!attribute [r] size
  #   Combined size of all files in this revision
  #   @return [Integer]
  attribute :size, Types::Integer

  # @!attribute [r] last_modified
  #   Time of the most recent modification of this revision
  #   @return [Time, nil]
  attribute :last_modified, Types::OptionalTimestamp.default(nil)

  # Formats {#size} for display using 1024-based units (B up to TB)
  # and two decimal places.
  #
  # @return [String] Formatted size, e.g. "2.00 KB" for 2048 bytes
  def size_str
    suffixes = %w[B KB MB GB TB]
    last_slot = suffixes.length - 1
    amount = size.to_f
    slot = 0

    loop do
      # Stop once the value fits the current unit or no larger unit exists.
      break if amount < 1024 || slot >= last_slot

      amount /= 1024.0
      slot += 1
    end

    format("%.2f %s", amount, suffixes[slot])
  end

  # Counts the entries in {#files}.
  #
  # @return [Integer] Number of files in this revision
  def file_count
    files.size
  end
end
|
|
133
|
+
|
|
134
|
+
# Metadata record describing one repository held in the cache.
#
# Groups every cached revision of the repository together with the
# repository's identity, total size and optional timestamps.
#
# @example
#   repo = CachedRepoInfo.new(
#     repo_id: "bert-base-uncased",
#     repo_type: "model",
#     revisions: [revision_info1, revision_info2],
#     size: 1048576,
#     last_accessed: Time.now,
#     last_modified: Time.now
#   )
class CachedRepoInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] repo_id
  #   Identifier of the repository
  #   @return [String]
  attribute :repo_id, Types::String

  # @!attribute [r] repo_type
  #   Kind of repository ("model", "dataset", or "space")
  #   @return [String]
  attribute :repo_type, Types::String

  # @!attribute [r] revisions
  #   Revisions of this repository present in the cache
  #   @return [Array<CachedRevisionInfo>]
  attribute :revisions, Types::Array.of(CachedRevisionInfo)

  # @!attribute [r] size
  #   Combined size of all revisions of this repository
  #   @return [Integer]
  attribute :size, Types::Integer

  # @!attribute [r] last_accessed
  #   Time of the most recent access to the repository
  #   @return [Time, nil]
  attribute :last_accessed, Types::OptionalTimestamp.default(nil)

  # @!attribute [r] last_modified
  #   Time of the most recent modification of the repository
  #   @return [Time, nil]
  attribute :last_modified, Types::OptionalTimestamp.default(nil)

  # Formats {#size} for display using 1024-based units (B up to TB)
  # and two decimal places.
  #
  # @return [String] Formatted size, e.g. "1.00 MB" for 1048576 bytes
  def size_str
    unit_names = %w[B KB MB GB TB]
    top = unit_names.length - 1
    value = size.to_f
    exponent = 0

    # Each pass divides by 1024 and advances to the next unit name.
    while exponent < top && value >= 1024
      value /= 1024.0
      exponent += 1
    end

    format("%.2f %s", value, unit_names[exponent])
  end

  # Counts the entries in {#revisions}.
  #
  # @return [Integer] Number of cached revisions
  def revision_count
    revisions.size
  end

  # Adds up the file counts reported by every revision.
  #
  # @return [Integer] Number of files across all revisions
  def file_count
    revisions.sum { |revision| revision.file_count }
  end
end
|
|
210
|
+
|
|
211
|
+
# Summary of an entire cache directory.
#
# Holds the cache location, every cached repository and the total size,
# and offers aggregate counts plus sorted views of the repositories.
#
# @example
#   cache_info = HFCacheInfo.new(
#     cache_dir: Pathname.new("/cache"),
#     repos: [repo_info1, repo_info2],
#     size: 2097152
#   )
class HFCacheInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] cache_dir
  #   Location of the cache directory
  #   @return [Pathname]
  attribute :cache_dir, Types::PathnameType

  # @!attribute [r] repos
  #   Repositories present in the cache
  #   @return [Array<CachedRepoInfo>]
  attribute :repos, Types::Array.of(CachedRepoInfo)

  # @!attribute [r] size
  #   Size of the whole cache, in bytes
  #   @return [Integer]
  attribute :size, Types::Integer

  # Formats {#size} for display using 1024-based units (B up to TB)
  # and two decimal places.
  #
  # @return [String] Formatted size, e.g. "2.00 MB" for 2097152 bytes
  def size_str
    labels = %w[B KB MB GB TB]
    highest = labels.length - 1
    remaining = size.to_f
    step = 0

    # Keep dividing while the value is still >= 1024 and a larger unit exists.
    until remaining < 1024 || step >= highest
      remaining /= 1024.0
      step += 1
    end

    format("%.2f %s", remaining, labels[step])
  end

  # Counts the entries in {#repos}.
  #
  # @return [Integer] Number of cached repositories
  def repo_count
    repos.size
  end

  # Adds up the revision counts reported by every repository.
  #
  # @return [Integer] Number of revisions across all repositories
  def revision_count
    repos.sum { |entry| entry.revision_count }
  end

  # Adds up the file counts reported by every repository.
  #
  # @return [Integer] Number of files across all repositories and revisions
  def file_count
    repos.sum { |entry| entry.file_count }
  end

  # Returns the repositories ordered from largest to smallest.
  #
  # @return [Array<CachedRepoInfo>] Repositories in descending size order
  def repos_by_size
    # Negating the size makes the ascending sort come out descending.
    repos.sort_by { |entry| -entry.size }
  end

  # Returns the repositories ordered from most to least recently accessed.
  # Repositories without an access time are ranked as if accessed at the
  # Unix epoch.
  #
  # @return [Array<CachedRepoInfo>] Repositories in descending access-time order
  def repos_by_last_accessed
    epoch = Time.at(0)
    oldest_first = repos.compact.sort_by { |entry| entry.last_accessed || epoch }
    oldest_first.reverse
  end

  # Returns the repositories ordered from most to least recently modified.
  # Repositories without a modification time are ranked as if modified at
  # the Unix epoch.
  #
  # @return [Array<CachedRepoInfo>] Repositories in descending modification-time order
  def repos_by_last_modified
    epoch = Time.at(0)
    oldest_first = repos.compact.sort_by { |entry| entry.last_modified || epoch }
    oldest_first.reverse
  end
end
|
|
297
|
+
end
|
|
298
|
+
end
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../types"
|
|
4
|
+
|
|
5
|
+
module DurableHuggingfaceHub
|
|
6
|
+
module Types
|
|
7
|
+
# Describes a single Git commit of a HuggingFace Hub repository.
#
# @example Building a CommitInfo from an API response
#   commit_info = CommitInfo.from_hash({
#     "oid" => "a1b2c3d4e5f6...",
#     "title" => "Update model weights",
#     "message" => "Update model weights\n\nImproved accuracy",
#     "date" => "2024-01-15T10:30:00Z"
#   })
#
# @example Reading commit fields
#   commit_info.oid   # => "a1b2c3d4e5f6..."
#   commit_info.title # => "Update model weights"
class CommitInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] oid
  #   @return [String] Commit OID (Git SHA)
  attribute :oid, Types::String

  # @!attribute [r] title
  #   @return [String] Commit title (first line of the message)
  attribute :title, Types::String

  # @!attribute [r] message
  #   @return [String, nil] Complete commit message
  attribute :message, Types::OptionalString.default(nil)

  # @!attribute [r] date
  #   @return [Time, nil] Timestamp of the commit
  attribute :date, Types::OptionalTimestamp.default(nil)

  # @!attribute [r] authors
  #   @return [Array<String>, nil] Authors of the commit
  attribute :authors, Types::OptionalStringArray.default(nil)

  # @!attribute [r] commit_url
  #   @return [String, nil] URL at which the commit can be viewed
  attribute :commit_url, Types::OptionalString.default(nil)

  # @!attribute [r] commit_message
  #   @return [String, nil] Alias for message (API compatibility)
  attribute :commit_message, Types::OptionalString.default(nil)

  # Abbreviates the OID to at most its first 7 characters.
  #
  # @return [String] Abbreviated OID
  def short_oid
    oid.slice(0, 7)
  end

  # Picks the commit message, using {#message} when it is set and
  # falling back to {#commit_message} otherwise.
  #
  # @return [String, nil] Commit message
  def full_message
    return message if message

    commit_message
  end

  # Renders the commit as "<short oid>: <title>".
  #
  # @return [String] One-line description
  def to_s
    abbreviated = short_oid
    "#{abbreviated}: #{title}"
  end

  # Renders a debugging representation showing the class name, the short
  # OID and at most the first 50 characters of the title.
  #
  # @return [String] Inspection string
  def inspect
    class_label = self.class.name
    oid_text = short_oid.inspect
    title_text = title[0, 50].inspect
    "#<#{class_label} oid=#{oid_text} title=#{title_text}>"
  end
end
|
|
79
|
+
|
|
80
|
+
# Describes a Git reference (branch or tag) of a HuggingFace Hub repository.
#
# @example Building a GitRefInfo from an API response
#   ref_info = GitRefInfo.from_hash({
#     "name" => "main",
#     "ref" => "refs/heads/main",
#     "targetCommit" => "a1b2c3d4e5f6..."
#   })
#
# @example Reading ref fields
#   ref_info.name          # => "main"
#   ref_info.target_commit # => "a1b2c3d4e5f6..."
class GitRefInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] name
  #   @return [String] Name of the reference (e.g., "main", "v1.0.0")
  attribute :name, Types::String

  # @!attribute [r] ref
  #   @return [String] Fully qualified reference path (e.g., "refs/heads/main")
  attribute :ref, Types::String

  # @!attribute [r] target_commit
  #   @return [String, nil] OID of the commit the reference points to
  attribute :target_commit, Types::OptionalString.default(nil)

  # Tells whether {#ref} lives under "refs/heads/".
  #
  # @return [Boolean] True for a branch reference
  def branch?
    ref.start_with?("refs/heads/")
  end

  # Tells whether {#ref} lives under "refs/tags/".
  #
  # @return [Boolean] True for a tag reference
  def tag?
    ref.start_with?("refs/tags/")
  end

  # Classifies the reference; the branch check takes precedence over the
  # tag check.
  #
  # @return [String] "branch", "tag", or "unknown"
  def ref_type
    if branch?
      "branch"
    elsif tag?
      "tag"
    else
      "unknown"
    end
  end

  # Renders the reference as "<type>: <name>".
  #
  # @return [String] One-line description
  def to_s
    kind = ref_type
    "#{kind}: #{name}"
  end

  # Renders a debugging representation showing the class name, the ref
  # name, its type and the target commit abbreviated to 7 characters
  # (shown as nil when no target commit is set).
  #
  # @return [String] Inspection string
  def inspect
    abbreviated = target_commit&.slice(0, 7)
    "#<#{self.class.name} name=#{name.inspect} type=#{ref_type} commit=#{abbreviated.inspect}>"
  end
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../types"
|
|
4
|
+
|
|
5
|
+
module DurableHuggingfaceHub
|
|
6
|
+
module Types
|
|
7
|
+
# Describes a dataset repository on HuggingFace Hub.
#
# Collects the dataset's identity, revision, tags, file listing,
# visibility flags, popularity counters and card metadata.
#
# @example Building a DatasetInfo from an API response
#   dataset_info = DatasetInfo.from_hash({
#     "id" => "squad",
#     "sha" => "a1b2c3d4...",
#     "tags" => ["question-answering", "en"],
#     "downloads" => 500000
#   })
#
# @example Reading dataset fields
#   dataset_info.id        # => "squad"
#   dataset_info.tags      # => ["question-answering", "en"]
#   dataset_info.downloads # => 500000
class DatasetInfo < DurableHuggingfaceHub::Struct
  include Loadable

  # @!attribute [r] id
  #   @return [String] ID of the dataset repository
  attribute :id, Types::RepoId

  # @!attribute [r] sha
  #   @return [String, nil] Git commit SHA of the current revision
  attribute :sha, Types::OptionalString.default(nil)

  # @!attribute [r] last_modified
  #   @return [Time, nil] Time of the last modification
  attribute :last_modified, Types::OptionalTimestamp.default(nil)

  # @!attribute [r] tags
  #   @return [Array<String>] Tags attached to the dataset (empty by default)
  attribute :tags, Types::StringArray.default([].freeze)

  # @!attribute [r] siblings
  #   @return [Array<Hash>, nil] Files contained in the repository
  attribute :siblings, Types::OptionalFileSiblings.default(nil)

  # @!attribute [r] private
  #   @return [Boolean, nil] Whether the repository is private
  attribute :private, Types::OptionalBool.default(nil)

  # @!attribute [r] gated
  #   @return [Boolean, String, nil] Gated access status
  attribute :gated, Types::OptionalGated.default(nil)

  # @!attribute [r] disabled
  #   @return [Boolean, nil] Whether the repository is disabled
  attribute :disabled, Types::OptionalBool.default(nil)

  # @!attribute [r] downloads
  #   @return [Integer, nil] Download count
  attribute :downloads, Types::OptionalInteger.default(nil)

  # @!attribute [r] likes
  #   @return [Integer, nil] Count of likes/stars
  attribute :likes, Types::OptionalInteger.default(nil)

  # @!attribute [r] author
  #   @return [String, nil] Name of the author or organization
  attribute :author, Types::OptionalString.default(nil)

  # @!attribute [r] created_at
  #   @return [Time, nil] Time the repository was created
  attribute :created_at, Types::OptionalTimestamp.default(nil)

  # @!attribute [r] card_data
  #   @return [Hash, nil] Metadata from the dataset card
  attribute :card_data, Types::OptionalHash.default(nil)

  # @!attribute [r] description
  #   @return [String, nil] Description of the dataset
  attribute :description, Types::OptionalString.default(nil)

  # @!attribute [r] citation
  #   @return [String, nil] Citation information
  attribute :citation, Types::OptionalString.default(nil)

  # @!attribute [r] downloads_all_time
  #   @return [Integer, nil] All-time download count
  attribute :downloads_all_time, Types::OptionalInteger.default(nil)

  # @!attribute [r] paperswithcode_id
  #   @return [String, nil] PapersWithCode identifier
  attribute :paperswithcode_id, Types::OptionalString.default(nil)

  # @!attribute [r] trending_score
  #   @return [Integer, nil] Trending score
  attribute :trending_score, Types::OptionalInteger.default(nil)

  # Extracts the "rfilename" value of every sibling entry, accepting
  # either a symbol or a string key and skipping entries that have neither.
  #
  # @return [Array<String>] File names; empty when no siblings are set
  def file_names
    return [] if siblings.nil?

    names = siblings.map do |sibling|
      sibling[:rfilename] || sibling["rfilename"]
    end
    names.compact
  end

  # Tells whether the given tag appears in {#tags}.
  #
  # @param tag [String] Tag to look for
  # @return [Boolean] True when the tag is present
  def has_tag?(tag)
    tags.include?(tag)
  end

  # Tells whether the repository is public. This is the negation of the
  # `private` attribute, so it is also true when that attribute is nil.
  #
  # @return [Boolean] True unless the repository is marked private
  def public?
    hidden = private
    !hidden
  end

  # Tells whether access to the repository is gated, i.e. the `gated`
  # attribute is true, "auto" or "manual".
  #
  # @return [Boolean] True when gated
  def gated?
    [true, "auto", "manual"].include?(gated)
  end

  # Tells whether the `disabled` attribute is exactly true.
  #
  # @return [Boolean] True when disabled
  def disabled?
    disabled == true
  end

  # Uses the repository ID as the string form of the dataset.
  #
  # @return [String] Repository ID
  def to_s
    id
  end

  # Renders a debugging representation showing the class name, the ID,
  # the SHA abbreviated to 7 characters, the tag count and the file count
  # (0 when no siblings are set).
  #
  # @return [String] Inspection string
  def inspect
    short_sha = sha&.slice(0, 7)
    file_total = siblings&.size || 0
    "#<#{self.class.name} id=#{id.inspect} sha=#{short_sha.inspect} " \
      "tags=#{tags.size} files=#{file_total}>"
  end
end
|
|
157
|
+
end
|
|
158
|
+
end
|