durable_huggingface_hub 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +29 -0
- data/.rubocop.yml +108 -0
- data/CHANGELOG.md +127 -0
- data/README.md +547 -0
- data/Rakefile +106 -0
- data/devenv.lock +171 -0
- data/devenv.nix +15 -0
- data/devenv.yaml +8 -0
- data/huggingface_hub.gemspec +63 -0
- data/lib/durable_huggingface_hub/authentication.rb +245 -0
- data/lib/durable_huggingface_hub/cache.rb +508 -0
- data/lib/durable_huggingface_hub/configuration.rb +191 -0
- data/lib/durable_huggingface_hub/constants.rb +145 -0
- data/lib/durable_huggingface_hub/errors.rb +412 -0
- data/lib/durable_huggingface_hub/file_download.rb +831 -0
- data/lib/durable_huggingface_hub/hf_api.rb +1278 -0
- data/lib/durable_huggingface_hub/repo_card.rb +430 -0
- data/lib/durable_huggingface_hub/types/cache_info.rb +298 -0
- data/lib/durable_huggingface_hub/types/commit_info.rb +149 -0
- data/lib/durable_huggingface_hub/types/dataset_info.rb +158 -0
- data/lib/durable_huggingface_hub/types/model_info.rb +154 -0
- data/lib/durable_huggingface_hub/types/space_info.rb +158 -0
- data/lib/durable_huggingface_hub/types/user.rb +179 -0
- data/lib/durable_huggingface_hub/types.rb +205 -0
- data/lib/durable_huggingface_hub/utils/auth.rb +174 -0
- data/lib/durable_huggingface_hub/utils/headers.rb +220 -0
- data/lib/durable_huggingface_hub/utils/http.rb +329 -0
- data/lib/durable_huggingface_hub/utils/paths.rb +230 -0
- data/lib/durable_huggingface_hub/utils/progress.rb +217 -0
- data/lib/durable_huggingface_hub/utils/retry.rb +165 -0
- data/lib/durable_huggingface_hub/utils/validators.rb +236 -0
- data/lib/durable_huggingface_hub/version.rb +8 -0
- data/lib/huggingface_hub.rb +205 -0
- metadata +334 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DurableHuggingfaceHub
|
|
4
|
+
module Utils
|
|
5
|
+
# Progress tracking for long-running operations.
#
# Keeps a running count against an optional expected total and, when a
# callback (or block) was supplied, invokes it after every state change.
#
# @example Basic progress tracking
#   progress = Progress.new(total: 1000)
#   progress.update(100)  # 10% complete
#   progress.update(500)  # 60% complete (amounts are cumulative)
#   progress.finish
#
# @example With callback
#   progress = Progress.new(total: 1000) do |current, total, percentage|
#     puts "Progress: #{percentage.round(1)}% (#{current}/#{total})"
#   end
#   progress.update(500)  # Calls callback
class Progress
  # @return [Integer, nil] Total size/count expected
  attr_reader :total

  # @return [Integer] Current progress
  attr_reader :current

  # @return [Time] Start time
  attr_reader :start_time

  # Creates a new Progress tracker.
  #
  # @param total [Integer, nil] Total size/count expected
  # @param callback [Proc, nil] Callback to invoke on updates; takes
  #   precedence over the block when both are given
  # @yield [current, total, percentage] Optional block called on updates
  # @yieldparam current [Integer] Current progress
  # @yieldparam total [Integer, nil] Total expected
  # @yieldparam percentage [Float, nil] Percentage complete, nil when total is unknown
  # @raise [ValidationError] If total is provided but not a positive integer
  def initialize(total: nil, callback: nil, &block)
    validate_total(total)
    @total = total
    @current = 0
    @start_time = Time.now
    @callback = callback || block
    @finished = false
  end

  # Adds to the current progress and notifies the callback.
  # Does nothing once the tracker is finished.
  #
  # @param amount [Integer] Amount to add to current progress
  # @return [void]
  # @raise [ValidationError] If amount is not a non-negative integer
  def update(amount)
    return if @finished

    validate_amount(amount)
    @current += amount
    notify_callback
  end

  # Sets the current progress to a specific value and notifies the callback.
  # Does nothing once the tracker is finished.
  #
  # @param value [Integer] New current value
  # @return [void]
  # @raise [ValidationError] If value is not a non-negative integer
  def set(value)
    return if @finished

    validate_value(value)
    @current = value
    notify_callback
  end

  # Marks the progress as finished. When a total is known, current is
  # snapped to it. Subsequent calls are no-ops.
  #
  # @return [void]
  def finish
    return if @finished

    @finished = true
    @current = @total if @total
    notify_callback
  end

  # Checks if progress is finished.
  #
  # @return [Boolean] True if finished
  def finished?
    @finished
  end

  # Calculates the percentage complete, rounded to two decimals.
  #
  # The value is not capped: it exceeds 100 when current has been pushed
  # past total.
  #
  # @return [Float, nil] Percentage or nil if total unknown
  def percentage
    return nil unless @total&.positive?

    (@current.to_f / @total * 100).round(2)
  end

  # Calculates elapsed time since construction (or the last reset).
  #
  # @return [Float] Elapsed seconds
  def elapsed
    Time.now - @start_time
  end

  # Estimates time remaining from the average rate observed so far.
  #
  # @return [Float, nil] Estimated seconds remaining (never negative), or
  #   nil if the total is unknown, nothing has been processed yet, or no
  #   time has elapsed
  def eta
    return nil unless @total&.positive? && @current.positive?

    elapsed_time = elapsed
    return nil if elapsed_time <= 0

    rate = @current.to_f / elapsed_time
    # current may legitimately overshoot total (update/set do not cap it);
    # clamp so an overshoot reports "done" instead of a negative duration.
    remaining = [@total - @current, 0].max
    remaining / rate
  end

  # Resets the progress tracker to its initial state and notifies the callback.
  #
  # @return [void]
  def reset
    @current = 0
    @start_time = Time.now
    @finished = false
    notify_callback
  end

  # Returns a string representation of the progress.
  #
  # @return [String] "current/total (pct%)" when total is known,
  #   "current completed" otherwise
  def to_s
    if @total
      "#{@current}/#{@total} (#{percentage&.round(1)}%)"
    else
      "#{@current} completed"
    end
  end

  private

  # Validates the total parameter.
  #
  # @param total [Integer, nil] Total value to validate
  # @raise [ValidationError] If total is provided but not a positive integer
  def validate_total(total)
    return if total.nil?

    unless total.is_a?(Integer) && total.positive?
      raise ValidationError.new("total", "Total must be a positive integer, got #{total.inspect}")
    end
  end

  # Validates the amount parameter.
  #
  # @param amount [Integer] Amount to validate
  # @raise [ValidationError] If amount is not a non-negative integer
  def validate_amount(amount)
    unless amount.is_a?(Integer) && amount >= 0
      raise ValidationError.new("amount", "Amount must be a non-negative integer, got #{amount.inspect}")
    end
  end

  # Validates the value parameter.
  #
  # @param value [Integer] Value to validate
  # @raise [ValidationError] If value is not a non-negative integer
  def validate_value(value)
    unless value.is_a?(Integer) && value >= 0
      raise ValidationError.new("value", "Value must be a non-negative integer, got #{value.inspect}")
    end
  end

  # Invokes the callback, if any, with (current, total, percentage).
  def notify_callback
    return unless @callback

    @callback.call(@current, @total, percentage)
  end
end
|
|
185
|
+
|
|
186
|
+
# No-op progress tracker for when progress tracking is disabled.
#
# Exposes the same public methods as Progress so callers can use either
# one without checking which they were given. Nothing is recorded and no
# callback is ever invoked.
class NullProgress
  # Accepts (and ignores) any arguments, so it can be constructed with the
  # same call that would build a Progress.
  def initialize(*_args, **_options, &_block); end

  # @return [nil] No total is tracked
  def total
    nil
  end

  # @return [Integer] Always 0
  def current
    0
  end

  # @return [nil] No start time is recorded
  def start_time
    nil
  end

  # Ignores the amount.
  #
  # @return [nil]
  def update(_amount); end

  # Ignores the value.
  #
  # @return [nil]
  def set(_value); end

  # Does nothing.
  #
  # @return [nil]
  def finish; end

  # @return [Boolean] Always false, even after #finish
  def finished?
    false
  end

  # @return [nil] Percentage is never known
  def percentage
    nil
  end

  # @return [Integer] Always 0
  def elapsed
    0
  end

  # @return [nil] No estimate is available
  def eta
    nil
  end

  # Does nothing.
  #
  # @return [nil]
  def reset; end

  # @return [String] The literal "NullProgress"
  def to_s
    "NullProgress"
  end
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "faraday"
|
|
4
|
+
|
|
5
|
+
module DurableHuggingfaceHub
|
|
6
|
+
module Utils
|
|
7
|
+
# Retry logic with exponential backoff for HTTP requests.
|
|
8
|
+
#
|
|
9
|
+
# This module provides retry functionality for handling transient failures
|
|
10
|
+
# in HTTP requests, with configurable retry attempts and exponential backoff.
|
|
11
|
+
module Retry
|
|
12
|
+
# Retry budget used when the caller does not pass max_retries.
DEFAULT_MAX_RETRIES = 3

# Seconds to wait before the first retry when the caller does not pass initial_delay.
DEFAULT_INITIAL_DELAY = 1

# Upper bound, in seconds, applied to every computed backoff delay.
MAX_DELAY = 60

# Factor by which the delay grows from one retry to the next.
BACKOFF_MULTIPLIER = 2

# HTTP status codes that should trigger a retry:
# 408 Request Timeout, 429 Too Many Requests, 500 Internal Server Error,
# 502 Bad Gateway, 503 Service Unavailable, 504 Gateway Timeout.
RETRYABLE_STATUS_CODES = [408, 429, 500, 502, 503, 504].freeze
|
|
33
|
+
|
|
34
|
+
# Exception classes that should trigger a retry regardless of HTTP status:
# Faraday's timeout, connection-failure and SSL error classes.
RETRYABLE_ERRORS = [Faraday::TimeoutError, Faraday::ConnectionFailed, Faraday::SSLError].freeze
|
|
40
|
+
|
|
41
|
+
# Executes a block, re-running it with exponential backoff when it fails
# with a retryable error.
#
# The block runs at most max_retries + 1 times. Errors for which
# retryable_error? is false are re-raised immediately, without logging or
# sleeping. This includes StopIteration: it propagates to the caller like
# any other non-retryable error.
#
# @param max_retries [Integer] Maximum number of retry attempts (must be >= 0)
# @param initial_delay [Float] Initial delay in seconds (must be > 0)
# @param logger [Logger, nil] Logger for retry messages (receives #warn
#   before each retry and #error when retries are exhausted)
# @yield Block to execute with retry
# @yieldreturn Result of the block
# @return Result of the block if successful
# @raise [ArgumentError] If parameters are invalid
# @raise Last exception if all retries exhausted
#
# @example Basic usage
#   result = Retry.with_retry do
#     perform_http_request
#   end
#
# @example Custom retry configuration
#   result = Retry.with_retry(max_retries: 5, initial_delay: 2) do
#     risky_operation
#   end
def self.with_retry(max_retries: DEFAULT_MAX_RETRIES, initial_delay: DEFAULT_INITIAL_DELAY, logger: nil)
  validate_max_retries(max_retries)
  validate_initial_delay(initial_delay)

  attempt = 0

  # begin/rescue/retry rather than Kernel#loop: loop rescues StopIteration,
  # which would turn a StopIteration raised by the block into a silent nil
  # return.
  begin
    yield
  rescue StandardError => e
    # Non-retryable errors go straight back to the caller.
    raise unless retryable_error?(e)

    attempt += 1

    if attempt > max_retries
      logger&.error("Max retries (#{max_retries}) exhausted for #{e.class}: #{e.message}")
      raise
    end

    delay = calculate_delay(attempt, initial_delay)
    logger&.warn("Retry attempt #{attempt}/#{max_retries} after #{delay}s due to #{e.class}: #{e.message}")

    sleep(delay)
    retry
  end
end
|
|
98
|
+
|
|
99
|
+
# Checks if an error should trigger a retry.
#
# An error is retryable when any of the following holds:
# - it is an instance of one of RETRYABLE_ERRORS;
# - it is an HfHubHTTPError whose status_code is in RETRYABLE_STATUS_CODES;
# - it exposes a #response carrying a status in RETRYABLE_STATUS_CODES.
#
# The response may be a Hash with a :status key or an object responding
# to #status. Any other shape is treated as "no status" so the predicate
# never raises and never masks the error being inspected.
#
# @param error [Exception] The error to check
# @return [Boolean] True if error is retryable
def self.retryable_error?(error)
  return true if RETRYABLE_ERRORS.any? { |klass| error.is_a?(klass) }

  # An HfHubHTTPError with a status is decided by that status alone.
  if error.is_a?(HfHubHTTPError) && error.status_code
    return RETRYABLE_STATUS_CODES.include?(error.status_code)
  end

  return false unless error.respond_to?(:response)

  response = error.response
  status =
    if response.is_a?(Hash)
      response[:status]
    elsif response.respond_to?(:status)
      response.status
    end

  status ? RETRYABLE_STATUS_CODES.include?(status) : false
end
|
|
120
|
+
|
|
121
|
+
# Computes how long to wait before a given retry attempt.
#
# The delay doubles with each attempt (initial_delay * BACKOFF_MULTIPLIER
# raised to attempt - 1) and never exceeds MAX_DELAY.
#
# @param attempt [Integer] Current attempt number (1-based)
# @param initial_delay [Float] Initial delay in seconds
# @return [Float] Delay in seconds (capped at MAX_DELAY)
#
# @example
#   Retry.calculate_delay(1, 1.0)  # => 1.0
#   Retry.calculate_delay(2, 1.0)  # => 2.0
#   Retry.calculate_delay(3, 1.0)  # => 4.0
#   Retry.calculate_delay(4, 1.0)  # => 8.0
def self.calculate_delay(attempt, initial_delay)
  growth = BACKOFF_MULTIPLIER**(attempt - 1)
  uncapped = initial_delay * growth

  uncapped > MAX_DELAY ? MAX_DELAY : uncapped
end
|
|
139
|
+
|
|
140
|
+
# Ensures max_retries is an Integer that is zero or greater.
#
# @param max_retries [Integer] Maximum number of retry attempts
# @return [nil] When the value is acceptable
# @raise [ArgumentError] If max_retries is not a non-negative integer
# @private
def self.validate_max_retries(max_retries)
  return if max_retries.is_a?(Integer) && max_retries >= 0

  raise ArgumentError, "max_retries must be a non-negative integer, got #{max_retries.inspect}"
end
|
|
150
|
+
private_class_method :validate_max_retries
|
|
151
|
+
|
|
152
|
+
# Validates the initial_delay parameter.
#
# Accepts any real number strictly greater than zero. Non-real numerics
# such as Complex are rejected up front: they are Numeric but cannot be
# ordered, so comparing them would raise NoMethodError instead of the
# documented ArgumentError.
#
# @param initial_delay [Numeric] Initial delay in seconds
# @return [nil] When the value is acceptable
# @raise [ArgumentError] If initial_delay is not a positive real number
# @private
def self.validate_initial_delay(initial_delay)
  return if initial_delay.is_a?(Numeric) && initial_delay.real? && initial_delay.positive?

  raise ArgumentError, "initial_delay must be a positive number, got #{initial_delay.inspect}"
end
|
|
162
|
+
private_class_method :validate_initial_delay
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
end
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module DurableHuggingfaceHub
|
|
4
|
+
module Utils
|
|
5
|
+
# Input validation utilities for HuggingFace Hub parameters.
|
|
6
|
+
#
|
|
7
|
+
# This module provides validation functions for repository IDs, revisions,
|
|
8
|
+
# filenames, and other user inputs to ensure they meet HuggingFace Hub requirements.
|
|
9
|
+
module Validators
|
|
10
|
+
# Maximum length for repository ID
MAX_REPO_ID_LENGTH = 96

# Shape of a repository ID: an optional "namespace/" prefix followed by a
# name, each made of word characters, "-" and "." and each starting and
# ending on a word boundary.
REPO_ID_PATTERN = %r{\A(\b[\w\-.]+\b/)?\b[\w\-.]{1,96}\b\z}.freeze

# Validates a repository ID format.
#
# Rules:
# - Between 1 and 96 characters
# - Either "repo_name" or "namespace/repo_name"
# - Contains only [a-zA-Z0-9] or "-", "_", "."
# - Cannot have "--" or ".." sequences
# - Cannot end with ".git"
# - Name parts cannot start or end with ".", "-", or "_"
#
# Checks run in the order listed in the body, so an ID breaking several
# rules reports the first one it hits.
#
# @param repo_id [String] Repository ID to validate
# @param repo_type [String, nil] Repository type. Accepted for API
#   compatibility; the current implementation does not use it.
# @return [String] The validated repo_id
# @raise [ValidationError] If repo_id is invalid
#
# @example Valid repository IDs
#   Validators.validate_repo_id("bert-base-uncased")
#   Validators.validate_repo_id("huggingface/transformers")
#   Validators.validate_repo_id("my-org/my.model-v2")
#
# @example Invalid repository IDs
#   Validators.validate_repo_id("")  # raises ValidationError
#   Validators.validate_repo_id("foo--bar")  # raises ValidationError
#   Validators.validate_repo_id("foo.git")  # raises ValidationError
def self.validate_repo_id(repo_id, repo_type: nil)
  if repo_id.nil? || repo_id == ""
    raise ValidationError.new("repo_id", "Repository ID cannot be empty")
  end

  unless repo_id.is_a?(String)
    raise ValidationError.new("repo_id", "Repository ID must be a string, not #{repo_id.class}: '#{repo_id}'")
  end

  if repo_id.length > MAX_REPO_ID_LENGTH
    raise ValidationError.new("repo_id", "Repository ID is too long (max #{MAX_REPO_ID_LENGTH} characters)")
  end

  if repo_id.count("/") > 1
    raise ValidationError.new("repo_id", "Repository ID must be in format 'repo_name' or 'namespace/repo_name': '#{repo_id}'")
  end

  if repo_id.include?("--") || repo_id.include?("..")
    raise ValidationError.new("repo_id", "Cannot have -- or .. in repo_id: '#{repo_id}'")
  end

  if repo_id.end_with?(".git")
    raise ValidationError.new("repo_id", "Repository ID cannot end with '.git': '#{repo_id}'")
  end

  # Equivalent to Python's REPO_ID_REGEX. Once this passes, both sides of
  # a "/" are guaranteed non-empty, so no separate emptiness check is needed.
  unless repo_id.match?(REPO_ID_PATTERN)
    raise ValidationError.new("repo_id", "Repository ID must use alphanumeric chars, '-', '_' or '.'. The name cannot start or end with '-' or '.' and the maximum length is 96: '#{repo_id}'")
  end

  # The pattern's word boundaries already exclude leading/trailing "." and
  # "-"; "_" is a word character, so this is what rejects it at the edges.
  parts = repo_id.split("/", 2)
  if parts.any? { |part| part.start_with?(".", "-", "_") || part.end_with?(".", "-", "_") }
    subject = repo_id.include?("/") ? "Repository name parts" : "Repository name"
    raise ValidationError.new("repo_id", "#{subject} cannot start or end with '.', '-', or '_'")
  end

  repo_id
end
|
|
94
|
+
|
|
95
|
+
# Validates a revision (branch, tag, or commit SHA).
#
# Valid formats:
# - Branch names: "main", "dev", "feature/my-feature"
# - Tags: "v1.0.0", "release-2023"
# - Commit SHAs: 40 hexadecimal characters
#
# Every revision goes through the same rules. A commit SHA satisfies them
# all (hex characters only, no slashes, no dots), so there is no separate
# SHA shortcut that could let other input skip the checks.
#
# Rules:
# - Must be a non-empty String or Symbol of at most 255 characters
# - Contains only [a-zA-Z0-9], ".", "_", "-", "/"
# - Cannot start or end with "/"
# - Cannot contain ".." (git does not allow it in ref names, and it would
#   act as a parent-directory segment if the revision is used in a path)
#
# @param revision [String] Revision to validate
# @return [String] The validated revision
# @raise [ValidationError] If revision is invalid
#
# @example
#   Validators.validate_revision("main")
#   Validators.validate_revision("v1.0.0")
#   Validators.validate_revision("a" * 40)  # commit SHA
def self.validate_revision(revision)
  if revision.nil? || (revision.respond_to?(:empty?) && revision.empty?)
    raise ValidationError.new("revision", "Revision cannot be empty")
  end

  # Report unsupported types as a validation failure instead of letting a
  # NoMethodError escape from the checks below.
  unless revision.is_a?(String) || revision.is_a?(Symbol)
    raise ValidationError.new("revision", "Revision must be a string, not #{revision.class}")
  end

  # Check length (reasonable max for branch/tag names)
  if revision.length > 255
    raise ValidationError.new("revision", "Revision name is too long")
  end

  # Allow alphanumeric, hyphen, underscore, dot, slash
  unless revision.match?(%r{\A[a-zA-Z0-9._/-]+\z})
    raise ValidationError.new("revision", "Revision contains invalid characters")
  end

  # Disallow leading/trailing slashes
  if revision.start_with?("/") || revision.end_with?("/")
    raise ValidationError.new("revision", "Revision cannot start or end with '/'")
  end

  # Disallow consecutive dots
  if revision.match?(/\.\./)
    raise ValidationError.new("revision", "Revision cannot contain '..'")
  end

  revision
end
|
|
137
|
+
|
|
138
|
+
# Device names Windows reserves; a path whose last component is one of
# these (compared case-insensitively) is rejected.
WINDOWS_RESERVED_NAMES = %w[
  CON PRN AUX NUL
  COM1 COM2 COM3 COM4 COM5 COM6 COM7 COM8 COM9
  LPT1 LPT2 LPT3 LPT4 LPT5 LPT6 LPT7 LPT8 LPT9
].freeze

# Validates a filename for use in repository paths.
#
# Ensures filename doesn't contain path traversal sequences or
# other potentially dangerous patterns:
# - must be a non-empty String
# - must not start with "/"
# - must not contain "../" or "..\", nor any path segment that is
#   exactly ".." (covers a bare ".." and a trailing "dir/..")
# - must not contain null bytes
# - last path component must not be a Windows reserved device name
#
# @param filename [String] Filename to validate
# @return [String] The validated filename
# @raise [ValidationError] If filename is unsafe
#
# @example Valid filenames
#   Validators.validate_filename("config.json")
#   Validators.validate_filename("models/pytorch_model.bin")
#   Validators.validate_filename("data/train.csv")
#
# @example Invalid filenames
#   Validators.validate_filename("../etc/passwd")  # raises
#   Validators.validate_filename("/absolute/path")  # raises
#   Validators.validate_filename("models/..")  # raises
def self.validate_filename(filename)
  if filename.nil? || (filename.respond_to?(:empty?) && filename.empty?)
    raise ValidationError.new("filename", "Filename cannot be empty")
  end

  # Report unsupported types as a validation failure instead of letting a
  # NoMethodError escape from the string checks below.
  unless filename.is_a?(String)
    raise ValidationError.new("filename", "Filename must be a string, not #{filename.class}")
  end

  # Disallow absolute paths
  if filename.start_with?("/")
    raise ValidationError.new("filename", "Filename cannot be an absolute path")
  end

  # Disallow path traversal. The substring checks alone miss ".." when it
  # is the whole name or the final segment, so segments are checked too.
  traversal = filename.include?("../") ||
              filename.include?("..\\") ||
              filename.split(%r{[/\\]}).include?("..")
  if traversal
    raise ValidationError.new("filename", "Filename cannot contain path traversal sequences")
  end

  # Disallow null bytes
  if filename.include?("\0")
    raise ValidationError.new("filename", "Filename cannot contain null bytes")
  end

  # Disallow Windows reserved names
  if WINDOWS_RESERVED_NAMES.include?(File.basename(filename).upcase)
    raise ValidationError.new("filename", "Filename cannot use Windows reserved names")
  end

  filename
end
|
|
185
|
+
|
|
186
|
+
# Checks a repository type against Constants::REPO_TYPES.
#
# @param repo_type [String] Repository type
# @return [String] The repo_type, unchanged, when it is one of the known types
# @raise [ValidationError] If repo_type is not listed in Constants::REPO_TYPES
#
# @example
#   Validators.validate_repo_type("model")
#   Validators.validate_repo_type("dataset")
def self.validate_repo_type(repo_type)
  known_types = Constants::REPO_TYPES
  return repo_type if known_types.include?(repo_type)

  listing = known_types.join(", ")
  raise ValidationError.new("repo_type", "Invalid repository type '#{repo_type}'. Must be one of: #{listing}")
end
|
|
206
|
+
|
|
207
|
+
# Guards against a missing (nil) argument.
#
# @param value [Object] Value to check
# @param name [String] Parameter name, used as the error field and in the message
# @return [Object] The value, unchanged, if it is not nil
# @raise [ValidationError] If value is nil
def self.require_non_nil(value, name)
  raise ValidationError.new(name, "#{name} is required and cannot be nil") if value.nil?

  value
end
|
|
220
|
+
|
|
221
|
+
# Guards against a nil or empty argument.
#
# A value counts as empty when it responds to #empty? and that returns
# true; values without #empty? only fail when they are nil.
#
# @param value [String] Value to check
# @param name [String] Parameter name, used as the error field and in the message
# @return [String] The value, unchanged, if it is neither nil nor empty
# @raise [ValidationError] If value is nil or empty
def self.require_non_empty(value, name)
  missing = value.nil? || (value.respond_to?(:empty?) && value.empty?)
  raise ValidationError.new(name, "#{name} cannot be empty") if missing

  value
end
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
end
|