durable_huggingface_hub 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +29 -0
- data/.rubocop.yml +108 -0
- data/CHANGELOG.md +127 -0
- data/README.md +547 -0
- data/Rakefile +106 -0
- data/devenv.lock +171 -0
- data/devenv.nix +15 -0
- data/devenv.yaml +8 -0
- data/huggingface_hub.gemspec +63 -0
- data/lib/durable_huggingface_hub/authentication.rb +245 -0
- data/lib/durable_huggingface_hub/cache.rb +508 -0
- data/lib/durable_huggingface_hub/configuration.rb +191 -0
- data/lib/durable_huggingface_hub/constants.rb +145 -0
- data/lib/durable_huggingface_hub/errors.rb +412 -0
- data/lib/durable_huggingface_hub/file_download.rb +831 -0
- data/lib/durable_huggingface_hub/hf_api.rb +1278 -0
- data/lib/durable_huggingface_hub/repo_card.rb +430 -0
- data/lib/durable_huggingface_hub/types/cache_info.rb +298 -0
- data/lib/durable_huggingface_hub/types/commit_info.rb +149 -0
- data/lib/durable_huggingface_hub/types/dataset_info.rb +158 -0
- data/lib/durable_huggingface_hub/types/model_info.rb +154 -0
- data/lib/durable_huggingface_hub/types/space_info.rb +158 -0
- data/lib/durable_huggingface_hub/types/user.rb +179 -0
- data/lib/durable_huggingface_hub/types.rb +205 -0
- data/lib/durable_huggingface_hub/utils/auth.rb +174 -0
- data/lib/durable_huggingface_hub/utils/headers.rb +220 -0
- data/lib/durable_huggingface_hub/utils/http.rb +329 -0
- data/lib/durable_huggingface_hub/utils/paths.rb +230 -0
- data/lib/durable_huggingface_hub/utils/progress.rb +217 -0
- data/lib/durable_huggingface_hub/utils/retry.rb +165 -0
- data/lib/durable_huggingface_hub/utils/validators.rb +236 -0
- data/lib/durable_huggingface_hub/version.rb +8 -0
- data/lib/huggingface_hub.rb +205 -0
- metadata +334 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "faraday"
|
|
5
|
+
require "faraday/retry"
|
|
6
|
+
|
|
7
|
+
require_relative "../configuration"
|
|
8
|
+
require_relative "headers"
|
|
9
|
+
|
|
10
|
+
module DurableHuggingfaceHub
|
|
11
|
+
module Utils
|
|
12
|
+
# HTTP client for making requests to the HuggingFace Hub API.
#
# Wraps a Faraday connection configured with retry, JSON request/response
# handling, timeouts and optional proxy and logging. Non-success responses
# and Faraday transport errors are converted into the library's own error
# classes (HfHubHTTPError and subclasses).
#
# @example Basic GET request
#   client = HttpClient.new
#   response = client.get("https://huggingface.co/api/models/bert-base-uncased")
#
# @example With authentication
#   client = HttpClient.new(token: "hf_...")
#   response = client.get("/api/whoami")
class HttpClient
  # Connection (open) timeout in seconds used when the caller supplies none.
  DEFAULT_OPEN_TIMEOUT = 10

  # Response statuses the retry middleware treats as retriable.
  RETRY_STATUSES = [408, 429, 500, 502, 503, 504].freeze

  # @return [String, nil] Authentication token
  attr_reader :token

  # @return [String] Base URL for API requests
  attr_reader :endpoint

  # @return [Hash] Default headers for all requests
  attr_reader :default_headers

  # @return [Faraday::Connection] The underlying Faraday connection
  attr_reader :connection

  # Creates a new HTTP client.
  #
  # Token and endpoint fall back to the values held by
  # Configuration.instance when not given explicitly.
  #
  # @param token [String, nil] Authentication token
  # @param endpoint [String, nil] Base endpoint URL
  # @param headers [Hash, nil] Additional default headers
  # @param timeout [Integer, nil] Request timeout in seconds
  # @param open_timeout [Integer, nil] Connection timeout in seconds
  # @param proxy [String, nil] Proxy URL
  # @param logger [Logger, nil] Logger for request/response logging
  def initialize(
    token: nil,
    endpoint: nil,
    headers: nil,
    timeout: nil,
    open_timeout: nil,
    proxy: nil,
    logger: nil
  )
    @token = token || Configuration.instance.token
    @endpoint = endpoint || Configuration.instance.endpoint
    @default_headers = build_default_headers(headers)
    @logger = logger
    @connection = build_connection(
      timeout: timeout,
      open_timeout: open_timeout,
      proxy: proxy
    )
  end

  # Performs a GET request.
  #
  # @param path [String] URL path or full URL
  # @param params [Hash, nil] Query parameters
  # @param headers [Hash, nil] Additional headers
  # @param timeout [Numeric, nil] Request timeout in seconds
  # @return [Faraday::Response] Response object
  # @raise [HfHubHTTPError] On HTTP errors
  #
  # @example
  #   response = client.get("/api/models", params: { limit: 10 })
  def get(path, params: nil, headers: nil, timeout: nil)
    request(:get, path, params: params, headers: headers, timeout: timeout)
  end

  # Performs a POST request.
  #
  # @param path [String] URL path or full URL
  # @param body [Hash, String, nil] Request body
  # @param params [Hash, nil] Query parameters
  # @param headers [Hash, nil] Additional headers
  # @param timeout [Numeric, nil] Request timeout in seconds
  # @return [Faraday::Response] Response object
  # @raise [HfHubHTTPError] On HTTP errors
  #
  # @example
  #   response = client.post("/api/repos", body: { name: "my-model" })
  def post(path, body: nil, params: nil, headers: nil, timeout: nil)
    request(:post, path, body: body, params: params, headers: headers, timeout: timeout)
  end

  # Performs a PUT request.
  #
  # @param path [String] URL path or full URL
  # @param body [Hash, String, nil] Request body
  # @param params [Hash, nil] Query parameters
  # @param headers [Hash, nil] Additional headers
  # @param timeout [Numeric, nil] Request timeout in seconds
  # @return [Faraday::Response] Response object
  # @raise [HfHubHTTPError] On HTTP errors
  def put(path, body: nil, params: nil, headers: nil, timeout: nil)
    request(:put, path, body: body, params: params, headers: headers, timeout: timeout)
  end

  # Performs a DELETE request.
  #
  # @param path [String] URL path or full URL
  # @param params [Hash, nil] Query parameters
  # @param headers [Hash, nil] Additional headers
  # @param timeout [Numeric, nil] Request timeout in seconds
  # @return [Faraday::Response] Response object
  # @raise [HfHubHTTPError] On HTTP errors
  def delete(path, params: nil, headers: nil, timeout: nil)
    request(:delete, path, params: params, headers: headers, timeout: timeout)
  end

  # Performs a HEAD request.
  #
  # @param path [String] URL path or full URL
  # @param params [Hash, nil] Query parameters
  # @param headers [Hash, nil] Additional headers
  # @param timeout [Numeric, nil] Request timeout in seconds
  # @return [Faraday::Response] Response object
  # @raise [HfHubHTTPError] On HTTP errors
  def head(path, params: nil, headers: nil, timeout: nil)
    request(:head, path, params: params, headers: headers, timeout: timeout)
  end

  # Performs an HTTP request with error handling.
  #
  # Per-request headers are merged over the client's default headers. A body
  # is only attached for methods other than GET and HEAD.
  #
  # @param method [Symbol] HTTP method
  # @param path [String] URL path or full URL
  # @param body [Hash, String, nil] Request body
  # @param params [Hash, nil] Query parameters
  # @param headers [Hash, nil] Additional headers
  # @param timeout [Numeric, nil] Request timeout in seconds (overrides default)
  # @yield [req] Optional block for Faraday request configuration
  # @return [Faraday::Response] Response object
  # @raise [HfHubHTTPError] On HTTP errors
  def request(method, path, body: nil, params: nil, headers: nil, timeout: nil, &block)
    url = build_url(path)
    merged_headers = @default_headers.merge(headers || {})
    bodyless = method == :get || method == :head

    response = @connection.public_send(method) do |req|
      req.url(url)
      req.params.update(params) if params
      req.headers.update(merged_headers)
      req.body = prepare_body(body) if body && !bodyless
      req.options.timeout = timeout if timeout
      # The caller's block runs last so it can override anything set above.
      block&.call(req)
    end

    handle_response(response)
  rescue Faraday::Error => e
    handle_faraday_error(e)
  end

  private

  # Builds the Faraday connection with middleware.
  #
  # @param timeout [Integer, nil] Request timeout
  # @param open_timeout [Integer, nil] Connection timeout
  # @param proxy [String, nil] Proxy URL
  # @return [Faraday::Connection] Configured connection
  def build_connection(timeout: nil, open_timeout: nil, proxy: nil)
    Faraday.new(url: @endpoint) do |conn|
      # Request/response logging (if logger provided). Headers are logged,
      # bodies are not; the Authorization header value is redacted so the
      # access token never reaches the log.
      if @logger
        conn.response :logger, @logger, { headers: true, bodies: false } do |log|
          log.filter(/(Authorization: )(.+)/i, '\1[REDACTED]')
        end
      end

      # Retry middleware with exponential backoff. POST/PUT are not listed
      # in `methods`, so they are not retried.
      conn.request :retry,
                   max: 3,
                   interval: 1,
                   interval_randomness: 0.5,
                   backoff_factor: 2,
                   retry_statuses: RETRY_STATUSES,
                   methods: %i[get head options delete],
                   exceptions: [
                     Faraday::TimeoutError,
                     Faraday::ConnectionFailed
                   ]

      # JSON request/response handling
      conn.request :json
      conn.response :json, content_type: /\bjson$/

      # Set timeouts
      conn.options.timeout = timeout || Configuration.instance.request_timeout
      conn.options.open_timeout = open_timeout || DEFAULT_OPEN_TIMEOUT

      # Set proxy if provided
      conn.proxy = proxy if proxy

      # Use default adapter
      conn.adapter Faraday.default_adapter
    end
  end

  # Builds default headers for requests.
  #
  # @param additional_headers [Hash, nil] Additional headers
  # @return [Hash] Complete headers hash
  def build_default_headers(additional_headers)
    Headers.build_hf_headers(
      token: @token,
      headers: additional_headers
    )
  end

  # Builds the full URL from a path.
  #
  # Absolute http(s) URLs are returned untouched; anything else is appended
  # to the endpoint with exactly one "/" between the two.
  #
  # @param path [String] URL path or full URL
  # @return [String] Full URL
  def build_url(path)
    return path if path.start_with?("http://", "https://")

    relative = path.start_with?("/") ? path : "/#{path}"
    "#{@endpoint.chomp("/")}#{relative}"
  end

  # Prepares request body for transmission.
  #
  # Hashes are serialized to JSON; every other value is passed through
  # unchanged.
  #
  # @param body [Hash, String] Request body
  # @return [String, Object] JSON string for a Hash, otherwise the body as given
  def prepare_body(body)
    body.is_a?(Hash) ? body.to_json : body
  end

  # Handles HTTP response and raises errors for non-success status.
  #
  # @param response [Faraday::Response] HTTP response
  # @return [Faraday::Response] Response object if successful
  # @raise [HfHubHTTPError] On error status codes
  def handle_response(response)
    return response if response.success?

    raise_http_error(response)
  end

  # Raises appropriate HTTP error based on response.
  #
  # @param response [Faraday::Response] HTTP response
  # @raise [HfHubHTTPError] Appropriate error subclass
  def raise_http_error(response)
    status = response.status
    # The JSON middleware may already have parsed the body; error objects
    # and the substring checks below need it as a String (or nil).
    raw_body = response.body
    body = if raw_body.is_a?(String)
             raw_body
           elsif raw_body
             raw_body.to_json
           end
    request_id = Headers.extract_request_id(response.headers)

    case status
    when 400
      # NOTE(review): request_id is not forwarded here, unlike the other
      # branches - confirm against BadRequestError's constructor.
      raise BadRequestError.new("Bad request", response_body: body)
    when 401
      raise HfHubHTTPError.new("Unauthorized", status_code: 401, response_body: body, request_id: request_id)
    when 403
      # Try to determine if it's gated or disabled
      if body&.include?("gated")
        raise GatedRepoError.new("unknown", message: extract_error_message(body, "Access to this repository is gated"))
      elsif body&.include?("disabled")
        raise DisabledRepoError.new("unknown", message: extract_error_message(body, "Repository has been disabled"))
      else
        raise HfHubHTTPError.new("Forbidden", status_code: 403, response_body: body, request_id: request_id)
      end
    when 404
      raise RepositoryNotFoundError.new("unknown", message: extract_error_message(body, "Repository not found"))
    when 408
      raise HfHubHTTPError.new("Request timeout", status_code: 408, response_body: body, request_id: request_id)
    when 429
      raise HfHubHTTPError.new("Too many requests", status_code: 429, response_body: body, request_id: request_id)
    when 500..599
      raise HfHubHTTPError.new("Server error", status_code: status, response_body: body, request_id: request_id)
    else
      raise HfHubHTTPError.new("HTTP error", status_code: status, response_body: body, request_id: request_id)
    end
  end

  # Extracts error message from response body.
  #
  # Returns the "error" (or, failing that, "message") entry of a JSON object
  # body. Any body that is missing, is not valid JSON, or is valid JSON but
  # not an object (array, number, null, ...) yields the default.
  #
  # @param body [String, nil] Response body
  # @param default [String] Default message
  # @return [String] Error message
  def extract_error_message(body, default)
    return default unless body

    parsed = JSON.parse(body)
    # Only a JSON object can be indexed by "error"/"message"; indexing an
    # Array or nil with a String would raise and mask the HTTP error.
    return default unless parsed.is_a?(Hash)

    parsed["error"] || parsed["message"] || default
  rescue JSON::ParserError
    default
  end

  # Handles Faraday errors.
  #
  # @param error [Faraday::Error] Faraday error
  # @raise [HfHubHTTPError] Converted error
  def handle_faraday_error(error)
    case error
    when Faraday::RetriableResponse
      # Retry middleware exhausted retries - extract response and handle as HTTP error
      wrapped = error.response
      if wrapped && wrapped.is_a?(Hash) && wrapped[:response]
        raise_http_error(wrapped[:response])
      elsif wrapped && wrapped.respond_to?(:status)
        raise_http_error(wrapped)
      else
        raise HfHubHTTPError.new("Retryable error: #{error.message}")
      end
    when Faraday::TimeoutError
      raise HfHubHTTPError.new("Request timed out: #{error.message}")
    when Faraday::ConnectionFailed
      raise HfHubHTTPError.new("Connection failed: #{error.message}")
    when Faraday::SSLError
      raise HfHubHTTPError.new("SSL error: #{error.message}")
    else
      raise HfHubHTTPError.new("HTTP error: #{error.message}")
    end
  end
end
|
|
328
|
+
end
|
|
329
|
+
end
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
module DurableHuggingfaceHub
|
|
6
|
+
module Utils
|
|
7
|
+
# Path manipulation and filtering utilities.
#
# This module provides functions for working with file paths,
# including expansion, filtering, and pattern matching.
module Paths
  # Expands a path, resolving home directory and environment variables.
  #
  # Only upper-case variable names are recognized (`$NAME` or `${NAME}`);
  # a variable that is not set expands to an empty string.
  #
  # @param path [String, Pathname] Path to expand
  # @return [Pathname] Expanded path
  #
  # @example
  #   Paths.expand_path("~/models")     # => Pathname("/home/user/models")
  #   Paths.expand_path("$HOME/data")   # => Pathname("/home/user/data")
  def self.expand_path(path)
    substituted = path.to_s.gsub(/\$([A-Z_][A-Z0-9_]*)|\$\{([A-Z_][A-Z0-9_]*)\}/) do
      ENV.fetch(Regexp.last_match(1) || Regexp.last_match(2), "")
    end

    # Pathname#expand_path resolves "~" and makes the path absolute.
    Pathname.new(substituted).expand_path
  end

  # Filters a list of repository objects (files) based on allow and ignore patterns.
  #
  # Objects whose path cannot be extracted (unsupported type, or a Hash
  # without a usable key) are dropped whenever any pattern is in effect.
  #
  # @param objects [Array<String>, Array<Hash>] List of file paths or file info hashes
  # @param allow_patterns [Array<String>, String, nil] Patterns to allow (globs or regexes)
  # @param ignore_patterns [Array<String>, String, nil] Patterns to ignore (globs or regexes)
  # @param key [String, Symbol, nil] Key to extract path from hash objects
  # @return [Array] Filtered list of objects
  #
  # @example Filter file list with glob patterns
  #   files = ["config.json", "model.safetensors", "README.md", "data/train.csv"]
  #   Paths.filter_repo_objects(files, allow_patterns: ["*.json", "*.safetensors"])
  #   # => ["config.json", "model.safetensors"]
  #
  # @example Filter with ignore patterns
  #   files = ["model.bin", "config.json", "training_log.txt"]
  #   Paths.filter_repo_objects(files, ignore_patterns: ["*.txt"])
  #   # => ["model.bin", "config.json"]
  #
  # @example Filter hash objects
  #   files = [{ path: "config.json" }, { path: "model.bin" }]
  #   Paths.filter_repo_objects(files, allow_patterns: "*.json", key: :path)
  #   # => [{ path: "config.json" }]
  def self.filter_repo_objects(objects, allow_patterns: nil, ignore_patterns: nil, key: nil)
    return objects if objects.nil? || objects.empty?

    allowed = normalize_patterns(allow_patterns)
    ignored = normalize_patterns(ignore_patterns)

    # Nothing to filter on: hand the input back untouched.
    return objects if allowed.nil? && ignored.nil?

    objects.select do |obj|
      path = extract_path(obj, key)
      !path.nil? && should_include?(path, allow_patterns: allowed, ignore_patterns: ignored)
    end
  end

  # Checks if a path should be included based on allow and ignore patterns.
  #
  # Ignore patterns take precedence over allow patterns.
  #
  # @param path [String] File path to check
  # @param allow_patterns [Array<String>, nil] Patterns to allow
  # @param ignore_patterns [Array<String>, nil] Patterns to ignore
  # @return [Boolean] True if path should be included
  #
  # @example
  #   Paths.should_include?("config.json", allow_patterns: ["*.json"])  # => true
  #   Paths.should_include?("data.txt", allow_patterns: ["*.json"])     # => false
  #   Paths.should_include?("temp.log", ignore_patterns: ["*.log"])     # => false
  def self.should_include?(path, allow_patterns: nil, ignore_patterns: nil)
    return false if ignore_patterns && matches_any_pattern?(path, ignore_patterns)
    return matches_any_pattern?(path, allow_patterns) if allow_patterns

    # No allow list: everything that was not ignored is included.
    true
  end

  # Checks if a path matches any of the given patterns.
  #
  # @param path [String] File path to check
  # @param patterns [Array<String>] Glob or regex patterns
  # @return [Boolean] True if path matches any pattern
  #
  # @example
  #   Paths.matches_any_pattern?("config.json", ["*.json", "*.yaml"])  # => true
  #   Paths.matches_any_pattern?("data.txt", ["*.json", "*.yaml"])     # => false
  def self.matches_any_pattern?(path, patterns)
    return false if patterns.nil? || patterns.empty?

    patterns.any? { |pattern| matches_pattern?(path, pattern) }
  end

  # Checks if a path matches a single pattern.
  #
  # Supports both glob patterns and regular expressions. Globs are matched
  # with File::FNM_PATHNAME, so "*" does not cross a "/" ("*.json" does not
  # match "dir/config.json"), and with File::FNM_EXTGLOB, so "{a,b}" works.
  # Any pattern that is neither a String nor a Regexp never matches.
  #
  # @param path [String] File path to check
  # @param pattern [String, Regexp] Glob pattern or regex
  # @return [Boolean] True if path matches pattern
  #
  # @example Glob patterns
  #   Paths.matches_pattern?("config.json", "*.json")           # => true
  #   Paths.matches_pattern?("data/train.csv", "data/*.csv")   # => true
  #   Paths.matches_pattern?("model.bin", "*.json")            # => false
  #
  # @example Regex patterns
  #   Paths.matches_pattern?("config.json", /\.json$/)         # => true
  def self.matches_pattern?(path, pattern)
    case pattern
    when Regexp
      pattern.match?(path)
    when String
      File.fnmatch?(pattern, path, File::FNM_PATHNAME | File::FNM_EXTGLOB)
    else
      false
    end
  end

  # Sanitizes a filename by replacing unsafe characters with underscores.
  #
  # Replaced characters: path separators ("/" and "\"), any whitespace, and
  # < > : " | ? *
  #
  # @param filename [String] Filename to sanitize
  # @return [String] Sanitized filename
  #
  # @example
  #   Paths.sanitize_filename("my file?.txt")    # => "my_file_.txt"
  #   Paths.sanitize_filename("test/file.json")  # => "test_file.json"
  def self.sanitize_filename(filename)
    filename.gsub(%r{[/\\\s<>:"|?*]}, "_")
  end

  # Joins path components safely, ensuring no path traversal.
  #
  # The result must be the base directory itself or lie strictly beneath
  # it. Containment is checked on whole path segments, so a sibling such as
  # "/cache2" is not mistaken for being inside "/cache".
  #
  # @param base [String, Pathname] Base path
  # @param *parts [String] Path components to join
  # @return [Pathname] Joined path
  # @raise [ValidationError] If a component is absolute or the result would
  #   escape the base path
  #
  # @example
  #   Paths.safe_join("/cache", "models", "bert")
  #   # => Pathname("/cache/models/bert")
  def self.safe_join(base, *parts)
    absolute = parts.find { |part| part.to_s.start_with?("/") }
    if absolute
      raise ValidationError.new(
        "path",
        "Path component cannot be absolute: #{absolute}"
      )
    end

    base_path = Pathname.new(base).expand_path
    final_path = parts.reduce(base_path) { |acc, part| acc.join(part) }.expand_path

    # Compare against "<base>/" rather than "<base>": a bare prefix test
    # would accept "/cache2/x" for base "/cache". The root directory already
    # ends with the separator.
    base_str = base_path.to_s
    base_prefix = base_str.end_with?("/") ? base_str : "#{base_str}/"

    unless final_path == base_path || final_path.to_s.start_with?(base_prefix)
      raise ValidationError.new(
        "path",
        "Path traversal detected: result would escape base directory"
      )
    end

    final_path
  end

  # Internal helpers. They are defined with `def self.`, which a bare
  # `private` keyword does not affect, so they remain callable from outside
  # the module; treat them as implementation details.

  # Normalizes pattern input to an array.
  #
  # @param patterns [Array, String, Regexp, nil] Patterns
  # @return [Array, nil] Patterns as an array; nil for nil or any
  #   unsupported input type
  def self.normalize_patterns(patterns)
    case patterns
    when String, Regexp then [patterns]
    when Array then patterns
    end
  end

  # Extracts path from an object (string or hash).
  #
  # For a Hash the key is tried as given, then as a String, then as a
  # Symbol, so the caller need not know how the hash is keyed.
  #
  # @param obj [String, Hash] Object
  # @param key [String, Symbol, nil] Key for hash extraction
  # @return [String, nil] Extracted path
  def self.extract_path(obj, key)
    case obj
    when String
      obj
    when Hash
      key ? (obj[key] || obj[key.to_s] || obj[key.to_sym]) : nil
    end
  end
end
|
|
229
|
+
end
|
|
230
|
+
end
|