crawlora 1.5.0.pre.sdk.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +21 -0
- data/LICENSE +21 -0
- data/README.md +123 -0
- data/docs/operations.md +338 -0
- data/docs/recipes.md +87 -0
- data/examples/bing_search.rb +10 -0
- data/examples/paginate.rb +11 -0
- data/examples/youtube_transcript.rb +10 -0
- data/lib/crawlora/client.rb +626 -0
- data/lib/crawlora/errors.rb +38 -0
- data/lib/crawlora/operations.rb +13841 -0
- data/lib/crawlora/pagination.rb +39 -0
- data/lib/crawlora/version.rb +9 -0
- data/lib/crawlora.rb +31 -0
- data/openapi/public.json +54522 -0
- data/sig/crawlora.rbs +465 -0
- metadata +70 -0
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "cgi"
|
|
4
|
+
require "json"
|
|
5
|
+
require "net/http"
|
|
6
|
+
require "securerandom"
|
|
7
|
+
require "set"
|
|
8
|
+
require "stringio"
|
|
9
|
+
require "time"
|
|
10
|
+
require "uri"
|
|
11
|
+
|
|
12
|
+
require_relative "errors"
|
|
13
|
+
require_relative "pagination"
|
|
14
|
+
require_relative "operations"
|
|
15
|
+
|
|
16
|
+
module Crawlora
|
|
17
|
+
DEFAULT_BASE_URL = "https://api.crawlora.net/api/v1"
|
|
18
|
+
DEFAULT_MAX_RETRY_DELAY = 30.0
|
|
19
|
+
DEFAULT_RETRY_STATUSES = [408, 409, 425, 429].freeze
|
|
20
|
+
RESPONSE_TYPES = %w[auto json text stream].freeze
|
|
21
|
+
|
|
22
|
+
Response = Struct.new(:status, :headers, :body)
|
|
23
|
+
|
|
24
|
+
# Default transport built on Net::HTTP. It keeps one started Net::HTTP object
# per origin ("scheme://host:port") so consecutive calls to the same origin
# can reuse the connection. Any object responding to +call+ with the same
# keyword arguments can be injected instead (tests, custom HTTP stacks).
#
# The connection table is guarded by a mutex; the HTTP exchange itself runs
# outside that mutex.
class DefaultTransport
  def initialize
    @connections = {}
    @mutex = Mutex.new
  end

  # Perform one HTTP exchange.
  #
  # @param method [String] HTTP verb, e.g. "GET"
  # @param url [String] absolute request URL
  # @param headers [Hash] request headers
  # @param body [String, nil] request body; skipped when nil
  # @param timeout [Numeric, nil] applied to both open and read timeouts
  # @return [Response] status code, headers (multi-values joined by ", "), body
  def call(method:, url:, headers:, body:, timeout:)
    target = URI.parse(url)
    session = connection(target, timeout)
    outgoing = build_request(method, target, headers, body)
    reply = session.request(outgoing)
    flattened = reply.to_hash.transform_values do |values|
      values.is_a?(Array) ? values.join(", ") : values
    end
    Response.new(reply.code.to_i, flattened, reply.body || "")
  end

  # Finish every started connection and forget all of them.
  def close
    @mutex.synchronize do
      @connections.each_value do |session|
        session.finish if session.started?
      end
      @connections.clear
    end
  end

  private

  # Look up (or create) the connection for the URI's origin, refresh its
  # timeouts, and make sure it is started before handing it back.
  def connection(uri, timeout)
    origin = "#{uri.scheme}://#{uri.host}:#{uri.port}"
    @mutex.synchronize do
      session = @connections[origin]
      if session.nil?
        session = new_session(uri)
        @connections[origin] = session
      end
      session.open_timeout = timeout
      session.read_timeout = timeout
      session.start unless session.started?
      session
    end
  end

  # Create an unstarted Net::HTTP object configured for the URI's scheme.
  def new_session(uri)
    Net::HTTP.new(uri.host, uri.port).tap do |session|
      session.use_ssl = uri.scheme == "https"
      session.keep_alive_timeout = 30
    end
  end

  # Build the Net::HTTP request object for the verb (Net::HTTP::Get,
  # Net::HTTP::Post, ...), copying headers and the optional body onto it.
  def build_request(method, uri, headers, body)
    request_class = Net::HTTP.const_get(method.capitalize)
    request_class.new(uri.request_uri).tap do |outgoing|
      headers.each { |name, value| outgoing[name] = value }
      outgoing.body = body if body
    end
  end
end
|
|
75
|
+
|
|
76
|
+
# Optional client-side throttle. It limits how many blocks may run at once
# (+concurrency+) and spaces block starts so they do not exceed +rps+
# requests per second. Either limit is disabled when its argument is nil or
# not positive.
class RateLimiter
  # @param rps [Numeric, nil] maximum starts per second
  # @param concurrency [Integer, nil] maximum number of blocks in flight
  def initialize(rps, concurrency)
    @interval = 0.0
    @interval = 1.0 / rps if rps&.positive?
    @slots = nil
    @slots = concurrency if concurrency&.positive?
    @available = @slots
    @mutex = Mutex.new
    @cond = ConditionVariable.new
    @next_at = 0.0
  end

  # Run the block under both limits and return its value. The concurrency
  # slot is taken first and always given back, even when the block raises.
  def run
    take_slot
    begin
      pace
      yield
    ensure
      return_slot
    end
  end

  private

  # Block until a concurrency slot is free, then claim it.
  def take_slot
    return if @slots.nil?

    @mutex.synchronize do
      @cond.wait(@mutex) until @available.positive?
      @available -= 1
    end
  end

  # Give a slot back and wake one waiter.
  def return_slot
    return if @slots.nil?

    @mutex.synchronize do
      @available += 1
      @cond.signal
    end
  end

  # Reserve the next start time under the mutex, then sleep (outside the
  # mutex) until that time arrives.
  def pace
    return if @interval.zero?

    pause = @mutex.synchronize do
      current = clock
      scheduled = [current, @next_at].max
      @next_at = scheduled + @interval
      [0.0, scheduled - current].max
    end
    sleep(pause) if pause.positive?
  end

  # Monotonic clock reading in seconds.
  def clock
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
|
|
134
|
+
|
|
135
|
+
# Synchronous client for the Crawlora API.
|
|
136
|
+
#
|
|
137
|
+
# Call operations via grouped helpers (+client.bing.search(q: "...")+) or
|
|
138
|
+
# dynamically (+client.request("bing-search", q: "...")+). Supports
|
|
139
|
+
# configurable retries, an +on_retry+ hook, opt-in +request_id+ and
|
|
140
|
+
# +idempotency_keys+, +before_request+/+after_response+ middleware, client-side
|
|
141
|
+
# +rate_limit+/+max_concurrency+, pagination (+paginate+/+paginate_items+), and
|
|
142
|
+
# +response_type: "stream"+. Uses a keep-alive connection pool by default; call
|
|
143
|
+
# +close+ (or use the block form of +Crawlora.client+) to release connections.
|
|
144
|
+
class Client
|
|
145
|
+
attr_reader :api_key, :jwt_token, :base_url, :timeout, :retries, :retry_delay,
|
|
146
|
+
:max_retry_delay, :retry_statuses, :headers, :user_agent
|
|
147
|
+
|
|
148
|
+
# Build a client.
#
# Setting precedence is: explicit argument, then environment variable
# (CRAWLORA_API_KEY / CRAWLORA_BASE_URL), then the built-in default.
# +retries+, +retry_delay+ and +max_retry_delay+ are coerced to numbers and
# clamped at zero. A RateLimiter is created only when +rate_limit+ or
# +max_concurrency+ is given. Hooks may be a single callable or a list.
# One reader method per entry of GROUPS is defined on this instance,
# returning the matching OperationGroup.
def initialize(
  api_key: nil, jwt_token: nil, base_url: nil, timeout: 30,
  retries: 0, retry_delay: 0.25, max_retry_delay: DEFAULT_MAX_RETRY_DELAY,
  retry_statuses: nil, retry_predicate: nil, on_retry: nil,
  request_id: false, idempotency_keys: false,
  rate_limit: nil, max_concurrency: nil, logger: nil,
  before_request: nil, after_response: nil,
  headers: nil, user_agent: nil, transport: nil
)
  # Credentials and endpoint.
  @api_key = api_key || ENV.fetch("CRAWLORA_API_KEY", "")
  @jwt_token = jwt_token || ""
  endpoint = base_url || ENV["CRAWLORA_BASE_URL"] || DEFAULT_BASE_URL
  @base_url = endpoint.chomp("/")
  @timeout = timeout

  # Retry policy.
  @retries = [0, retries.to_i].max
  @retry_delay = [0.0, retry_delay.to_f].max
  @max_retry_delay = [0.0, max_retry_delay.to_f].max
  @retry_statuses = retry_statuses&.to_a&.to_set
  @retry_predicate = retry_predicate
  @on_retry = on_retry

  # Per-request extras.
  @request_id = request_id
  @idempotency_keys = idempotency_keys
  @rate_limiter =
    if rate_limit || max_concurrency
      RateLimiter.new(rate_limit, max_concurrency)
    end
  @logger = logger
  @before_request = as_hook_list(before_request)
  @after_response = as_hook_list(after_response)
  @headers = headers ? headers.dup : {}
  @user_agent = user_agent || "crawlora-ruby-sdk/#{VERSION}"
  @transport = transport || DefaultTransport.new

  # Grouped helpers: client.<group> returns the OperationGroup for it.
  @groups = {}
  GROUPS.each do |group_name, operations|
    @groups[group_name] = OperationGroup.new(self, operations)
  end
  @groups.each_key do |group_name|
    define_singleton_method(group_name) { @groups[group_name] }
  end
end
|
|
184
|
+
|
|
185
|
+
# Release pooled keep-alive connections. Does nothing (and returns nil) when
# the configured transport has no +close+ method.
def close
  return unless @transport.respond_to?(:close)

  @transport.close
end
|
|
189
|
+
|
|
190
|
+
# Alias for +request+: forwards the operation id, the params and every
# keyword option unchanged, and returns whatever +request+ returns.
def operation(operation_id, params = {}, **options)
|
|
191
|
+
request(operation_id, params, **options)
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Execute one operation by id, retrying failed attempts.
#
# @param operation_id [String] key into OPERATIONS
# @param params [Hash] operation parameters (keys are stringified)
# @param response_type [String] one of RESPONSE_TYPES
# @param timeout [Numeric, nil] per-call timeout; nil uses the client default
# @param headers [Hash, nil] per-call header overrides
# @param retries [Integer, nil] per-call retry budget; nil uses the client default
# @param retry_predicate [#call, nil] per-call override, called with (status, error)
# @return the value returned by +send_request+
# @raise [ArgumentError] when the operation id is unknown
# @raise [Error] when the last attempt fails or the failure is not retryable
def request(operation_id, params = {}, response_type: "auto", timeout: nil, headers: nil,
            retries: nil, retry_predicate: nil)
  operation = OPERATIONS[operation_id]
  raise ArgumentError, "unknown Crawlora operation: #{operation_id}" if operation.nil?

  response_type = validate_response_type(response_type)
  log(event: "request", operation: operation_id)
  max_retries = retries.nil? ? @retries : [0, retries.to_i].max
  # One key for the whole call so every retry of a POST/PATCH carries the same value.
  idempotency_key =
    @idempotency_keys && %w[POST PATCH].include?(operation["method"]) ? SecureRandom.hex(16) : nil
  request_params = stringify_keys(params)

  attempt = 0
  # begin/retry rather than Kernel#loop: loop rescues StopIteration, which
  # would silently swallow one raised by a hook and return its result.
  begin
    send_request(operation, request_params, response_type: response_type,
                                            timeout: timeout, headers: headers, idempotency_key: idempotency_key)
  rescue Error => e
    retryable = retry_predicate ? retry_predicate.call(e.status, e) : retryable?(e.status, e)
    raise if attempt >= max_retries || !retryable

    attempt += 1
    delay = compute_retry_delay(attempt, e.headers)
    log(event: "retry", operation: operation_id, attempt: attempt, status: e.status, delay: delay)
    @on_retry&.call(attempt, e, delay)
    sleep(delay) if delay.positive?
    retry
  end
end
|
|
220
|
+
|
|
221
|
+
# Yield successive pages of a paginated operation.
#
# Numeric mode (default) advances the +page+/+offset+ query parameter by
# +step+ and stops after yielding an empty page. Cursor mode (pass both
# +cursor_param+ and a +next_cursor+ extractor) sends the cursor parameter
# and stops when +next_cursor+ returns nil, false or an empty value.
# +cursor_param+ and +page_param+ may be given as a String or a Symbol.
# Without a block an Enumerator is returned.
#
# @param max_pages [Integer, nil] stop after this many pages; nil means no limit
# @raise [ArgumentError] for an unknown operation, an incomplete cursor
#   configuration, a cursor parameter the operation does not declare, or an
#   operation with no detectable page parameter
def paginate(operation_id, params = {}, page_param: nil, cursor_param: nil, next_cursor: nil,
             start: nil, step: 1, max_pages: nil, response_type: "auto", timeout: nil, headers: nil)
  unless block_given?
    return enum_for(:paginate, operation_id, params, page_param: page_param, cursor_param: cursor_param,
                                                     next_cursor: next_cursor, start: start, step: step, max_pages: max_pages,
                                                     response_type: response_type, timeout: timeout, headers: headers)
  end

  operation = OPERATIONS[operation_id]
  raise ArgumentError, "unknown Crawlora operation: #{operation_id}" if operation.nil?

  base_params = stringify_keys(params)
  request_options = { response_type: response_type, timeout: timeout, headers: headers }

  if cursor_param || next_cursor
    raise ArgumentError, "cursor pagination requires both cursor_param and next_cursor" unless cursor_param && next_cursor

    # Operation metadata and base_params use String names, so compare and
    # store the cursor under its String form even when a Symbol was passed.
    cursor_name = cursor_param.to_s
    query_names = (operation["queryParams"] || []).map { |p| p["name"] }
    unless query_names.include?(cursor_name)
      raise ArgumentError, "cursor_param #{cursor_param.inspect} is not a query parameter of operation #{operation_id}"
    end

    cursor = start
    fetched = 0
    while max_pages.nil? || fetched < max_pages
      page_params = base_params.dup
      page_params[cursor_name] = cursor unless cursor.nil?
      response = request(operation_id, page_params, **request_options)
      yield response
      fetched += 1
      cursor = next_cursor.call(response)
      break unless cursor && !(cursor.respond_to?(:empty?) && cursor.empty?)
    end
    return
  end

  page_param ||= Pagination.detect_page_param(operation)
  raise ArgumentError, "operation #{operation_id} has no page or offset query parameter to paginate" unless page_param

  # Same normalisation as the cursor name: avoids a mixed Symbol/String key set.
  page_name = page_param.to_s
  page_value = start.nil? ? Pagination.default_start(page_name) : start
  fetched = 0
  while max_pages.nil? || fetched < max_pages
    page_params = base_params.merge(page_name => page_value)
    response = request(operation_id, page_params, **request_options)
    yield response
    fetched += 1
    break if Pagination.page_empty?(response)

    page_value += step
  end
end
|
|
277
|
+
|
|
278
|
+
# Yield individual items across pages. +items+ is a callable that extracts
# the list from one page; when omitted, Pagination.default_items is used.
# All other keyword options are passed through to +paginate+. Without a
# block an Enumerator is returned.
def paginate_items(operation_id, params = {}, items: nil, **options, &block)
  unless block_given?
    return enum_for(:paginate_items, operation_id, params, items: items, **options)
  end

  extractor = items
  extractor ||= Pagination.method(:default_items)
  paginate(operation_id, params, **options) { |page| extractor.call(page).each(&block) }
end
|
|
288
|
+
|
|
289
|
+
private
|
|
290
|
+
|
|
291
|
+
# Perform a single attempt of an operation: build the URL/body, merge
# headers, run +before_request+ hooks, call the transport (through the rate
# limiter when configured), then parse the response and run +after_response+
# hooks.
#
# @return the parsed body (possibly replaced by an +after_response+ hook), or
#   a StringIO over the raw body when +response_type+ is "stream" and the
#   status is 2xx
# @raise [NetworkError] when the transport raises any StandardError
# @raise [Error] when a 2xx body cannot be parsed as JSON
# @raise [ClientError, ServerError, Error] for non-2xx statuses, as chosen by
#   Crawlora.error_class_for
def send_request(operation, params, response_type:, timeout:, headers:, idempotency_key: nil)
  url, body, body_headers = build_request(@base_url, operation, params)
  # Later sources win, so per-call headers override everything else.
  request_headers = merge_headers(
    @headers,
    auth_headers(operation["security"] || [], @api_key, @jwt_token),
    @user_agent.empty? ? {} : { "User-Agent" => @user_agent },
    body_headers,
    headers || {}
  )
  req_id =
    if @request_id
      ensure_request_id(request_headers)
    else
      supplied = header_value(request_headers, "x-request-id")
      supplied.empty? ? nil : supplied
    end
  if idempotency_key && header_value(request_headers, "idempotency-key").empty?
    request_headers["Idempotency-Key"] = idempotency_key
  end
  unless @before_request.empty?
    # Hooks may replace :url and :headers in the context hash.
    ctx = { operation: operation["id"], method: operation["method"], url: url, headers: request_headers }
    @before_request.each { |hook| hook.call(ctx) }
    url = ctx[:url]
    request_headers = ctx[:headers]
  end

  request_timeout = timeout.nil? ? @timeout : timeout
  dispatch = lambda do
    @transport.call(method: operation["method"], url: url, headers: request_headers,
                    body: body, timeout: request_timeout)
  end
  begin
    response = @rate_limiter ? @rate_limiter.run(&dispatch) : dispatch.call
  rescue StandardError => e
    message = timeout_error?(e) ? "Crawlora request timed out" : "Crawlora transport error"
    raise NetworkError.new(message, request_id: req_id, cause: e)
  end

  raw_body = response.body.to_s
  is_error = response.status < 200 || response.status >= 300
  return StringIO.new(raw_body) if response_type == "stream" && !is_error

  # A failed "stream" request is parsed like "auto" so the error carries a body.
  parse_mode = response_type == "stream" ? "auto" : response_type
  begin
    parsed = parse_response(raw_body, header_value(response.headers, "content-type"), parse_mode)
  rescue JSON::ParserError => e
    unless is_error
      raise Error.new("Crawlora JSON parse error", status: response.status, raw_body: raw_body,
                                                   headers: response.headers, request_id: req_id, cause: e)
    end
    # Error responses (e.g. an HTML page from a proxy) keep their raw text so
    # the status-specific error class below is still raised.
    parsed = raw_body
  end

  if is_error
    code = parsed.is_a?(Hash) ? parsed["code"] : nil
    message = parsed.is_a?(Hash) && parsed["msg"] && !parsed["msg"].to_s.empty? ? parsed["msg"] : "HTTP #{response.status}"
    raise Crawlora.error_class_for(response.status).new(
      message, status: response.status, code: code, body: parsed,
               raw_body: raw_body, headers: response.headers, request_id: req_id
    )
  end

  # A hook returning nil leaves the parsed value untouched.
  @after_response.each do |hook|
    result = hook.call(operation["id"], response.status, response.headers, parsed)
    parsed = result unless result.nil?
  end
  parsed
end
|
|
357
|
+
|
|
358
|
+
# Decide whether a failed attempt may be retried.
#
# Order of precedence: the client's +retry_predicate+ (its result coerced to
# true/false), then the client's explicit +retry_statuses+ set, then the
# default rule. Status 0 is retryable in the last two cases; the default rule
# also retries DEFAULT_RETRY_STATUSES and any status of 500 or above.
def retryable?(status, exc)
  if @retry_predicate
    !!@retry_predicate.call(status, exc)
  elsif @retry_statuses
    status.zero? || @retry_statuses.include?(status)
  else
    status.zero? || DEFAULT_RETRY_STATUSES.include?(status) || status >= 500
  end
end
|
|
364
|
+
|
|
365
|
+
# Seconds to wait before retry number +attempt+ (1-based).
#
# A usable Retry-After header wins (already capped by +retry_after_delay+).
# Otherwise the delay is exponential backoff from +@retry_delay+ plus random
# jitter of up to half the base delay, capped at +@max_retry_delay+ so a
# large retry budget cannot produce an unbounded sleep.
#
# @param attempt [Integer] retry number, starting at 1
# @param headers [Hash] headers of the failed response
# @return [Float] non-negative delay in seconds
def compute_retry_delay(attempt, headers)
  retry_after = retry_after_delay(headers, @max_retry_delay)
  return retry_after if retry_after
  return 0.0 if @retry_delay <= 0

  backoff = @retry_delay * (2**[0, attempt - 1].max)
  jitter = rand * (@retry_delay / 2)
  [backoff + jitter, @max_retry_delay].min
end
|
|
373
|
+
|
|
374
|
+
# Pass an event hash to the configured logger callable; no-op without one.
def log(event)
  return if @logger.nil?

  @logger.call(event)
end
|
|
377
|
+
|
|
378
|
+
# Normalise a hook option into an Array: nil becomes an empty list, a single
# callable is wrapped, and anything else is converted with +to_a+.
def as_hook_list(value)
  if value.nil?
    []
  elsif value.respond_to?(:call)
    [value]
  else
    value.to_a
  end
end
|
|
384
|
+
|
|
385
|
+
# Return a new Hash with every key converted via +to_s+. A nil argument is
# treated as an empty Hash.
def stringify_keys(params)
  converted = {}
  (params || {}).each { |key, value| converted[key.to_s] = value }
  converted
end
|
|
388
|
+
|
|
389
|
+
# Turn an operation description plus string-keyed params into the pieces of
# an HTTP request.
#
# @param base_url [String] API base URL without a trailing slash
# @param operation [Hash] operation metadata ("path", "pathParams",
#   "queryParams", "formParams", "bodyParam", ...)
# @param params [Hash] string-keyed parameters
# @return [Array(String, String|nil, Hash)] URL, body, and body headers
# @raise [ArgumentError] when a required or path parameter is missing, or an
#   enum parameter has an unexpected value
def build_request(base_url, operation, params)
  validate_required_params(operation, params)
  validate_enum_params(operation, params)

  path = operation["path"].dup
  (operation["pathParams"] || []).each do |name|
    value = params[name]
    raise ArgumentError, "missing required path parameter: #{name}" if value.nil? || value == ""

    # Block form: the replacement is inserted literally (no backreferences).
    path = path.gsub("{#{name}}") { url_escape(value) }
  end

  # Arrays become repeated name=value pairs; nil and "" are left out.
  query = (operation["queryParams"] || []).flat_map do |parameter|
    name = parameter["name"]
    value = params[name]
    next [] if value.nil? || value == ""

    (value.is_a?(Array) ? value : [value]).map { |item| [name, stringify_param(item)] }
  end
  url = base_url + path
  url += "?#{URI.encode_www_form(query)}" unless query.empty?

  form_params = operation["formParams"]
  return [url, *multipart_body(form_params, params)] if form_params && !form_params.empty?

  body_param = operation["bodyParam"]
  if body_param
    # Fall back to the generic "body" key whenever the named key is absent
    # OR nil, matching what validate_required_params accepts.
    value = params[body_param]
    value = params["body"] if value.nil?
    return [url, JSON.generate(value), { "content-type" => "application/json" }] unless value.nil?
  end

  [url, nil, {}]
end
|
|
426
|
+
|
|
427
|
+
# Raise ArgumentError for the first required input that is missing, checking
# in this order: path parameters, required query parameters, required form
# parameters, then the request body (accepted under either the operation's
# body parameter name or the generic "body" key).
def validate_required_params(operation, params)
  absent_path = (operation["pathParams"] || []).find { |name| missing?(params[name]) }
  raise ArgumentError, "missing required path parameter: #{absent_path}" if absent_path

  %w[queryParams formParams].each do |location|
    (operation[location] || []).each do |parameter|
      if parameter["required"] && missing?(params[parameter["name"]])
        raise ArgumentError, "missing required #{parameter["in"] || "request"} parameter: #{parameter["name"]}"
      end
    end
  end

  if operation["bodyRequired"]
    body_param = operation["bodyParam"]
    if missing?(params[body_param]) && missing?(params["body"])
      raise ArgumentError, "missing required body parameter: #{body_param}"
    end
  end
end
|
|
445
|
+
|
|
446
|
+
# Check every supplied query/form parameter that declares an "enum" list.
# Each value (or each element of an Array value) is compared in its
# stringified form; the first value outside the list raises ArgumentError.
# Parameters without an enum, or with a missing value, are skipped.
def validate_enum_params(operation, params)
  %w[queryParams formParams].each do |location|
    (operation[location] || []).each do |parameter|
      enum_values = parameter["enum"] || []
      supplied = params[parameter["name"]]
      next if enum_values.empty? || missing?(supplied)

      candidates = supplied.is_a?(Array) ? supplied : [supplied]
      next if candidates.all? { |item| enum_values.include?(stringify_param(item)) }

      location_name = parameter["in"] || "request"
      raise ArgumentError, "invalid #{location_name} parameter #{parameter["name"]}: expected one of #{enum_values.join(", ")}"
    end
  end
end
|
|
463
|
+
|
|
464
|
+
# True when a parameter value counts as "not supplied": nil, the empty
# string, or an empty Array.
def missing?(value)
  return true if value.nil?
  return true if value == ""

  value.is_a?(Array) && value.empty?
end
|
|
467
|
+
|
|
468
|
+
# Encode form parameters as a multipart/form-data body.
#
# The buffer is binary so text fields containing non-ASCII characters can be
# combined with arbitrary file bytes without an encoding error. Parameters
# whose value is nil are omitted. Parameters of type "file" are read through
# +read_file_value+ and sent as application/octet-stream.
#
# @param form_params [Array<Hash>] form parameter metadata ("name", "type")
# @param params [Hash] string-keyed parameter values
# @return [Array(String, Hash)] binary body and its content-type header
def multipart_body(form_params, params)
  boundary = "crawlora-#{SecureRandom.hex(16)}"
  buffer = String.new(encoding: Encoding::BINARY)
  form_params.each do |parameter|
    name = parameter["name"]
    value = params[name]
    next if value.nil?

    field = escape_multipart_token(name)
    buffer << "--#{boundary}\r\n"
    if parameter["type"] == "file"
      filename, data = read_file_value(value)
      buffer << %(Content-Disposition: form-data; name="#{field}"; filename="#{escape_multipart_token(filename)}"\r\n).b
      buffer << "Content-Type: application/octet-stream\r\n\r\n"
      buffer << data.to_s.b
    else
      buffer << %(Content-Disposition: form-data; name="#{field}"\r\n\r\n).b
      buffer << value.to_s.b
    end
    buffer << "\r\n"
  end
  buffer << "--#{boundary}--\r\n"
  [buffer, { "content-type" => "multipart/form-data; boundary=#{boundary}" }]
end

# Percent-encode the characters that would break out of a quoted
# Content-Disposition value: double quote, CR and LF.
def escape_multipart_token(token)
  token.to_s.b.gsub(/["\r\n]/) { |char| format("%%%02X", char.ord) }
end
|
|
490
|
+
|
|
491
|
+
# Resolve a file parameter into [filename, data].
#
# * A String naming an existing path is read from disk.
# * Any other String is treated as the raw content and named "upload.bin".
#   Strings containing a NUL byte can never be paths and are treated as
#   content without touching the filesystem (File.exist? would raise
#   ArgumentError on them).
# * An object with +read+ and +path+ is read and named after its path.
# * Anything else is read via +read+ and named "upload.bin".
def read_file_value(value)
  if value.is_a?(String)
    on_disk = !value.include?("\0") && File.exist?(value)
    return ["upload.bin", value] unless on_disk

    return [File.basename(value), File.binread(value)]
  end
  return [File.basename(value.path), value.read] if value.respond_to?(:read) && value.respond_to?(:path)

  ["upload.bin", value.read]
end
|
|
498
|
+
|
|
499
|
+
# Build the authentication headers an operation asks for.
#
# "ApiKeyAuth" adds +x-api-key+ when an API key is configured. "JWTAuth" adds
# +Authorization+ when a token is configured; a token already starting with
# "Token " or "Bearer " (any case) is sent as-is, otherwise it is prefixed
# with "Token ".
def auth_headers(security, api_key, jwt_token)
  result = {}
  if security.include?("ApiKeyAuth") && !api_key.empty?
    result["x-api-key"] = api_key
  end
  return result unless security.include?("JWTAuth") && !jwt_token.empty?

  has_scheme = jwt_token.downcase.start_with?("token ", "bearer ")
  result["Authorization"] = has_scheme ? jwt_token : "Token #{jwt_token}"
  result
end
|
|
508
|
+
|
|
509
|
+
# Merge header hashes left to right, treating names case-insensitively.
#
# When a later source repeats a name (in any letter case) its spelling and
# value replace the earlier entry. Names are converted to Strings first, so
# Symbol keys collide with their String equivalents instead of producing a
# duplicate header. Values are stringified. nil sources are skipped.
#
# @param sources [Array<Hash, nil>] header hashes, lowest priority first
# @return [Hash{String => String}]
def merge_headers(*sources)
  merged = {}
  spelling = {} # lowercase name => spelling currently stored in merged
  sources.each do |source|
    next if source.nil?

    source.each do |name, value|
      name = name.to_s
      lower = name.downcase
      previous = spelling[lower]
      merged.delete(previous) if previous && previous != name
      merged[name] = value.to_s
      spelling[lower] = name
    end
  end
  merged
end
|
|
523
|
+
|
|
524
|
+
# Validate a response type and return it as a String.
#
# Accepts any of RESPONSE_TYPES given as a String or a Symbol (for example
# +:json+); the returned value is always the String form, which is what the
# rest of the client compares against.
#
# @raise [ArgumentError] when the value is not one of RESPONSE_TYPES
def validate_response_type(response_type)
  normalized = response_type.to_s
  return normalized if RESPONSE_TYPES.include?(normalized)

  raise ArgumentError, "invalid response_type: expected one of #{RESPONSE_TYPES.join(", ")}"
end
|
|
529
|
+
|
|
530
|
+
# Decode a response body.
#
# "text" returns the body untouched. JSON decoding happens when the type is
# "json" or the content type mentions "application/json"; an empty body then
# decodes to nil. Every other combination returns the body untouched.
#
# @raise [JSON::ParserError] when JSON decoding is attempted on invalid input
def parse_response(body, content_type, response_type)
  return body if response_type == "text"

  wants_json = response_type == "json" || content_type.downcase.include?("application/json")
  return body unless wants_json

  body.empty? ? nil : JSON.parse(body)
end
|
|
539
|
+
|
|
540
|
+
# Convert a parameter value to its wire form: booleans become "true" or
# "false", everything else goes through +to_s+.
def stringify_param(value)
  case value
  when true then "true"
  when false then "false"
  else value.to_s
  end
end
|
|
545
|
+
|
|
546
|
+
# Percent-encode a value for use as a URL path segment. CGI.escape writes a
# space as "+", which is rewritten to "%20"; a literal "+" is already
# escaped to "%2B" at that point and is unaffected.
def url_escape(value)
  form_encoded = CGI.escape(value.to_s)
  form_encoded.gsub("+", "%20")
end
|
|
549
|
+
|
|
550
|
+
# Return the request id carried by +headers+, generating one when absent.
# An existing non-empty x-request-id header is returned unchanged; otherwise
# a random 32-character hex id is stored under "x-request-id" and returned.
def ensure_request_id(headers)
  current = header_value(headers, "x-request-id")
  if current.empty?
    headers["x-request-id"] = SecureRandom.hex(16)
  else
    current
  end
end
|
|
558
|
+
|
|
559
|
+
# Read the Retry-After header and convert it to a delay in seconds.
#
# The value may be a positive number of seconds or an HTTP date. The result
# is limited to +cap+. nil is returned when the header is absent, cannot be
# parsed, or does not describe a positive wait.
def retry_after_delay(headers, cap)
  raw = header_value(headers, "retry-after")
  return nil if raw.empty?

  numeric = Float(raw, exception: false)
  return [numeric, cap].min if numeric && numeric.positive?

  remaining =
    begin
      Time.httpdate(raw).to_f - Time.now.to_f
    rescue ArgumentError
      return nil
    end
  return nil unless remaining.positive?

  [remaining, cap].min
end
|
|
573
|
+
|
|
574
|
+
# Case-insensitive header lookup.
#
# Returns the first matching value as a String, or "" when the header is not
# present. Header names are compared in their String form, so Symbol keys
# are matched as well.
#
# @param headers [Hash] header names to values
# @param name [String] header name to look for, any letter case
# @return [String]
def header_value(headers, name)
  wanted = name.to_s.downcase
  headers.each { |key, value| return value.to_s if key.to_s.downcase == wanted }
  ""
end
|
|
578
|
+
|
|
579
|
+
# True when an exception represents a timeout: either one of the known
# timeout classes, or any exception whose message contains "timed out"
# (compared case-insensitively).
def timeout_error?(exc)
  known = [Net::OpenTimeout, Net::ReadTimeout, Timeout::Error]
  return true if known.any? { |klass| exc.is_a?(klass) }

  exc.message.to_s.downcase.include?("timed out")
end
|
|
584
|
+
end
|
|
585
|
+
|
|
586
|
+
# Backs the grouped helpers (+client.bing.search(...)+). Each method name in
# the group maps to an operation id; calling it validates the supplied
# keyword names against the operation's declared parameters and forwards the
# call to +Client#request+.
#
# Three reserved keywords are taken out of the params and passed as request
# options instead: +_response_type+, +_timeout+ and +_headers+.
class OperationGroup
  REQUEST_OPTIONS = %i[_response_type _timeout _headers].freeze

  # @param client [Client] client that executes the requests
  # @param operations [Hash{String => String}] method name => operation id
  def initialize(client, operations)
    @client = client
    @operations = operations
  end

  def respond_to_missing?(name, include_private = false)
    return true if @operations.key?(name.to_s)

    super
  end

  # Dispatch a group method to its operation.
  #
  # @raise [NoMethodError] when the name is not an operation of this group
  # @raise [ArgumentError] when a keyword is not a parameter of the operation
  def method_missing(name, **params)
    operation_id = @operations[name.to_s]
    return super if operation_id.nil?

    response_type = params.delete(:_response_type) || "auto"
    timeout = params.delete(:_timeout)
    headers = params.delete(:_headers)

    accepted = allowed_params(operation_id)
    rejected = params.keys.map(&:to_s) - accepted
    unless rejected.empty?
      raise ArgumentError, "unexpected parameter(s) for #{operation_id}: #{rejected.sort.join(", ")}"
    end

    @client.request(operation_id, params, response_type: response_type, timeout: timeout, headers: headers)
  end

  private

  # Every parameter name the operation accepts: path, query and form names,
  # the operation's body parameter (when it has one), and the generic "body".
  def allowed_params(operation_id)
    spec = OPERATIONS[operation_id] || {}
    names = []
    names.concat(spec["pathParams"] || [])
    %w[queryParams formParams].each do |location|
      names.concat((spec[location] || []).map { |parameter| parameter["name"] })
    end
    names << spec["bodyParam"] if spec["bodyParam"]
    names << "body"
    names
  end
end
|
|
626
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Crawlora
|
|
4
|
+
# Base class for every error raised by the SDK. Carries the HTTP status, the
# parsed API `code`/body, the raw response text, response headers, and the
# request id (when one was sent).
#
# +cause+ returns the exception passed as +cause:+; when none was passed it
# falls back to Ruby's own Exception#cause, so an error raised from inside a
# +rescue+ block still reports the exception that triggered it.
class Error < StandardError
  attr_reader :status, :code, :body, :raw_body, :headers, :request_id

  # @param message [String] human-readable description
  # @param status [Integer] HTTP status; 0 when no response was received
  # @param code [Object, nil] API error code taken from the response body
  # @param body [Object, nil] parsed response body
  # @param raw_body [String] unparsed response text
  # @param headers [Hash, nil] response headers (copied)
  # @param request_id [String, nil] request id sent with the call
  # @param cause [Exception, nil] underlying exception
  def initialize(message, status: 0, code: nil, body: nil, raw_body: "", headers: nil, request_id: nil, cause: nil)
    super(message)
    @status = status
    @code = code
    @body = body
    @raw_body = raw_body
    @headers = headers ? headers.dup : {}
    @request_id = request_id
    @cause = cause
  end

  # @return [Exception, nil] the explicit cause, else the implicit one
  def cause
    @cause || super
  end
end
|
|
21
|
+
|
|
22
|
+
# Raised for 4xx API responses: the request was rejected by the API.
|
|
23
|
+
class ClientError < Error; end
|
|
24
|
+
|
|
25
|
+
# Raised for 5xx API responses: the API failed to handle a valid request.
|
|
26
|
+
class ServerError < Error; end
|
|
27
|
+
|
|
28
|
+
# Raised for transport failures and timeouts before a response arrived.
|
|
29
|
+
class NetworkError < Error; end
|
|
30
|
+
|
|
31
|
+
# Pick the error class for an HTTP status: ServerError for 500 and above,
# ClientError for 400-499, and the base Error for anything else.
def self.error_class_for(status)
  if status >= 500
    ServerError
  elsif status >= 400
    ClientError
  else
    Error
  end
end
|
|
38
|
+
end
|