crawlora 1.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
crawlora/client.py ADDED
@@ -0,0 +1,671 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import json
5
+ import mimetypes
6
+ import os
7
+ import random
8
+ import socket
9
+ import threading
10
+ import time
11
+ import uuid
12
+ from dataclasses import dataclass
13
+ from typing import Any, Callable, Iterable, Mapping, Literal
14
+ from urllib.error import HTTPError, URLError
15
+ from urllib.parse import urlencode, quote
16
+ from urllib.request import Request, urlopen
17
+
18
+ from ._pagination import default_items, default_start, detect_page_param, page_is_empty
19
+ from ._transport_sync import KeepAliveTransport
20
+ from .operations import GROUPS, OPERATIONS
21
+
22
# --- Service endpoint and SDK identity -------------------------------------
DEFAULT_BASE_URL = "https://api.crawlora.net/api/v1"
VERSION = "1.5.0-sdk.1"
DEFAULT_USER_AGENT = f"crawlora-python-sdk/{VERSION}"

# --- Retry defaults ---------------------------------------------------------
# Upper bound (seconds) handed to the Retry-After parser as its cap.
DEFAULT_MAX_RETRY_DELAY = 30.0
# Non-5xx statuses retried by default; 5xx and network failures (status 0)
# are handled separately by _should_retry.
DEFAULT_RETRY_STATUSES = (408, 409, 425, 429)

# --- Public callable / literal type aliases ---------------------------------
ResponseType = Literal["auto", "json", "text", "stream"]
# (status, exception-or-None) -> should the request be retried?
RetryPredicate = Callable[[int, "BaseException | None"], bool]
# (attempt number, triggering exception, delay in seconds) -> None
RetryHook = Callable[[int, "BaseException", float], None]
# Receives one structured event mapping per log call.
Logger = Callable[[Mapping[str, Any]], None]
# before_request receives a mutable context dict {operation, method, url, headers};
# mutating "headers"/"url" rewrites the outgoing request. after_response receives
# (operation_id, status, headers, body) and may return a replacement body.
BeforeRequest = Callable[[dict], None]
AfterResponse = Callable[[str, int, Mapping[str, str], Any], Any]
36
+
37
+
38
+ def _as_hook_list(value: Any) -> list:
39
+ if value is None:
40
+ return []
41
+ if callable(value):
42
+ return [value]
43
+ return list(value)
44
+
45
+
46
+ def _run_before_request(hooks: list, ctx: dict) -> None:
47
+ for hook in hooks:
48
+ hook(ctx)
49
+
50
+
51
+ def _run_after_response(hooks: list, operation_id: str, status: int, headers: Mapping[str, str], body: Any) -> Any:
52
+ for hook in hooks:
53
+ result = hook(operation_id, status, headers, body)
54
+ if result is not None:
55
+ body = result
56
+ return body
57
+
58
+
59
class CrawloraError(Exception):
    """Base exception for failures reported by the Crawlora client.

    Attributes:
        status: HTTP status of the response, or ``0`` when none was given.
        code: Optional API-level error code.
        body: Parsed response body, if any.
        raw_body: Response body as text.
        headers: Copy of the response headers (empty dict when none given).
        request_id: Request id associated with the call, if known.
    """

    def __init__(
        self,
        message: str,
        *,
        status: int = 0,
        code: int | None = None,
        body: Any = None,
        raw_body: str = "",
        headers: Mapping[str, str] | None = None,
        request_id: str | None = None,
        cause: BaseException | None = None,
    ):
        super().__init__(message)
        self.status = status
        self.code = code
        self.body = body
        self.raw_body = raw_body
        self.headers = dict(headers or {})
        self.request_id = request_id
        # Assigning __cause__ (even to None) flips __suppress_context__ to True,
        # which would hide the implicit "during handling of ..." context in
        # tracebacks. Only set it when an explicit cause was supplied.
        if cause is not None:
            self.__cause__ = cause
80
+
81
+
82
class CrawloraClientError(CrawloraError):
    """Error for 4xx API responses, i.e. the API rejected the request."""
84
+
85
+
86
class CrawloraServerError(CrawloraError):
    """Error for 5xx API responses, i.e. the API failed on a valid request."""
88
+
89
+
90
class CrawloraNetworkError(CrawloraError):
    """Error for transport failures and timeouts that occur before any response arrives."""
92
+
93
+
94
def _api_error_class(status: int) -> type[CrawloraError]:
    """Pick the exception class matching an HTTP status.

    5xx maps to ``CrawloraServerError``, 4xx to ``CrawloraClientError``, and
    anything else to the base ``CrawloraError``.
    """
    if status >= 500:
        return CrawloraServerError
    if status >= 400:
        return CrawloraClientError
    return CrawloraError
100
+
101
+
102
+ @dataclass(frozen=True)
103
+ class _Response:
104
+ status: int
105
+ headers: Mapping[str, str]
106
+ body: bytes
107
+
108
+
109
+ class _RateLimiter:
110
+ """Optional client-side throttle: caps concurrency and spaces requests to a
111
+ maximum rate (requests per second)."""
112
+
113
+ def __init__(self, rps: float | None, concurrency: int | None) -> None:
114
+ self._interval = (1.0 / rps) if rps and rps > 0 else 0.0
115
+ self._sem = threading.Semaphore(concurrency) if concurrency and concurrency > 0 else None
116
+ self._lock = threading.Lock()
117
+ self._next = 0.0
118
+
119
+ def __enter__(self) -> "_RateLimiter":
120
+ if self._sem is not None:
121
+ self._sem.acquire()
122
+ if self._interval:
123
+ with self._lock:
124
+ now = time.monotonic()
125
+ wait = max(0.0, self._next - now)
126
+ self._next = max(now, self._next) + self._interval
127
+ if wait > 0:
128
+ time.sleep(wait)
129
+ return self
130
+
131
+ def __exit__(self, *_exc: Any) -> None:
132
+ if self._sem is not None:
133
+ self._sem.release()
134
+
135
+
136
class CrawloraClient:
    """Synchronous client for the Crawlora API.

    Call operations via grouped helpers (``client.bing.search(q="...")``) or
    dynamically (``client.request("bing-search", {"q": "..."})``). Supports
    configurable retries, an ``on_retry`` hook, opt-in ``request_id`` and
    ``idempotency_keys``, ``before_request``/``after_response`` middleware,
    client-side ``rate_limit``/``max_concurrency``, pagination
    (``paginate``/``paginate_items``), and ``response_type="stream"``. Uses a
    keep-alive connection pool by default; use it as a context manager (or call
    ``close()``) to release pooled connections. See ``AsyncCrawloraClient`` for
    an asyncio client.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        jwt_token: str | None = None,
        base_url: str | None = None,
        timeout: float = 30,
        retries: int = 0,
        retry_delay: float = 0.25,
        max_retry_delay: float = DEFAULT_MAX_RETRY_DELAY,
        retry_statuses: Iterable[int] | None = None,
        retry_predicate: RetryPredicate | None = None,
        on_retry: RetryHook | None = None,
        request_id: bool = False,
        idempotency_keys: bool = False,
        rate_limit: float | None = None,
        max_concurrency: int | None = None,
        logger: Logger | None = None,
        before_request: BeforeRequest | Iterable[BeforeRequest] | None = None,
        after_response: AfterResponse | Iterable[AfterResponse] | None = None,
        headers: Mapping[str, str] | None = None,
        user_agent: str | None = DEFAULT_USER_AGENT,
        transport: Callable[[Request, float], _Response] | None = None,
    ) -> None:
        """Configure the client.

        ``api_key`` and ``base_url`` fall back to the ``CRAWLORA_API_KEY`` and
        ``CRAWLORA_BASE_URL`` environment variables. ``retries``,
        ``retry_delay`` and ``max_retry_delay`` are clamped to be non-negative.
        ``before_request``/``after_response`` accept a single callable or an
        iterable of callables. ``transport`` replaces the default keep-alive
        transport and is called as ``transport(request, timeout)``.
        """
        # Precedence: explicit argument > environment variable > default.
        self.api_key = api_key or os.environ.get("CRAWLORA_API_KEY", "")
        self.jwt_token = jwt_token or ""
        self.base_url = (base_url or os.environ.get("CRAWLORA_BASE_URL") or DEFAULT_BASE_URL).rstrip("/")
        self.timeout = timeout
        self.retries = max(0, int(retries))
        self.retry_delay = max(0.0, float(retry_delay))
        self.max_retry_delay = max(0.0, float(max_retry_delay))
        self.retry_statuses = frozenset(retry_statuses) if retry_statuses is not None else None
        self.retry_predicate = retry_predicate
        self.on_retry = on_retry
        self.request_id = request_id
        self.idempotency_keys = idempotency_keys
        self.rate_limit = rate_limit
        self.max_concurrency = max_concurrency
        self._rate_limiter = _RateLimiter(rate_limit, max_concurrency) if (rate_limit or max_concurrency) else None
        self.logger = logger
        self.before_request = _as_hook_list(before_request)
        self.after_response = _as_hook_list(after_response)
        self.headers = dict(headers or {})
        self.user_agent = user_agent or ""
        # Default to a keep-alive pool (connection reuse); an injected transport
        # (e.g. tests) is used as-is.
        self._transport = transport or KeepAliveTransport()

        # Expose each operation group as an attribute (client.<group>.<operation>).
        for group_name, operations in GROUPS.items():
            setattr(self, group_name, _OperationGroup(self, operations))

    def close(self) -> None:
        """Close pooled keep-alive connections, if any."""
        closer = getattr(self._transport, "close", None)
        if callable(closer):
            closer()

    def __enter__(self) -> "CrawloraClient":
        return self

    def __exit__(self, *_exc: Any) -> None:
        self.close()

    def _is_retryable(self, status: int, exc: BaseException | None) -> bool:
        """Decide whether a failure with ``status`` should be retried.

        Precedence: client-level ``retry_predicate``, then ``retry_statuses``,
        then the module default policy.
        """
        if self.retry_predicate is not None:
            return bool(self.retry_predicate(status, exc))
        if self.retry_statuses is not None:
            # Network failures (status 0) stay retryable unless a predicate decides.
            return status == 0 or status in self.retry_statuses
        return _should_retry(status)

    def _compute_retry_delay(self, attempt: int, headers: Mapping[str, str]) -> float:
        """Return the delay in seconds before retry number ``attempt``.

        A usable ``Retry-After`` header wins. Otherwise the delay is
        exponential backoff from ``retry_delay`` plus jitter. Both paths are
        bounded by ``max_retry_delay``.
        """
        retry_after = _retry_after_delay(headers, self.max_retry_delay)
        if retry_after is not None:
            return retry_after
        if self.retry_delay <= 0:
            return 0.0
        delay = self.retry_delay * (2 ** max(0, attempt - 1))
        jitter = random.uniform(0, self.retry_delay / 2)
        # Apply the same ceiling as the Retry-After path so repeated doubling
        # cannot produce multi-minute sleeps.
        return min(delay + jitter, self.max_retry_delay)

    def _log(self, event: Mapping[str, Any]) -> None:
        """Forward a structured event to the configured logger, if any."""
        if self.logger is not None:
            self.logger(event)

    def operation(
        self,
        operation_id: str,
        params: Mapping[str, Any] | None = None,
        *,
        response_type: ResponseType = "auto",
        timeout: float | None = None,
        headers: Mapping[str, str] | None = None,
        retries: int | None = None,
        retry_predicate: RetryPredicate | None = None,
    ) -> Any:
        """Alias of :meth:`request` with the same arguments and result."""
        return self.request(
            operation_id, params, response_type=response_type, timeout=timeout, headers=headers,
            retries=retries, retry_predicate=retry_predicate,
        )

    def request(
        self,
        operation_id: str,
        params: Mapping[str, Any] | None = None,
        *,
        response_type: ResponseType = "auto",
        timeout: float | None = None,
        headers: Mapping[str, str] | None = None,
        retries: int | None = None,
        retry_predicate: RetryPredicate | None = None,
    ) -> Any:
        """Execute ``operation_id`` with ``params``, retrying on retryable failures.

        ``retries`` and ``retry_predicate`` override the client-level settings
        for this call only.

        Raises:
            ValueError: unknown operation id or invalid ``response_type``.
            CrawloraError: the request failed and was not (or no longer) retried.
        """
        operation = OPERATIONS.get(operation_id)
        if operation is None:
            raise ValueError(f"unknown Crawlora operation: {operation_id}")
        response_type = _validate_response_type(response_type)
        self._log({"event": "request", "operation": operation_id})
        max_retries = self.retries if retries is None else max(0, int(retries))
        # One key for the whole call so every retry attempt carries the same value.
        idempotency_key = uuid.uuid4().hex if self.idempotency_keys and operation["method"] in ("POST", "PATCH") else None

        attempt = 0
        while True:
            try:
                return self._send(operation, dict(params or {}), response_type=response_type, timeout=timeout, headers=headers, idempotency_key=idempotency_key)
            except CrawloraError as exc:
                retryable = retry_predicate(exc.status, exc) if retry_predicate is not None else self._is_retryable(exc.status, exc)
                if attempt >= max_retries or not retryable:
                    raise
                attempt += 1
                delay = self._compute_retry_delay(attempt, exc.headers)
                self._log({"event": "retry", "operation": operation_id, "attempt": attempt, "status": exc.status, "delay": delay})
                if self.on_retry is not None:
                    self.on_retry(attempt, exc, delay)
                if delay > 0:
                    time.sleep(delay)

    def _send(
        self,
        operation: Mapping[str, Any],
        params: dict[str, Any],
        *,
        response_type: ResponseType,
        timeout: float | None,
        headers: Mapping[str, str] | None,
        idempotency_key: str | None = None,
    ) -> Any:
        """Perform a single attempt: build, send, parse, and classify the response.

        Raises:
            CrawloraNetworkError: the transport raised before a response arrived.
            CrawloraError: non-2xx status (typed by status class) or a
                successful response whose body could not be decoded.
        """
        url, body, body_headers = _build_request(self.base_url, operation, params)
        # Later sources override earlier ones; per-call headers win.
        request_headers = _merge_headers(
            self.headers,
            _auth_headers(operation.get("security", []), self.api_key, self.jwt_token),
            {"User-Agent": self.user_agent} if self.user_agent else {},
            body_headers,
            headers or {},
        )
        if self.request_id:
            req_id = _ensure_request_id(request_headers)
        else:
            req_id = _header_value(request_headers, "x-request-id") or None
        if idempotency_key and not _header_value(request_headers, "idempotency-key"):
            request_headers["Idempotency-Key"] = idempotency_key
        if self.before_request:
            ctx = {"operation": operation.get("id"), "method": operation["method"], "url": url, "headers": request_headers}
            _run_before_request(self.before_request, ctx)
            # Hooks may have replaced either entry in the context.
            url, request_headers = ctx["url"], ctx["headers"]
        request = Request(url, data=body, headers=request_headers, method=operation["method"])
        request_timeout = timeout if timeout is not None else self.timeout
        try:
            if self._rate_limiter is not None:
                with self._rate_limiter:
                    response = self._transport(request, request_timeout)
            else:
                response = self._transport(request, request_timeout)
        except Exception as exc:
            message = "Crawlora request timed out" if _is_timeout_error(exc) else "Crawlora transport error"
            raise CrawloraNetworkError(message, request_id=req_id, cause=exc) from exc
        raw_body = response.body.decode(errors="replace")
        is_error = response.status < 200 or response.status >= 300
        if response_type == "stream" and not is_error:
            # Caller reads the file-like body; truly incremental streaming is
            # available on AsyncCrawloraClient (httpx).
            return io.BytesIO(response.body)
        parse_mode = "auto" if response_type == "stream" else response_type
        try:
            parsed = _parse_response(response.body, _header_value(response.headers, "content-type"), parse_mode)
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            if not is_error:
                message = "Crawlora JSON parse error" if isinstance(exc, json.JSONDecodeError) else "Crawlora response decode error"
                raise CrawloraError(
                    message,
                    status=response.status,
                    raw_body=raw_body,
                    headers=response.headers,
                    request_id=req_id,
                    cause=exc,
                ) from exc
            # An error response with an unparsable body (e.g. an HTML gateway
            # page) must still surface as the typed client/server error below.
            parsed = raw_body
        if is_error:
            code = parsed.get("code") if isinstance(parsed, dict) else None
            message = parsed.get("msg") if isinstance(parsed, dict) and parsed.get("msg") else f"HTTP {response.status}"
            error_class = _api_error_class(response.status)
            raise error_class(message, status=response.status, code=code, body=parsed, raw_body=raw_body, headers=response.headers, request_id=req_id)
        if self.after_response:
            parsed = _run_after_response(self.after_response, operation.get("id"), response.status, response.headers, parsed)
        return parsed

    def paginate(
        self,
        operation_id: str,
        params: Mapping[str, Any] | None = None,
        *,
        page_param: str | None = None,
        cursor_param: str | None = None,
        next_cursor: Callable[[Any], Any] | None = None,
        start: Any = None,
        step: int = 1,
        max_pages: int | None = None,
        response_type: ResponseType = "auto",
        timeout: float | None = None,
        headers: Mapping[str, str] | None = None,
    ):
        """Yield successive pages of a paginated operation.

        Numeric mode (default) advances the ``page``/``offset`` query parameter
        and stops on an empty page. Cursor mode (pass both ``cursor_param`` and a
        ``next_cursor`` extractor) sends the cursor parameter and stops when
        ``next_cursor`` returns a falsy value. ``max_pages`` bounds the number
        of pages fetched in either mode.

        This is a generator, so argument validation errors (``ValueError``)
        are raised on first iteration rather than at call time.
        """
        operation = OPERATIONS.get(operation_id)
        if operation is None:
            raise ValueError(f"unknown Crawlora operation: {operation_id}")
        base_params = dict(params or {})

        if cursor_param or next_cursor:
            if not (cursor_param and next_cursor):
                raise ValueError("cursor pagination requires both cursor_param and next_cursor")
            if cursor_param not in {p["name"] for p in operation.get("queryParams", [])}:
                raise ValueError(f"cursor_param {cursor_param!r} is not a query parameter of operation {operation_id}")
            cursor = start
            fetched = 0
            while max_pages is None or fetched < max_pages:
                page_params = dict(base_params)
                # The first request omits the cursor unless ``start`` was given.
                if cursor is not None:
                    page_params[cursor_param] = cursor
                response = self.request(operation_id, page_params, response_type=response_type, timeout=timeout, headers=headers)
                yield response
                fetched += 1
                cursor = next_cursor(response)
                if not cursor:
                    break
            return

        page_param = page_param or detect_page_param(operation)
        if not page_param:
            raise ValueError(f"operation {operation_id} has no page or offset query parameter to paginate")
        page_value = default_start(page_param) if start is None else start
        fetched = 0
        while max_pages is None or fetched < max_pages:
            page_params = {**base_params, page_param: page_value}
            response = self.request(operation_id, page_params, response_type=response_type, timeout=timeout, headers=headers)
            yield response
            fetched += 1
            if page_is_empty(response):
                break
            page_value += step

    def paginate_items(
        self,
        operation_id: str,
        params: Mapping[str, Any] | None = None,
        *,
        items: Callable[[Any], Any] | None = None,
        **kwargs: Any,
    ):
        """Yield individual items across pages. ``items`` extracts the list from
        a page (default: the Crawlora ``data`` array). Remaining keyword
        arguments are forwarded to :meth:`paginate`."""
        extract = items or default_items
        for page in self.paginate(operation_id, params, **kwargs):
            yield from extract(page)

    @staticmethod
    def _urlopen_transport(request: Request, timeout: float) -> _Response:
        """Transport built on ``urllib.request.urlopen``.

        HTTP error statuses are returned as a ``_Response`` rather than raised;
        other ``URLError`` failures propagate to the caller.
        """
        try:
            with urlopen(request, timeout=timeout) as response:
                return _Response(response.status, dict(response.headers.items()), response.read())
        except HTTPError as exc:
            # HTTPError doubles as the response object; close it once the body
            # has been read so the underlying socket is released.
            try:
                return _Response(exc.code, dict(exc.headers.items()), exc.read())
            finally:
                exc.close()
434
+
435
+
436
def _allowed_params(operation_id: str) -> set[str]:
    """Collect every parameter name accepted by ``operation_id``.

    The result is the union of path, query and form parameter names, the
    operation's body parameter name (when it has one), and the generic
    ``"body"`` alias. An unknown operation yields just ``{"body"}``.
    """
    spec = OPERATIONS.get(operation_id) or {}
    names = {"body", *spec.get("pathParams", [])}
    for section in ("queryParams", "formParams"):
        names.update(entry["name"] for entry in spec.get(section, []))
    body_key = spec.get("bodyParam")
    if body_key:
        names.add(body_key)
    return names
445
+
446
+
447
+ _REQUEST_OPTION_KWARGS = ("_response_type", "_timeout", "_headers")
448
+
449
+
450
+ class _OperationGroup:
451
+ def __init__(self, client: CrawloraClient, operations: Mapping[str, str]) -> None:
452
+ self._client = client
453
+ self._operations = operations
454
+
455
+ def __getattr__(self, name: str) -> Callable[..., Any]:
456
+ operation_id = self._operations.get(name)
457
+ if operation_id is None:
458
+ raise AttributeError(name)
459
+ allowed = _allowed_params(operation_id)
460
+
461
+ def call(**params: Any) -> Any:
462
+ response_type = params.pop("_response_type", "auto")
463
+ timeout = params.pop("_timeout", None)
464
+ headers = params.pop("_headers", None)
465
+ unknown = set(params) - allowed
466
+ if unknown:
467
+ raise TypeError(f"unexpected parameter(s) for {operation_id}: {', '.join(sorted(unknown))}")
468
+ return self._client.request(operation_id, params, response_type=response_type, timeout=timeout, headers=headers)
469
+
470
+ return call
471
+
472
+
473
def _build_request(base_url: str, operation: Mapping[str, Any], params: dict[str, Any]) -> tuple[str, bytes | None, dict[str, str]]:
    """Turn an operation spec plus parameters into ``(url, body, body_headers)``.

    Required and enum parameters are validated first. Path parameters are
    URL-escaped into the path template and query parameters are appended
    (list/tuple values repeat the key). The body is multipart when the
    operation declares form parameters, JSON when a body parameter value is
    present, and ``None`` otherwise.

    Raises:
        ValueError: a required parameter is missing or an enum value is invalid.
    """
    _validate_required_params(operation, params)
    _validate_enum_params(operation, params)

    # Substitute escaped path parameters into the template.
    resolved_path = operation["path"]
    for path_name in operation.get("pathParams", []):
        path_value = params.get(path_name)
        if path_value in (None, ""):
            raise ValueError(f"missing required path parameter: {path_name}")
        placeholder = "{" + path_name + "}"
        resolved_path = resolved_path.replace(placeholder, quote(str(path_value), safe=""))

    # Collect query pairs, skipping parameters that are absent or empty.
    pairs: list[tuple[str, Any]] = []
    for spec in operation.get("queryParams", []):
        key = spec["name"]
        raw = params.get(key)
        if raw in (None, ""):
            continue
        members = raw if isinstance(raw, (list, tuple)) else (raw,)
        pairs.extend((key, _stringify_param(member)) for member in members)

    url = base_url + resolved_path
    if pairs:
        url = url + "?" + urlencode(pairs, doseq=True)

    # Form parameters take precedence over a JSON body.
    if operation.get("formParams"):
        payload, payload_headers = _multipart_body(operation["formParams"], params)
        return url, payload, payload_headers

    body_key = operation.get("bodyParam")
    if body_key:
        # The generic "body" key is only consulted when the named key is absent.
        document = params.get(body_key, params.get("body"))
        if document is not None:
            return url, json.dumps(document).encode(), {"content-type": "application/json"}

    return url, None, {}
507
+
508
+
509
def _validate_required_params(operation: Mapping[str, Any], params: Mapping[str, Any]) -> None:
    """Raise ``ValueError`` for the first required parameter that is missing.

    Checks, in order: every path parameter, query/form parameters flagged
    ``required``, and the request body when the operation sets ``bodyRequired``
    (either the named body parameter or the generic ``"body"`` key satisfies it).
    """
    for path_name in operation.get("pathParams", []):
        if _is_missing(params.get(path_name)):
            raise ValueError(f"missing required path parameter: {path_name}")

    for section in ("queryParams", "formParams"):
        for spec in operation.get(section, []):
            if not spec.get("required"):
                continue
            if not _is_missing(params.get(spec["name"])):
                continue
            where = spec.get("in", "request")
            raise ValueError(f"missing required {where} parameter: {spec['name']}")

    if not operation.get("bodyRequired"):
        return
    body_key = operation.get("bodyParam")
    if _is_missing(params.get(body_key)) and _is_missing(params.get("body")):
        raise ValueError(f"missing required body parameter: {body_key}")
522
+
523
+
524
def _validate_enum_params(operation: Mapping[str, Any], params: Mapping[str, Any]) -> None:
    """Raise ``ValueError`` when a query/form value falls outside its declared enum.

    Parameters without an enum, or whose value is missing, are skipped.
    List/tuple values are checked element by element, and each value is
    compared in its stringified form.
    """
    for section in ("queryParams", "formParams"):
        for spec in operation.get(section, []):
            choices = spec.get("enum") or []
            supplied = params.get(spec["name"])
            if not choices or _is_missing(supplied):
                continue
            candidates = supplied if isinstance(supplied, (list, tuple)) else [supplied]
            if any(_stringify_param(candidate) not in choices for candidate in candidates):
                where = spec.get("in", "request")
                expected = ", ".join(choices)
                raise ValueError(f"invalid {where} parameter {spec['name']}: expected one of {expected}")
537
+
538
+
539
+ def _is_missing(value: Any) -> bool:
540
+ return value is None or value == "" or (isinstance(value, (list, tuple)) and len(value) == 0)
541
+
542
+
543
+ def _multipart_body(form_params: list[Mapping[str, Any]], params: Mapping[str, Any]) -> tuple[bytes, dict[str, str]]:
544
+ boundary = f"crawlora-{uuid.uuid4().hex}"
545
+ chunks: list[bytes] = []
546
+ for parameter in form_params:
547
+ name = parameter["name"]
548
+ if name not in params or params[name] is None:
549
+ continue
550
+ value = params[name]
551
+ chunks.append(f"--{boundary}\r\n".encode())
552
+ if parameter.get("type") == "file":
553
+ filename, data = _read_file_value(value)
554
+ content_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
555
+ chunks.append(
556
+ f'Content-Disposition: form-data; name="{name}"; filename="{filename}"\r\n'
557
+ f"Content-Type: {content_type}\r\n\r\n".encode()
558
+ )
559
+ chunks.append(data)
560
+ chunks.append(b"\r\n")
561
+ else:
562
+ chunks.append(f'Content-Disposition: form-data; name="{name}"\r\n\r\n{value}\r\n'.encode())
563
+ chunks.append(f"--{boundary}--\r\n".encode())
564
+ return b"".join(chunks), {"content-type": f"multipart/form-data; boundary={boundary}"}
565
+
566
+
567
+ def _read_file_value(value: Any) -> tuple[str, bytes]:
568
+ if isinstance(value, (bytes, bytearray)):
569
+ return "upload.bin", bytes(value)
570
+ if isinstance(value, os.PathLike) or isinstance(value, str):
571
+ path = os.fspath(value)
572
+ with open(path, "rb") as file:
573
+ return os.path.basename(path), file.read()
574
+ name = os.path.basename(getattr(value, "name", "upload.bin"))
575
+ return name, value.read()
576
+
577
+
578
+ def _auth_headers(security: list[str], api_key: str, jwt_token: str) -> dict[str, str]:
579
+ headers: dict[str, str] = {}
580
+ if "ApiKeyAuth" in security and api_key:
581
+ headers["x-api-key"] = api_key
582
+ if "JWTAuth" in security and jwt_token:
583
+ headers["Authorization"] = jwt_token if jwt_token.lower().startswith(("token ", "bearer ")) else f"Token {jwt_token}"
584
+ return headers
585
+
586
+
587
+ def _merge_headers(*sources: Mapping[str, str]) -> dict[str, str]:
588
+ headers: dict[str, str] = {}
589
+ names: dict[str, str] = {}
590
+ for source in sources:
591
+ for name, value in source.items():
592
+ lower = name.lower()
593
+ existing = names.get(lower)
594
+ if existing and existing != name:
595
+ headers.pop(existing, None)
596
+ headers[name] = str(value)
597
+ names[lower] = name
598
+ return headers
599
+
600
+
601
+ def _validate_response_type(response_type: str) -> ResponseType:
602
+ if response_type in ("auto", "json", "text", "stream"):
603
+ return response_type # type: ignore[return-value]
604
+ raise ValueError("invalid response_type: expected one of auto, json, text, stream")
605
+
606
+
607
+ def _parse_response(body: bytes, content_type: str, response_type: str) -> Any:
608
+ if response_type == "text":
609
+ return body.decode()
610
+ if response_type == "json" or "application/json" in content_type.lower():
611
+ return json.loads(body.decode()) if body else None
612
+ return body.decode()
613
+
614
+
615
+ def _stringify_param(value: Any) -> str:
616
+ if isinstance(value, bool):
617
+ return "true" if value else "false"
618
+ return str(value)
619
+
620
+
621
def _should_retry(status: int) -> bool:
    """Default retry policy: status 0, any 5xx-or-higher status, or a status in DEFAULT_RETRY_STATUSES."""
    if status == 0 or status >= 500:
        return True
    return status in DEFAULT_RETRY_STATUSES
623
+
624
+
625
def _ensure_request_id(headers: dict[str, str]) -> str:
    """Return the request id carried by ``headers``, generating one if absent.

    An existing ``x-request-id`` header (any casing) is returned untouched;
    otherwise a fresh UUID4 hex string is stored under ``x-request-id`` in
    ``headers`` (mutated in place) and returned.
    """
    current = _header_value(headers, "x-request-id")
    if not current:
        current = uuid.uuid4().hex
        headers["x-request-id"] = current
    return current
632
+
633
+
634
def _retry_after_delay(headers: Mapping[str, str], cap: float) -> float | None:
    """Translate a ``Retry-After`` header into a delay in seconds, bounded by ``cap``.

    The header may be a number of seconds or a date. Returns ``None`` when the
    header is absent, unparsable, or does not describe a positive wait.
    """
    raw = _header_value(headers, "retry-after")
    if not raw:
        return None

    # Numeric form: a positive number of seconds.
    try:
        seconds = float(raw)
    except ValueError:
        pass
    else:
        if seconds > 0:
            return min(seconds, cap)

    # Date form: wait until the given instant, if it lies in the future.
    try:
        from email.utils import parsedate_to_datetime

        remaining = parsedate_to_datetime(raw).timestamp() - time.time()
    except (TypeError, ValueError, OverflowError):
        return None
    return min(remaining, cap) if remaining > 0 else None
654
+
655
+
656
+ def _header_value(headers: Mapping[str, str], name: str) -> str:
657
+ for key, value in headers.items():
658
+ if key.lower() == name.lower():
659
+ return value
660
+ return ""
661
+
662
+
663
+ def _is_timeout_error(exc: BaseException) -> bool:
664
+ if isinstance(exc, (TimeoutError, socket.timeout)):
665
+ return True
666
+ if isinstance(exc, URLError):
667
+ reason = exc.reason
668
+ if isinstance(reason, (TimeoutError, socket.timeout)):
669
+ return True
670
+ return "timed out" in str(reason).lower()
671
+ return "timed out" in str(exc).lower()