ccflow-http 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ccflow_http/__init__.py +3 -0
- ccflow_http/base.py +542 -0
- ccflow_http/tests/test_all.py +5 -0
- ccflow_http/tests/test_model.py +524 -0
- ccflow_http-0.1.0.dist-info/METADATA +74 -0
- ccflow_http-0.1.0.dist-info/RECORD +8 -0
- ccflow_http-0.1.0.dist-info/WHEEL +4 -0
- ccflow_http-0.1.0.dist-info/licenses/LICENSE +201 -0
ccflow_http/__init__.py
ADDED
ccflow_http/base.py
ADDED
|
@@ -0,0 +1,542 @@
|
|
|
1
|
+
from base64 import b64encode
|
|
2
|
+
from csv import DictReader
|
|
3
|
+
from gzip import decompress
|
|
4
|
+
from io import StringIO
|
|
5
|
+
from time import monotonic, sleep
|
|
6
|
+
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
|
7
|
+
from urllib.parse import parse_qsl, urlsplit, urlunsplit
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
from ccflow import BaseModel, CallableModel, ContextBase, Flow, GenericResult, PyObjectPath
|
|
11
|
+
from ccflow.utils.retry import RetryPolicy
|
|
12
|
+
from ccflow_etl import ExecutionPolicy
|
|
13
|
+
from jinja2 import Environment
|
|
14
|
+
from pydantic import Field
|
|
15
|
+
|
|
16
|
+
# Names exported by ``from ccflow_http.base import *``.
__all__ = (
    "HTTPConfig",
    "HTTPAuth",
    "HTTPContext",
    "HTTPRequestContext",
    "HTTPRequest",
    "HTTPRetryPolicy",
    "HTTPResponseResult",
    "HTTPResult",
    "HTTPModel",
    "redact_mapping",
    "safe_request_dump",
)
|
|
29
|
+
|
|
30
|
+
# Closed sets of string options accepted by the models in this module.

# How a response body is converted into the result value.
ResponseFormat = Literal["json", "text", "bytes", "csv", "gzip"]
# HTTP verbs a request may use.
HTTPMethod = Literal["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD"]
# Ways credentials can be attached to a request.
HTTPAuthStrategy = Literal["none", "bearer", "api_key_header", "api_key_query", "basic"]
# How the follow-up request of a paginated call is derived.
HTTPPaginationMode = Literal["next_url", "cursor", "page", "offset"]
# Outcome recorded for a request attempt that did not succeed.
HTTPRetryOutcome = Literal["retry", "failed"]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class HTTPConfig(BaseModel):
    """Client-level settings that an ``HTTPModel`` can fall back on.

    ``HTTPModel`` reads these when building the ``httpx.Client`` keyword
    arguments and when assembling request headers.
    """

    # Used as the client base URL when the model's own ``base_url`` is empty.
    base_url: str = ""
    # Used as the client timeout when the model's ``timeout`` equals its default of 30.0.
    timeout: float = 30.0
    # Used when the model's ``follow_redirects`` is True (its default).
    follow_redirects: bool = True
    # Lowest-priority headers; model and context headers override these.
    headers: Dict[str, str] = Field(default_factory=dict)
    # Passed to ``httpx.Client`` as ``transport`` when not None.
    transport: Optional[Any] = None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class HTTPAuth(BaseModel):
    """Credentials and the strategy used to attach them to a request.

    Which fields are required depends on ``strategy``; ``HTTPModel`` raises
    ``ValueError`` when a field needed by the selected strategy is None.
    """

    # One of "none", "bearer", "api_key_header", "api_key_query", "basic".
    strategy: HTTPAuthStrategy = "none"
    # "bearer": token placed after ``scheme`` in the Authorization header.
    token: Optional[str] = None
    # "api_key_header" / "api_key_query": name of the header or query parameter.
    name: Optional[str] = None
    # "api_key_header" / "api_key_query": value of the header or query parameter.
    value: Optional[str] = None
    # "basic": user name.
    username: Optional[str] = None
    # "basic": password.
    password: Optional[str] = None
    # "bearer": prefix written before the token in the Authorization header.
    scheme: str = "Bearer"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class HTTPContext(ContextBase):
    """Per-call overrides and template inputs for an ``HTTPModel``.

    ``HTTPModel.build_request`` merges these values over the model's own
    settings.
    """

    # Replaces the model's ``path`` when truthy.
    path: Optional[str] = None
    # Merged over the model's ``query``; context entries win on key clashes.
    query: Dict[str, Any] = Field(default_factory=dict)
    # Merged over config and model headers; context entries win on key clashes.
    headers: Dict[str, str] = Field(default_factory=dict)
    # Extra names made available to templates, layered over the dumped context fields.
    template_values: Dict[str, Any] = Field(default_factory=dict)
    # Replaces the model's ``json_body`` when not None.
    json_body: Optional[Any] = None
    # Replaces the model's ``content`` when not None.
    content: Optional[Union[bytes, str]] = None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class HTTPRequestContext(HTTPContext):
    """Context type declared by ``HTTPModel``; adds nothing to ``HTTPContext``."""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class HTTPRequest(BaseModel):
    """A fully resolved request, ready to hand to ``httpx.Client.request``."""

    # HTTP verb.
    method: HTTPMethod
    # Target URL or path.
    url: str
    # Query parameters sent as ``params``.
    params: Dict[str, Any] = Field(default_factory=dict)
    # Request headers.
    headers: Dict[str, str] = Field(default_factory=dict)
    # Sent as the ``json`` argument of the client request.
    json_data: Optional[Any] = None
    # Sent as the ``content`` argument of the client request.
    content: Optional[Union[bytes, str]] = None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def redact_mapping(values: Dict[str, Any]) -> Dict[str, Any]:
    """Return a shallow copy of ``values`` with credential-like entries masked.

    A key is treated as sensitive when its normalized form (lower-cased, with
    ``_`` and ``-`` removed) contains one of the markers below. Matching is by
    substring so prefixed variants such as ``X-API-Key`` or
    ``Proxy-Authorization`` are masked too; an exact-match check lets those
    through in clear text.

    Args:
        values: Mapping of header or query names to values. It is not modified.

    Returns:
        A new dict with the same keys in the same order; sensitive values are
        replaced by ``"***"``, all other values are passed through unchanged.
    """
    sensitive_markers = ("apikey", "authorization", "password", "cookie", "token", "secret")
    redacted = {}
    for key, value in values.items():
        # str() keeps the check from failing on non-string keys.
        normalized_key = str(key).lower().replace("_", "").replace("-", "")
        if any(marker in normalized_key for marker in sensitive_markers):
            redacted[key] = "***"
        else:
            redacted[key] = value
    return redacted
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def safe_request_dump(request: HTTPRequest) -> Dict[str, Any]:
    """Dump ``request`` to a dict with its params and headers redacted.

    Args:
        request: The request to serialize.

    Returns:
        ``request.model_dump`` output without the ``type_`` field, where the
        ``"params"`` and ``"headers"`` entries have been passed through
        ``redact_mapping``. Other entries are returned as dumped.
    """
    dumped = request.model_dump(exclude={"type_"})
    for section in ("params", "headers"):
        dumped[section] = redact_mapping(dumped.get(section, {}))
    return dumped
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class HTTPRetryEvent(BaseModel):
    """Record of one request attempt that did not succeed."""

    # 1-based number of the attempt that produced this event.
    attempt: int
    # "retry" when another attempt follows, "failed" when the request gave up.
    outcome: HTTPRetryOutcome
    # Seconds waited before the next attempt.
    delay_seconds: float = 0.0
    # Response status code, for status-based failures.
    status_code: Optional[int] = None
    # Exception class name, for exception-based failures.
    exception_type: Optional[str] = None
    # Coarse failure category reported by the retry policy.
    category: Optional[str] = None
    # Human-readable description of the failure.
    message: Optional[str] = None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class HTTPRetryPolicy(RetryPolicy):
    """Retry policy with HTTP status-code handling and failure categorization.

    ``max_attempts``, ``max_delay``, ``_should_retry`` and ``_compute_delay``
    are used here but not defined here; presumably they come from
    ``RetryPolicy``.
    """

    # Status codes for which another attempt is allowed.
    retry_status_codes: List[int] = Field(default_factory=lambda: [429, 500, 502, 503, 504])
    # Exception types considered retryable by default.
    retry_exceptions: List[PyObjectPath] = Field(
        default_factory=lambda: [PyObjectPath.validate(httpx.TimeoutException), PyObjectPath.validate(httpx.ConnectError)]
    )
    # Class names that ``exception_category`` reports as "timeout".
    timeout_exception_types: List[str] = Field(
        default_factory=lambda: ["TimeoutError", "TimeoutException", "ConnectTimeout", "ReadTimeout", "WriteTimeout", "PoolTimeout"]
    )

    def should_retry_status(self, status_code: Optional[int], attempt: int) -> bool:
        """Return True when ``status_code`` is retryable and attempts remain."""
        if status_code not in self.retry_status_codes:
            return False
        return attempt < self.max_attempts

    def should_retry_exception(self, exception: BaseException, attempt: int) -> bool:
        """Return a truthy value when ``exception`` is retryable and attempts remain."""
        retryable = self._should_retry(exception)
        return retryable and attempt < self.max_attempts

    def delay_seconds(self, attempt: int, jitter_value: Optional[float] = None) -> float:
        """Return the wait before the attempt following ``attempt``.

        Raises:
            ValueError: if ``attempt`` is smaller than 1.
        """
        if attempt >= 1:
            return self._compute_delay(attempt, jitter_value=jitter_value)
        raise ValueError("attempt must be greater than or equal to 1")

    def retry_delay_seconds(self, attempt: int, total_wait_seconds: float) -> Optional[float]:
        """Return the next delay, or None when it would exceed the wait budget.

        The budget check only applies when ``max_delay`` is not None: the delay
        is rejected if ``total_wait_seconds`` plus the delay is greater than
        ``max_delay``.
        """
        wait = self.delay_seconds(attempt)
        budget = self.max_delay
        over_budget = budget is not None and total_wait_seconds + wait > budget
        return None if over_budget else wait

    def exception_category(self, exception: BaseException) -> str:
        """Classify ``exception`` as "timeout", "connection" or "exception".

        Classification is by class name, looking at the exception's type and
        every class in its MRO. The timeout check runs first.
        """
        # The MRO starts with the exception's own type, so it covers that name too.
        class_names = {klass.__name__ for klass in type(exception).__mro__}
        if class_names.intersection(self.timeout_exception_types):
            return "timeout"
        if any("Timeout" in class_name for class_name in class_names):
            return "timeout"
        # Every name containing "Connection" also contains "Connect".
        if any("Connect" in class_name for class_name in class_names):
            return "connection"
        return "exception"

    def status_category(self, status_code: Optional[int]) -> str:
        """Classify a status code as "rate_limit", "timeout", "server_error" or "status"."""
        named_statuses = {429: "rate_limit", 408: "timeout"}
        if status_code in named_statuses:
            return named_statuses[status_code]
        if status_code is not None and 500 <= status_code <= 599:
            return "server_error"
        return "status"
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class HTTPResponseResult(GenericResult[Any]):
    """Result of an HTTP call: the decoded value plus response and retry metadata.

    ``HTTPModel.__call__`` fills the metadata from the last response received.
    """

    # Status code of the last response.
    status_code: int
    # Headers of the last response.
    headers: Dict[str, str] = Field(default_factory=dict)
    # URL of the last response, as a string.
    url: str = ""
    # Total number of request attempts across all pages.
    attempts: int = 1
    # Number of pages fetched.
    pages: int = 1
    # Rate-limit related headers of the last response, keyed by lower-cased name.
    rate_limit: Dict[str, str] = Field(default_factory=dict)
    # Dumped retry events collected across all pages.
    retry_events: List[Dict[str, Any]] = Field(default_factory=list)
    # Counters with keys "attempts", "retried", "failed" and "succeeded".
    retry_summary: Dict[str, int] = Field(default_factory=dict)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class HTTPResult(HTTPResponseResult):
    """Result type declared by ``HTTPModel``; adds nothing to ``HTTPResponseResult``."""
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class HTTPModel(CallableModel):
|
|
164
|
+
config: Optional[HTTPConfig] = None
|
|
165
|
+
auth: HTTPAuth = Field(default_factory=HTTPAuth)
|
|
166
|
+
method: HTTPMethod = "GET"
|
|
167
|
+
base_url: str = ""
|
|
168
|
+
path: str = ""
|
|
169
|
+
query: Dict[str, Any] = Field(default_factory=dict)
|
|
170
|
+
headers: Dict[str, str] = Field(default_factory=dict)
|
|
171
|
+
timeout: float = 30.0
|
|
172
|
+
follow_redirects: bool = True
|
|
173
|
+
response_format: ResponseFormat = "json"
|
|
174
|
+
json_body: Optional[Any] = None
|
|
175
|
+
content: Optional[Union[bytes, str]] = None
|
|
176
|
+
max_attempts: int = 1
|
|
177
|
+
retry_status_codes: List[int] = Field(default_factory=lambda: [429, 500, 502, 503, 504])
|
|
178
|
+
retry_policy: Optional[HTTPRetryPolicy] = None
|
|
179
|
+
execution_policy: Optional[ExecutionPolicy] = None
|
|
180
|
+
paginate: bool = False
|
|
181
|
+
max_pages: int = 100
|
|
182
|
+
pagination_mode: HTTPPaginationMode = "next_url"
|
|
183
|
+
next_url_field: str = "next_url"
|
|
184
|
+
next_cursor_field: str = "next_cursor"
|
|
185
|
+
cursor_param: str = "cursor"
|
|
186
|
+
cursor_start: Optional[str] = None
|
|
187
|
+
page_param: str = "page"
|
|
188
|
+
page_start: int = 1
|
|
189
|
+
offset_param: str = "offset"
|
|
190
|
+
offset_start: int = 0
|
|
191
|
+
limit_param: str = "limit"
|
|
192
|
+
limit: Optional[int] = None
|
|
193
|
+
results_field: str = "results"
|
|
194
|
+
|
|
195
|
+
@property
|
|
196
|
+
def context_type(self):
|
|
197
|
+
return HTTPRequestContext
|
|
198
|
+
|
|
199
|
+
@property
|
|
200
|
+
def result_type(self):
|
|
201
|
+
return HTTPResult
|
|
202
|
+
|
|
203
|
+
def _template_data(self, context: HTTPContext) -> Dict[str, Any]:
|
|
204
|
+
data = context.model_dump(exclude_none=True)
|
|
205
|
+
data.update(context.template_values)
|
|
206
|
+
return data
|
|
207
|
+
|
|
208
|
+
def _render(self, value: Any, data: Dict[str, Any]) -> Any:
|
|
209
|
+
if isinstance(value, str):
|
|
210
|
+
return Environment().from_string(value).render(**data)
|
|
211
|
+
return value
|
|
212
|
+
|
|
213
|
+
def _render_mapping(self, values: Dict[str, Any], data: Dict[str, Any]) -> Dict[str, Any]:
|
|
214
|
+
return {key: self._render(value, data) for key, value in values.items() if value is not None}
|
|
215
|
+
|
|
216
|
+
def _render_required_auth_value(self, value: Optional[str], field_name: str, data: Dict[str, Any]) -> str:
|
|
217
|
+
if value is None:
|
|
218
|
+
raise ValueError(f"HTTP auth strategy {self.auth.strategy!r} requires {field_name}.")
|
|
219
|
+
return str(self._render(value, data))
|
|
220
|
+
|
|
221
|
+
def _apply_auth(self, headers: Dict[str, str], query: Dict[str, Any], data: Dict[str, Any]) -> None:
|
|
222
|
+
match self.auth.strategy:
|
|
223
|
+
case "none":
|
|
224
|
+
return
|
|
225
|
+
case "bearer":
|
|
226
|
+
token = self._render_required_auth_value(self.auth.token, "token", data)
|
|
227
|
+
headers["Authorization"] = f"{self.auth.scheme} {token}"
|
|
228
|
+
case "api_key_header":
|
|
229
|
+
name = self._render_required_auth_value(self.auth.name, "name", data)
|
|
230
|
+
value = self._render_required_auth_value(self.auth.value, "value", data)
|
|
231
|
+
headers[name] = value
|
|
232
|
+
case "api_key_query":
|
|
233
|
+
name = self._render_required_auth_value(self.auth.name, "name", data)
|
|
234
|
+
value = self._render_required_auth_value(self.auth.value, "value", data)
|
|
235
|
+
query[name] = value
|
|
236
|
+
case "basic":
|
|
237
|
+
username = self._render_required_auth_value(self.auth.username, "username", data)
|
|
238
|
+
password = self._render_required_auth_value(self.auth.password, "password", data)
|
|
239
|
+
encoded = b64encode(f"{username}:{password}".encode("utf-8")).decode("ascii")
|
|
240
|
+
headers["Authorization"] = f"Basic {encoded}"
|
|
241
|
+
case _:
|
|
242
|
+
raise ValueError(f"Unsupported HTTP auth strategy: {self.auth.strategy}")
|
|
243
|
+
|
|
244
|
+
def _base_url(self) -> str:
|
|
245
|
+
return self.base_url or (self.config.base_url if self.config else "")
|
|
246
|
+
|
|
247
|
+
def _timeout(self) -> float:
|
|
248
|
+
if self.config and self.timeout == 30.0:
|
|
249
|
+
return self.config.timeout
|
|
250
|
+
return self.timeout
|
|
251
|
+
|
|
252
|
+
def _follow_redirects(self) -> bool:
|
|
253
|
+
if self.config and self.follow_redirects is True:
|
|
254
|
+
return self.config.follow_redirects
|
|
255
|
+
return self.follow_redirects
|
|
256
|
+
|
|
257
|
+
def _client_kwargs(self) -> Dict[str, Any]:
|
|
258
|
+
kwargs = {
|
|
259
|
+
"base_url": self._base_url(),
|
|
260
|
+
"timeout": self._timeout(),
|
|
261
|
+
"follow_redirects": self._follow_redirects(),
|
|
262
|
+
}
|
|
263
|
+
if self.config and self.config.transport is not None:
|
|
264
|
+
kwargs["transport"] = self.config.transport
|
|
265
|
+
return kwargs
|
|
266
|
+
|
|
267
|
+
def build_request(self, context: Optional[HTTPContext] = None) -> HTTPRequest:
|
|
268
|
+
context = context or HTTPRequestContext()
|
|
269
|
+
data = self._template_data(context)
|
|
270
|
+
|
|
271
|
+
path = context.path or self.path
|
|
272
|
+
query = {**self.query, **context.query}
|
|
273
|
+
config_headers = self.config.headers if self.config else {}
|
|
274
|
+
headers = {**config_headers, **self.headers, **context.headers}
|
|
275
|
+
|
|
276
|
+
rendered_query = self._render_mapping(query, data)
|
|
277
|
+
rendered_headers = self._render_mapping(headers, data)
|
|
278
|
+
self._apply_auth(rendered_headers, rendered_query, data)
|
|
279
|
+
|
|
280
|
+
return HTTPRequest(
|
|
281
|
+
method=self.method,
|
|
282
|
+
url=self._render(path, data),
|
|
283
|
+
params=rendered_query,
|
|
284
|
+
headers=rendered_headers,
|
|
285
|
+
json_data=context.json_body if context.json_body is not None else self.json_body,
|
|
286
|
+
content=context.content if context.content is not None else self.content,
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
def _response_value(self, response: httpx.Response) -> Any:
|
|
290
|
+
match self.response_format:
|
|
291
|
+
case "json":
|
|
292
|
+
return response.json()
|
|
293
|
+
case "text":
|
|
294
|
+
return response.text
|
|
295
|
+
case "bytes":
|
|
296
|
+
return response.content
|
|
297
|
+
case "csv":
|
|
298
|
+
return list(DictReader(StringIO(response.text)))
|
|
299
|
+
case "gzip":
|
|
300
|
+
return decompress(response.content)
|
|
301
|
+
case _:
|
|
302
|
+
raise ValueError(f"Unsupported response format: {self.response_format}")
|
|
303
|
+
|
|
304
|
+
def _safe_url(self, request: HTTPRequest) -> str:
|
|
305
|
+
return request.url.split("?", 1)[0]
|
|
306
|
+
|
|
307
|
+
def _rate_limit_headers(self, headers: Dict[str, str]) -> Dict[str, str]:
|
|
308
|
+
rate_limit = {}
|
|
309
|
+
for key, value in headers.items():
|
|
310
|
+
normalized_key = key.lower()
|
|
311
|
+
if "ratelimit" in normalized_key or normalized_key == "retry-after" or "rate-limit" in normalized_key:
|
|
312
|
+
rate_limit[normalized_key] = value
|
|
313
|
+
return rate_limit
|
|
314
|
+
|
|
315
|
+
def _retry_policy(self) -> HTTPRetryPolicy:
|
|
316
|
+
return self.retry_policy or HTTPRetryPolicy(max_attempts=self.max_attempts, retry_status_codes=self.retry_status_codes)
|
|
317
|
+
|
|
318
|
+
def _sleep(self, delay_seconds: float) -> None:
|
|
319
|
+
if delay_seconds > 0:
|
|
320
|
+
sleep(delay_seconds)
|
|
321
|
+
|
|
322
|
+
def _now(self) -> float:
|
|
323
|
+
return monotonic()
|
|
324
|
+
|
|
325
|
+
def _throttle(self, previous_started_at: Optional[float]) -> float:
|
|
326
|
+
now = self._now()
|
|
327
|
+
if self.execution_policy is None:
|
|
328
|
+
return now
|
|
329
|
+
delay_seconds = self.execution_policy.rate_delay_seconds(previous_started_at=previous_started_at, now=now)
|
|
330
|
+
if delay_seconds > 0:
|
|
331
|
+
self._sleep(delay_seconds)
|
|
332
|
+
return now + delay_seconds
|
|
333
|
+
|
|
334
|
+
def _retry_event(self, **values: Any) -> Dict[str, Any]:
|
|
335
|
+
return HTTPRetryEvent(**values).model_dump(exclude={"type_"}, exclude_none=True)
|
|
336
|
+
|
|
337
|
+
def _retry_summary(self, events: List[Dict[str, Any]], attempts: int, succeeded: bool) -> Dict[str, int]:
|
|
338
|
+
return {
|
|
339
|
+
"attempts": attempts,
|
|
340
|
+
"retried": sum(1 for event in events if event["outcome"] == "retry"),
|
|
341
|
+
"failed": sum(1 for event in events if event["outcome"] == "failed"),
|
|
342
|
+
"succeeded": 1 if succeeded else 0,
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
def _extract_field(self, value: Any, field: str) -> Any:
|
|
346
|
+
current = value
|
|
347
|
+
for part in field.split("."):
|
|
348
|
+
if not isinstance(current, dict):
|
|
349
|
+
return None
|
|
350
|
+
current = current.get(part)
|
|
351
|
+
return current
|
|
352
|
+
|
|
353
|
+
def _request_once(
|
|
354
|
+
self, client: httpx.Client, request: HTTPRequest, previous_started_at: Optional[float]
|
|
355
|
+
) -> Tuple[httpx.Response, int, List[Dict[str, Any]], float]:
|
|
356
|
+
attempts = 0
|
|
357
|
+
events: List[Dict[str, Any]] = []
|
|
358
|
+
retry_policy = self._retry_policy()
|
|
359
|
+
total_retry_wait_seconds = 0.0
|
|
360
|
+
while True:
|
|
361
|
+
attempts += 1
|
|
362
|
+
previous_started_at = self._throttle(previous_started_at)
|
|
363
|
+
try:
|
|
364
|
+
response = client.request(
|
|
365
|
+
method=request.method,
|
|
366
|
+
url=request.url,
|
|
367
|
+
params=request.params,
|
|
368
|
+
headers=request.headers,
|
|
369
|
+
json=request.json_data,
|
|
370
|
+
content=request.content,
|
|
371
|
+
)
|
|
372
|
+
response.raise_for_status()
|
|
373
|
+
return response, attempts, events, previous_started_at
|
|
374
|
+
except httpx.HTTPStatusError as exc:
|
|
375
|
+
status_code = exc.response.status_code if exc.response is not None else None
|
|
376
|
+
if retry_policy.should_retry_status(status_code, attempts):
|
|
377
|
+
delay_seconds = retry_policy.retry_delay_seconds(attempts, total_wait_seconds=total_retry_wait_seconds)
|
|
378
|
+
if delay_seconds is not None:
|
|
379
|
+
events.append(
|
|
380
|
+
self._retry_event(
|
|
381
|
+
attempt=attempts,
|
|
382
|
+
outcome="retry",
|
|
383
|
+
delay_seconds=delay_seconds,
|
|
384
|
+
status_code=status_code,
|
|
385
|
+
category=retry_policy.status_category(status_code),
|
|
386
|
+
message=f"retryable status code {status_code}",
|
|
387
|
+
)
|
|
388
|
+
)
|
|
389
|
+
self._sleep(delay_seconds)
|
|
390
|
+
total_retry_wait_seconds += delay_seconds
|
|
391
|
+
continue
|
|
392
|
+
events.append(
|
|
393
|
+
self._retry_event(
|
|
394
|
+
attempt=attempts,
|
|
395
|
+
outcome="failed",
|
|
396
|
+
status_code=status_code,
|
|
397
|
+
category=retry_policy.status_category(status_code),
|
|
398
|
+
message=f"retryable status code {status_code}",
|
|
399
|
+
)
|
|
400
|
+
)
|
|
401
|
+
status_label = status_code if status_code is not None else "unknown"
|
|
402
|
+
raise RuntimeError(f"HTTP {request.method} {self._safe_url(request)} failed with status {status_label}") from exc
|
|
403
|
+
except (httpx.TimeoutException, httpx.ConnectError) as exc:
|
|
404
|
+
if retry_policy.should_retry_exception(exc, attempts):
|
|
405
|
+
delay_seconds = retry_policy.retry_delay_seconds(attempts, total_wait_seconds=total_retry_wait_seconds)
|
|
406
|
+
if delay_seconds is not None:
|
|
407
|
+
events.append(
|
|
408
|
+
self._retry_event(
|
|
409
|
+
attempt=attempts,
|
|
410
|
+
outcome="retry",
|
|
411
|
+
delay_seconds=delay_seconds,
|
|
412
|
+
exception_type=type(exc).__name__,
|
|
413
|
+
category=retry_policy.exception_category(exc),
|
|
414
|
+
message=str(exc),
|
|
415
|
+
)
|
|
416
|
+
)
|
|
417
|
+
self._sleep(delay_seconds)
|
|
418
|
+
total_retry_wait_seconds += delay_seconds
|
|
419
|
+
continue
|
|
420
|
+
events.append(
|
|
421
|
+
self._retry_event(
|
|
422
|
+
attempt=attempts,
|
|
423
|
+
outcome="failed",
|
|
424
|
+
exception_type=type(exc).__name__,
|
|
425
|
+
category=retry_policy.exception_category(exc),
|
|
426
|
+
message=str(exc),
|
|
427
|
+
)
|
|
428
|
+
)
|
|
429
|
+
raise RuntimeError(f"HTTP {request.method} {self._safe_url(request)} failed with {type(exc).__name__}") from exc
|
|
430
|
+
except httpx.HTTPError as exc:
|
|
431
|
+
raise RuntimeError(f"HTTP {request.method} {self._safe_url(request)} failed with {type(exc).__name__}") from exc
|
|
432
|
+
|
|
433
|
+
def _merge_page_values(self, values: List[Any]) -> Any:
|
|
434
|
+
if not values:
|
|
435
|
+
return []
|
|
436
|
+
if all(isinstance(value, dict) and isinstance(value.get(self.results_field), list) for value in values):
|
|
437
|
+
merged = dict(values[-1])
|
|
438
|
+
merged[self.results_field] = [item for value in values for item in value[self.results_field]]
|
|
439
|
+
merged.pop(self.next_url_field, None)
|
|
440
|
+
merged.pop(self.next_cursor_field, None)
|
|
441
|
+
return merged
|
|
442
|
+
return values
|
|
443
|
+
|
|
444
|
+
def _page_items(self, value: Any) -> Optional[List[Any]]:
|
|
445
|
+
items = self._extract_field(value, self.results_field)
|
|
446
|
+
return items if isinstance(items, list) else None
|
|
447
|
+
|
|
448
|
+
def _request_with_params(self, request: HTTPRequest, params: Dict[str, Any]) -> HTTPRequest:
|
|
449
|
+
return request.model_copy(update={"params": {**request.params, **params}})
|
|
450
|
+
|
|
451
|
+
def _is_sensitive_query_param(self, key: str) -> bool:
|
|
452
|
+
normalized_key = key.lower().replace("_", "").replace("-", "")
|
|
453
|
+
return normalized_key in {"apikey", "authorization", "password"} or "token" in normalized_key or "secret" in normalized_key
|
|
454
|
+
|
|
455
|
+
def _next_url_request(self, request: HTTPRequest, next_url: str) -> HTTPRequest:
|
|
456
|
+
next_url_parts = urlsplit(next_url)
|
|
457
|
+
next_url_params = dict(parse_qsl(next_url_parts.query, keep_blank_values=True))
|
|
458
|
+
for key, value in request.params.items():
|
|
459
|
+
if key not in next_url_params and self._is_sensitive_query_param(key):
|
|
460
|
+
next_url_params[key] = value
|
|
461
|
+
next_url_without_query = urlunsplit((next_url_parts.scheme, next_url_parts.netloc, next_url_parts.path, "", next_url_parts.fragment))
|
|
462
|
+
return request.model_copy(update={"url": next_url_without_query, "params": next_url_params})
|
|
463
|
+
|
|
464
|
+
def _initial_paginated_request(self, request: HTTPRequest) -> HTTPRequest:
|
|
465
|
+
if not self.paginate:
|
|
466
|
+
return request
|
|
467
|
+
match self.pagination_mode:
|
|
468
|
+
case "next_url":
|
|
469
|
+
return request
|
|
470
|
+
case "cursor":
|
|
471
|
+
return self._request_with_params(request, {self.cursor_param: self.cursor_start}) if self.cursor_start is not None else request
|
|
472
|
+
case "page":
|
|
473
|
+
if self.page_param in request.params:
|
|
474
|
+
return request
|
|
475
|
+
return self._request_with_params(request, {self.page_param: self.page_start})
|
|
476
|
+
case "offset":
|
|
477
|
+
params = dict(request.params)
|
|
478
|
+
params.setdefault(self.offset_param, self.offset_start)
|
|
479
|
+
if self.limit is not None:
|
|
480
|
+
params.setdefault(self.limit_param, self.limit)
|
|
481
|
+
if self.limit_param not in params:
|
|
482
|
+
raise ValueError("Offset pagination requires a limit or existing limit parameter.")
|
|
483
|
+
return request.model_copy(update={"params": params})
|
|
484
|
+
case _:
|
|
485
|
+
raise ValueError(f"Unsupported pagination mode: {self.pagination_mode}")
|
|
486
|
+
|
|
487
|
+
def _next_paginated_request(self, request: HTTPRequest, value: Any) -> Optional[HTTPRequest]:
|
|
488
|
+
if self._page_items(value) == []:
|
|
489
|
+
return None
|
|
490
|
+
match self.pagination_mode:
|
|
491
|
+
case "next_url":
|
|
492
|
+
next_url = self._extract_field(value, self.next_url_field) if isinstance(value, dict) else None
|
|
493
|
+
return self._next_url_request(request, next_url) if next_url else None
|
|
494
|
+
case "cursor":
|
|
495
|
+
next_cursor = self._extract_field(value, self.next_cursor_field)
|
|
496
|
+
return self._request_with_params(request, {self.cursor_param: next_cursor}) if next_cursor else None
|
|
497
|
+
case "page":
|
|
498
|
+
next_page = int(request.params.get(self.page_param, self.page_start)) + 1
|
|
499
|
+
return self._request_with_params(request, {self.page_param: next_page})
|
|
500
|
+
case "offset":
|
|
501
|
+
limit = int(request.params[self.limit_param])
|
|
502
|
+
next_offset = int(request.params.get(self.offset_param, self.offset_start)) + limit
|
|
503
|
+
return self._request_with_params(request, {self.offset_param: next_offset, self.limit_param: limit})
|
|
504
|
+
case _:
|
|
505
|
+
raise ValueError(f"Unsupported pagination mode: {self.pagination_mode}")
|
|
506
|
+
|
|
507
|
+
@Flow.call
|
|
508
|
+
def __call__(self, context: HTTPRequestContext) -> HTTPResult:
|
|
509
|
+
request = self._initial_paginated_request(self.build_request(context))
|
|
510
|
+
|
|
511
|
+
with httpx.Client(**self._client_kwargs()) as client:
|
|
512
|
+
values = []
|
|
513
|
+
retry_events = []
|
|
514
|
+
total_attempts = 0
|
|
515
|
+
pages = 0
|
|
516
|
+
previous_started_at = None
|
|
517
|
+
while True:
|
|
518
|
+
response, attempts, events, previous_started_at = self._request_once(client, request, previous_started_at)
|
|
519
|
+
total_attempts += attempts
|
|
520
|
+
retry_events.extend(events)
|
|
521
|
+
pages += 1
|
|
522
|
+
value = self._response_value(response)
|
|
523
|
+
values.append(value)
|
|
524
|
+
|
|
525
|
+
if not self.paginate or pages >= self.max_pages:
|
|
526
|
+
break
|
|
527
|
+
next_request = self._next_paginated_request(request, value)
|
|
528
|
+
if next_request is None:
|
|
529
|
+
break
|
|
530
|
+
request = next_request
|
|
531
|
+
|
|
532
|
+
return HTTPResult(
|
|
533
|
+
value=self._merge_page_values(values) if self.paginate else values[-1],
|
|
534
|
+
status_code=response.status_code,
|
|
535
|
+
headers=dict(response.headers or {}),
|
|
536
|
+
url=str(response.url),
|
|
537
|
+
attempts=total_attempts,
|
|
538
|
+
pages=pages,
|
|
539
|
+
rate_limit=self._rate_limit_headers(dict(response.headers or {})),
|
|
540
|
+
retry_events=retry_events,
|
|
541
|
+
retry_summary=self._retry_summary(retry_events, attempts=total_attempts, succeeded=True),
|
|
542
|
+
)
|