ccflow-http 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
# Re-export every name listed in ``base.__all__`` at package level.
from .base import *

# Version string of this package release.
__version__ = "0.1.0"
ccflow_http/base.py ADDED
@@ -0,0 +1,542 @@
1
+ from base64 import b64encode
2
+ from csv import DictReader
3
+ from gzip import decompress
4
+ from io import StringIO
5
+ from time import monotonic, sleep
6
+ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
7
+ from urllib.parse import parse_qsl, urlsplit, urlunsplit
8
+
9
+ import httpx
10
+ from ccflow import BaseModel, CallableModel, ContextBase, Flow, GenericResult, PyObjectPath
11
+ from ccflow.utils.retry import RetryPolicy
12
+ from ccflow_etl import ExecutionPolicy
13
+ from jinja2 import Environment
14
+ from pydantic import Field
15
+
16
# Names exported by ``from ccflow_http.base import *``.
# NOTE(review): HTTPRetryEvent is defined in this module but not listed here — confirm that is intentional.
__all__ = (
    "HTTPConfig",
    "HTTPAuth",
    "HTTPContext",
    "HTTPRequestContext",
    "HTTPRequest",
    "HTTPRetryPolicy",
    "HTTPResponseResult",
    "HTTPResult",
    "HTTPModel",
    "redact_mapping",
    "safe_request_dump",
)

# Ways a response body can be decoded (dispatched on in HTTPModel._response_value).
ResponseFormat = Literal["json", "text", "bytes", "csv", "gzip"]
# HTTP verbs accepted for HTTPModel.method and HTTPRequest.method.
HTTPMethod = Literal["GET", "POST", "PUT", "PATCH", "DELETE", "HEAD"]
# Authentication strategies dispatched on in HTTPModel._apply_auth.
HTTPAuthStrategy = Literal["none", "bearer", "api_key_header", "api_key_query", "basic"]
# Pagination schemes dispatched on in HTTPModel's pagination helpers.
HTTPPaginationMode = Literal["next_url", "cursor", "page", "offset"]
# Outcome recorded on each HTTPRetryEvent.
HTTPRetryOutcome = Literal["retry", "failed"]
35
+
36
+
37
class HTTPConfig(BaseModel):
    """Shared client settings that an ``HTTPModel`` can fall back to."""

    # Base URL used when the model's own ``base_url`` is empty.
    base_url: str = ""
    # Request timeout the model may fall back to (see ``HTTPModel._timeout``).
    timeout: float = 30.0
    # Redirect setting the model may fall back to (see ``HTTPModel._follow_redirects``).
    follow_redirects: bool = True
    # Headers merged first when building a request; model and context headers override them.
    headers: Dict[str, str] = Field(default_factory=dict)
    # Optional object forwarded as ``transport=`` to ``httpx.Client`` when not None.
    transport: Optional[Any] = None
43
+
44
+
45
class HTTPAuth(BaseModel):
    """Credentials and strategy consumed by ``HTTPModel._apply_auth``."""

    # Which of the strategies below is applied; "none" leaves the request untouched.
    strategy: HTTPAuthStrategy = "none"
    # Required by the "bearer" strategy; rendered as a template before use.
    token: Optional[str] = None
    # Header or query-parameter name; required by "api_key_header" and "api_key_query".
    name: Optional[str] = None
    # Header or query-parameter value; required by "api_key_header" and "api_key_query".
    value: Optional[str] = None
    # Required by the "basic" strategy.
    username: Optional[str] = None
    # Required by the "basic" strategy.
    password: Optional[str] = None
    # Prefix placed before the token in the Authorization header for "bearer".
    scheme: str = "Bearer"
53
+
54
+
55
class HTTPContext(ContextBase):
    """Per-call overrides and template values for an ``HTTPModel`` request."""

    # Replaces the model's ``path`` when truthy.
    path: Optional[str] = None
    # Merged over the model's ``query``; context entries win on key clashes.
    query: Dict[str, Any] = Field(default_factory=dict)
    # Merged over config and model headers; context entries win on key clashes.
    headers: Dict[str, str] = Field(default_factory=dict)
    # Extra template variables; they override the dumped context fields of the same name.
    template_values: Dict[str, Any] = Field(default_factory=dict)
    # Replaces the model's ``json_body`` when not None.
    json_body: Optional[Any] = None
    # Replaces the model's ``content`` when not None.
    content: Optional[Union[bytes, str]] = None
62
+
63
+
64
# Context type reported by HTTPModel.context_type; adds no fields to HTTPContext.
class HTTPRequestContext(HTTPContext): ...
65
+
66
+
67
class HTTPRequest(BaseModel):
    """Fully rendered request description produced by ``HTTPModel.build_request``."""

    # HTTP verb passed to ``client.request(method=...)``.
    method: HTTPMethod
    # Rendered path or URL passed to ``client.request(url=...)``.
    url: str
    # Rendered query parameters (including any query-based auth value).
    params: Dict[str, Any] = Field(default_factory=dict)
    # Rendered headers (including any header-based auth value).
    headers: Dict[str, str] = Field(default_factory=dict)
    # Forwarded as ``json=`` to the client.
    json_data: Optional[Any] = None
    # Forwarded as ``content=`` to the client.
    content: Optional[Union[bytes, str]] = None
74
+
75
+
76
def redact_mapping(values: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *values* with credential-like entries masked.

    Keys are compared case-insensitively with ``_`` and ``-`` removed. A key is
    sensitive when its normalized form *contains* one of the markers below, so
    prefixed names such as ``X-API-Key`` or ``Proxy-Authorization`` are masked
    as well as the bare ``api_key`` / ``Authorization`` forms. Cookie headers
    are masked too because they routinely carry session credentials.

    Args:
        values: Mapping of header or query names to values. It is not mutated.

    Returns:
        A new dict with the same keys in the same order; the value of every
        sensitive key is replaced by ``"***"``.
    """
    sensitive_markers = ("apikey", "authorization", "password", "token", "secret", "cookie")

    def is_sensitive(key: Any) -> bool:
        # str() keeps non-string keys from crashing the redaction of a debug dump.
        normalized_key = str(key).lower().replace("_", "").replace("-", "")
        return any(marker in normalized_key for marker in sensitive_markers)

    return {key: "***" if is_sensitive(key) else value for key, value in values.items()}
85
+
86
+
87
def safe_request_dump(request: HTTPRequest) -> Dict[str, Any]:
    """Dump *request* to a dict with its ``params`` and ``headers`` redacted.

    The ``type_`` field is excluded from the dump. Only the two mappings are
    passed through ``redact_mapping``; the URL and body fields are returned
    as dumped.
    """
    dumped = request.model_dump(exclude={"type_"})
    for section in ("params", "headers"):
        dumped[section] = redact_mapping(dumped.get(section, {}))
    return dumped
92
+
93
+
94
class HTTPRetryEvent(BaseModel):
    """Record of one failed attempt, as built by ``HTTPModel._retry_event``."""

    # 1-based attempt number within a single ``_request_once`` call.
    attempt: int
    # "retry" when another attempt follows, "failed" when the request is given up.
    outcome: HTTPRetryOutcome
    # Seconds slept before the next attempt; left at 0.0 for "failed" events.
    delay_seconds: float = 0.0
    # Set for failures caused by an HTTP status.
    status_code: Optional[int] = None
    # Class name of the exception, set for timeout/connect failures.
    exception_type: Optional[str] = None
    # Coarse label from ``HTTPRetryPolicy.status_category`` / ``exception_category``.
    category: Optional[str] = None
    # Human-readable description of the failure.
    message: Optional[str] = None
102
+
103
+
104
class HTTPRetryPolicy(RetryPolicy):
    """Retry policy extended with HTTP status codes and failure categorisation.

    Relies on ``max_attempts``, ``max_delay``, ``_should_retry`` and
    ``_compute_delay`` supplied by the ``RetryPolicy`` base class.
    """

    retry_status_codes: List[int] = Field(default_factory=lambda: [429, 500, 502, 503, 504])
    retry_exceptions: List[PyObjectPath] = Field(
        default_factory=lambda: [PyObjectPath.validate(httpx.TimeoutException), PyObjectPath.validate(httpx.ConnectError)]
    )
    timeout_exception_types: List[str] = Field(
        default_factory=lambda: ["TimeoutError", "TimeoutException", "ConnectTimeout", "ReadTimeout", "WriteTimeout", "PoolTimeout"]
    )

    def should_retry_status(self, status_code: Optional[int], attempt: int) -> bool:
        """Return True when *status_code* is retryable and attempts remain."""
        if status_code not in self.retry_status_codes:
            return False
        return attempt < self.max_attempts

    def should_retry_exception(self, exception: BaseException, attempt: int) -> bool:
        """Return a truthy value when the base policy retries *exception* and attempts remain."""
        retryable = self._should_retry(exception)
        if not retryable:
            return retryable
        return attempt < self.max_attempts

    def delay_seconds(self, attempt: int, jitter_value: Optional[float] = None) -> float:
        """Return the delay before the retry that follows *attempt* (1-based).

        Raises:
            ValueError: If *attempt* is smaller than 1.
        """
        if attempt >= 1:
            return self._compute_delay(attempt, jitter_value=jitter_value)
        raise ValueError("attempt must be greater than or equal to 1")

    def retry_delay_seconds(self, attempt: int, total_wait_seconds: float) -> Optional[float]:
        """Return the next delay, or None when it would push the total wait past ``max_delay``."""
        delay = self.delay_seconds(attempt)
        budget = self.max_delay
        over_budget = budget is not None and total_wait_seconds + delay > budget
        return None if over_budget else delay

    def exception_category(self, exception: BaseException) -> str:
        """Classify *exception* as "timeout", "connection" or "exception" by class names in its MRO."""
        # The MRO starts with the exception's own class, so this covers it and all bases.
        class_names = {klass.__name__ for klass in type(exception).__mro__}
        if class_names.intersection(self.timeout_exception_types):
            return "timeout"
        if any("Timeout" in class_name for class_name in class_names):
            return "timeout"
        # "Connection" contains "Connect", so one substring test covers both spellings.
        if any("Connect" in class_name for class_name in class_names):
            return "connection"
        return "exception"

    def status_category(self, status_code: Optional[int]) -> str:
        """Classify *status_code* as "rate_limit", "timeout", "server_error" or "status"."""
        named_statuses = {429: "rate_limit", 408: "timeout"}
        if status_code in named_statuses:
            return named_statuses[status_code]
        is_server_error = status_code is not None and 500 <= status_code <= 599
        return "server_error" if is_server_error else "status"
147
+
148
+
149
class HTTPResponseResult(GenericResult[Any]):
    """Result of an ``HTTPModel`` call: the decoded value plus response and retry metadata."""

    # Status code of the last response received.
    status_code: int
    # Headers of the last response received.
    headers: Dict[str, str] = Field(default_factory=dict)
    # URL of the last response received, as a string.
    url: str = ""
    # Total attempts across all pages, including retries.
    attempts: int = 1
    # Number of pages fetched.
    pages: int = 1
    # Rate-limit related headers of the last response, keyed by lower-cased name.
    rate_limit: Dict[str, str] = Field(default_factory=dict)
    # Dumped HTTPRetryEvent records collected while fetching.
    retry_events: List[Dict[str, Any]] = Field(default_factory=list)
    # Counters with the keys "attempts", "retried", "failed" and "succeeded".
    retry_summary: Dict[str, int] = Field(default_factory=dict)
158
+
159
+
160
# Result type reported by HTTPModel.result_type; adds no fields to HTTPResponseResult.
class HTTPResult(HTTPResponseResult): ...
161
+
162
+
163
class HTTPModel(CallableModel):
    """Callable model that performs an HTTP request, optionally paginated, and returns an ``HTTPResult``."""

    # Optional shared settings consulted for base URL, timeout, redirects, headers and transport.
    config: Optional[HTTPConfig] = None
    # Credentials applied to headers or query parameters when a request is built.
    auth: HTTPAuth = Field(default_factory=HTTPAuth)
    # HTTP verb used for every request issued by this model.
    method: HTTPMethod = "GET"
    # Takes precedence over ``config.base_url`` when non-empty.
    base_url: str = ""
    # Template rendered into the request URL unless the context supplies a path.
    path: str = ""
    # Default query parameters; string values are rendered as templates.
    query: Dict[str, Any] = Field(default_factory=dict)
    # Default headers, merged over config headers and under context headers.
    headers: Dict[str, str] = Field(default_factory=dict)
    # Client timeout; see ``_timeout`` for how it interacts with ``config.timeout``.
    timeout: float = 30.0
    # Redirect handling; see ``_follow_redirects`` for how it interacts with the config.
    follow_redirects: bool = True
    # How the response body is decoded.
    response_format: ResponseFormat = "json"
    # Default JSON body, used when the context provides none.
    json_body: Optional[Any] = None
    # Default raw body, used when the context provides none.
    content: Optional[Union[bytes, str]] = None
    # Used together with ``retry_status_codes`` to build a default policy when ``retry_policy`` is unset.
    max_attempts: int = 1
    retry_status_codes: List[int] = Field(default_factory=lambda: [429, 500, 502, 503, 504])
    # Explicit retry policy; when truthy it replaces the default built from the two fields above.
    retry_policy: Optional[HTTPRetryPolicy] = None
    # When set, asked for a rate-limiting delay before every attempt.
    execution_policy: Optional[ExecutionPolicy] = None
    # Enables following additional pages after the first response.
    paginate: bool = False
    # Upper bound on the number of pages fetched in one call.
    max_pages: int = 100
    # Which pagination scheme produces the next request.
    pagination_mode: HTTPPaginationMode = "next_url"
    # Dotted path to the next-page URL in the response ("next_url" mode).
    next_url_field: str = "next_url"
    # Dotted path to the next cursor in the response ("cursor" mode).
    next_cursor_field: str = "next_cursor"
    # Query parameter that carries the cursor.
    cursor_param: str = "cursor"
    # Cursor sent with the first request when not None.
    cursor_start: Optional[str] = None
    # Query parameter that carries the page number ("page" mode).
    page_param: str = "page"
    # Page number sent with the first request unless the parameter is already present.
    page_start: int = 1
    # Query parameter that carries the offset ("offset" mode).
    offset_param: str = "offset"
    # Offset sent with the first request unless the parameter is already present.
    offset_start: int = 0
    # Query parameter that carries the page size ("offset" mode).
    limit_param: str = "limit"
    # Page size for "offset" mode; required unless the limit parameter is already in the query.
    limit: Optional[int] = None
    # Key of the item list inside each page; used to detect empty pages and to merge pages.
    results_field: str = "results"
194
+
195
@property
def context_type(self):
    """Return the context class this model is called with (``HTTPRequestContext``)."""
    return HTTPRequestContext
198
+
199
@property
def result_type(self):
    """Return the result class this model produces (``HTTPResult``)."""
    return HTTPResult
202
+
203
def _template_data(self, context: HTTPContext) -> Dict[str, Any]:
    """Build the variables available to templates.

    Starts from the context's non-None fields and overlays
    ``context.template_values``, which win on key clashes.
    """
    return {**context.model_dump(exclude_none=True), **context.template_values}
207
+
208
+ def _render(self, value: Any, data: Dict[str, Any]) -> Any:
209
+ if isinstance(value, str):
210
+ return Environment().from_string(value).render(**data)
211
+ return value
212
+
213
+ def _render_mapping(self, values: Dict[str, Any], data: Dict[str, Any]) -> Dict[str, Any]:
214
+ return {key: self._render(value, data) for key, value in values.items() if value is not None}
215
+
216
+ def _render_required_auth_value(self, value: Optional[str], field_name: str, data: Dict[str, Any]) -> str:
217
+ if value is None:
218
+ raise ValueError(f"HTTP auth strategy {self.auth.strategy!r} requires {field_name}.")
219
+ return str(self._render(value, data))
220
+
221
+ def _apply_auth(self, headers: Dict[str, str], query: Dict[str, Any], data: Dict[str, Any]) -> None:
222
+ match self.auth.strategy:
223
+ case "none":
224
+ return
225
+ case "bearer":
226
+ token = self._render_required_auth_value(self.auth.token, "token", data)
227
+ headers["Authorization"] = f"{self.auth.scheme} {token}"
228
+ case "api_key_header":
229
+ name = self._render_required_auth_value(self.auth.name, "name", data)
230
+ value = self._render_required_auth_value(self.auth.value, "value", data)
231
+ headers[name] = value
232
+ case "api_key_query":
233
+ name = self._render_required_auth_value(self.auth.name, "name", data)
234
+ value = self._render_required_auth_value(self.auth.value, "value", data)
235
+ query[name] = value
236
+ case "basic":
237
+ username = self._render_required_auth_value(self.auth.username, "username", data)
238
+ password = self._render_required_auth_value(self.auth.password, "password", data)
239
+ encoded = b64encode(f"{username}:{password}".encode("utf-8")).decode("ascii")
240
+ headers["Authorization"] = f"Basic {encoded}"
241
+ case _:
242
+ raise ValueError(f"Unsupported HTTP auth strategy: {self.auth.strategy}")
243
+
244
+ def _base_url(self) -> str:
245
+ return self.base_url or (self.config.base_url if self.config else "")
246
+
247
+ def _timeout(self) -> float:
248
+ if self.config and self.timeout == 30.0:
249
+ return self.config.timeout
250
+ return self.timeout
251
+
252
+ def _follow_redirects(self) -> bool:
253
+ if self.config and self.follow_redirects is True:
254
+ return self.config.follow_redirects
255
+ return self.follow_redirects
256
+
257
+ def _client_kwargs(self) -> Dict[str, Any]:
258
+ kwargs = {
259
+ "base_url": self._base_url(),
260
+ "timeout": self._timeout(),
261
+ "follow_redirects": self._follow_redirects(),
262
+ }
263
+ if self.config and self.config.transport is not None:
264
+ kwargs["transport"] = self.config.transport
265
+ return kwargs
266
+
267
def build_request(self, context: Optional[HTTPContext] = None) -> HTTPRequest:
    """Combine model defaults, config and *context* into a rendered ``HTTPRequest``.

    Precedence, lowest to highest: config headers, model headers, context
    headers; model query, context query. String values are rendered as
    templates and auth is applied after rendering. The JSON body and raw
    content are passed through without rendering.

    Args:
        context: Per-call overrides; a default ``HTTPRequestContext`` is used when falsy.
    """
    if not context:
        context = HTTPRequestContext()
    template_data = self._template_data(context)

    merged_query = dict(self.query)
    merged_query.update(context.query)

    merged_headers = dict(self.config.headers if self.config else {})
    merged_headers.update(self.headers)
    merged_headers.update(context.headers)

    params = self._render_mapping(merged_query, template_data)
    request_headers = self._render_mapping(merged_headers, template_data)
    # Mutates both mappings in place, so auth values are never template-rendered twice.
    self._apply_auth(request_headers, params, template_data)

    raw_path = context.path or self.path
    body_json = self.json_body if context.json_body is None else context.json_body
    body_content = self.content if context.content is None else context.content

    return HTTPRequest(
        method=self.method,
        url=self._render(raw_path, template_data),
        params=params,
        headers=request_headers,
        json_data=body_json,
        content=body_content,
    )
288
+
289
+ def _response_value(self, response: httpx.Response) -> Any:
290
+ match self.response_format:
291
+ case "json":
292
+ return response.json()
293
+ case "text":
294
+ return response.text
295
+ case "bytes":
296
+ return response.content
297
+ case "csv":
298
+ return list(DictReader(StringIO(response.text)))
299
+ case "gzip":
300
+ return decompress(response.content)
301
+ case _:
302
+ raise ValueError(f"Unsupported response format: {self.response_format}")
303
+
304
+ def _safe_url(self, request: HTTPRequest) -> str:
305
+ return request.url.split("?", 1)[0]
306
+
307
+ def _rate_limit_headers(self, headers: Dict[str, str]) -> Dict[str, str]:
308
+ rate_limit = {}
309
+ for key, value in headers.items():
310
+ normalized_key = key.lower()
311
+ if "ratelimit" in normalized_key or normalized_key == "retry-after" or "rate-limit" in normalized_key:
312
+ rate_limit[normalized_key] = value
313
+ return rate_limit
314
+
315
def _retry_policy(self) -> HTTPRetryPolicy:
    """Return the configured retry policy, or a default one built from ``max_attempts`` and ``retry_status_codes``."""
    if self.retry_policy:
        return self.retry_policy
    return HTTPRetryPolicy(max_attempts=self.max_attempts, retry_status_codes=self.retry_status_codes)
317
+
318
+ def _sleep(self, delay_seconds: float) -> None:
319
+ if delay_seconds > 0:
320
+ sleep(delay_seconds)
321
+
322
def _now(self) -> float:
    """Return the current ``time.monotonic()`` reading, used as the clock for throttling."""
    return monotonic()
324
+
325
+ def _throttle(self, previous_started_at: Optional[float]) -> float:
326
+ now = self._now()
327
+ if self.execution_policy is None:
328
+ return now
329
+ delay_seconds = self.execution_policy.rate_delay_seconds(previous_started_at=previous_started_at, now=now)
330
+ if delay_seconds > 0:
331
+ self._sleep(delay_seconds)
332
+ return now + delay_seconds
333
+
334
def _retry_event(self, **values: Any) -> Dict[str, Any]:
    """Validate *values* as an ``HTTPRetryEvent`` and return it as a dict without ``type_`` or None fields."""
    event = HTTPRetryEvent(**values)
    return event.model_dump(exclude={"type_"}, exclude_none=True)
336
+
337
+ def _retry_summary(self, events: List[Dict[str, Any]], attempts: int, succeeded: bool) -> Dict[str, int]:
338
+ return {
339
+ "attempts": attempts,
340
+ "retried": sum(1 for event in events if event["outcome"] == "retry"),
341
+ "failed": sum(1 for event in events if event["outcome"] == "failed"),
342
+ "succeeded": 1 if succeeded else 0,
343
+ }
344
+
345
+ def _extract_field(self, value: Any, field: str) -> Any:
346
+ current = value
347
+ for part in field.split("."):
348
+ if not isinstance(current, dict):
349
+ return None
350
+ current = current.get(part)
351
+ return current
352
+
353
def _request_once(
    self, client: httpx.Client, request: HTTPRequest, previous_started_at: Optional[float]
) -> Tuple[httpx.Response, int, List[Dict[str, Any]], float]:
    """Send *request*, retrying per the retry policy, until it succeeds or is given up.

    Args:
        client: Open client used for every attempt.
        request: Rendered request to send.
        previous_started_at: Start time of the previous attempt (None for the
            first one); passed to ``_throttle`` before each attempt.

    Returns:
        ``(response, attempts, events, started_at)``: the successful response,
        the number of attempts made, the "retry" events recorded along the
        way, and the start time reported by ``_throttle`` for the last attempt.

    Raises:
        RuntimeError: On a non-retryable status, an exhausted retry budget or
            any other ``httpx.HTTPError``. The message contains only the method
            and ``_safe_url``; the original exception is chained as the cause.
    """
    attempts = 0
    events: List[Dict[str, Any]] = []
    retry_policy = self._retry_policy()
    # Sum of the delays slept so far; checked by retry_delay_seconds against the policy's max_delay.
    total_retry_wait_seconds = 0.0
    while True:
        attempts += 1
        # Rate limiting applies to every attempt, retries included.
        previous_started_at = self._throttle(previous_started_at)
        try:
            response = client.request(
                method=request.method,
                url=request.url,
                params=request.params,
                headers=request.headers,
                json=request.json_data,
                content=request.content,
            )
            # Turns 4xx/5xx responses into HTTPStatusError so they reach the handler below.
            response.raise_for_status()
            return response, attempts, events, previous_started_at
        except httpx.HTTPStatusError as exc:
            status_code = exc.response.status_code if exc.response is not None else None
            if retry_policy.should_retry_status(status_code, attempts):
                delay_seconds = retry_policy.retry_delay_seconds(attempts, total_wait_seconds=total_retry_wait_seconds)
                # None means the next delay would exceed the wait budget: fall through to failure.
                if delay_seconds is not None:
                    events.append(
                        self._retry_event(
                            attempt=attempts,
                            outcome="retry",
                            delay_seconds=delay_seconds,
                            status_code=status_code,
                            category=retry_policy.status_category(status_code),
                            message=f"retryable status code {status_code}",
                        )
                    )
                    self._sleep(delay_seconds)
                    total_retry_wait_seconds += delay_seconds
                    continue
            # NOTE(review): this event is appended to the local list right before raising, so it
            # never reaches the caller; its message also says "retryable" for non-retryable statuses.
            events.append(
                self._retry_event(
                    attempt=attempts,
                    outcome="failed",
                    status_code=status_code,
                    category=retry_policy.status_category(status_code),
                    message=f"retryable status code {status_code}",
                )
            )
            status_label = status_code if status_code is not None else "unknown"
            raise RuntimeError(f"HTTP {request.method} {self._safe_url(request)} failed with status {status_label}") from exc
        except (httpx.TimeoutException, httpx.ConnectError) as exc:
            if retry_policy.should_retry_exception(exc, attempts):
                delay_seconds = retry_policy.retry_delay_seconds(attempts, total_wait_seconds=total_retry_wait_seconds)
                if delay_seconds is not None:
                    events.append(
                        self._retry_event(
                            attempt=attempts,
                            outcome="retry",
                            delay_seconds=delay_seconds,
                            exception_type=type(exc).__name__,
                            category=retry_policy.exception_category(exc),
                            message=str(exc),
                        )
                    )
                    self._sleep(delay_seconds)
                    total_retry_wait_seconds += delay_seconds
                    continue
            # As above: recorded locally, then lost when the RuntimeError propagates.
            events.append(
                self._retry_event(
                    attempt=attempts,
                    outcome="failed",
                    exception_type=type(exc).__name__,
                    category=retry_policy.exception_category(exc),
                    message=str(exc),
                )
            )
            raise RuntimeError(f"HTTP {request.method} {self._safe_url(request)} failed with {type(exc).__name__}") from exc
        except httpx.HTTPError as exc:
            # Any other httpx failure is never retried.
            raise RuntimeError(f"HTTP {request.method} {self._safe_url(request)} failed with {type(exc).__name__}") from exc
432
+
433
+ def _merge_page_values(self, values: List[Any]) -> Any:
434
+ if not values:
435
+ return []
436
+ if all(isinstance(value, dict) and isinstance(value.get(self.results_field), list) for value in values):
437
+ merged = dict(values[-1])
438
+ merged[self.results_field] = [item for value in values for item in value[self.results_field]]
439
+ merged.pop(self.next_url_field, None)
440
+ merged.pop(self.next_cursor_field, None)
441
+ return merged
442
+ return values
443
+
444
+ def _page_items(self, value: Any) -> Optional[List[Any]]:
445
+ items = self._extract_field(value, self.results_field)
446
+ return items if isinstance(items, list) else None
447
+
448
def _request_with_params(self, request: HTTPRequest, params: Dict[str, Any]) -> HTTPRequest:
    """Return a copy of *request* whose params are overlaid with *params* (new values win)."""
    combined = dict(request.params)
    combined.update(params)
    return request.model_copy(update={"params": combined})
450
+
451
+ def _is_sensitive_query_param(self, key: str) -> bool:
452
+ normalized_key = key.lower().replace("_", "").replace("-", "")
453
+ return normalized_key in {"apikey", "authorization", "password"} or "token" in normalized_key or "secret" in normalized_key
454
+
455
+ def _next_url_request(self, request: HTTPRequest, next_url: str) -> HTTPRequest:
456
+ next_url_parts = urlsplit(next_url)
457
+ next_url_params = dict(parse_qsl(next_url_parts.query, keep_blank_values=True))
458
+ for key, value in request.params.items():
459
+ if key not in next_url_params and self._is_sensitive_query_param(key):
460
+ next_url_params[key] = value
461
+ next_url_without_query = urlunsplit((next_url_parts.scheme, next_url_parts.netloc, next_url_parts.path, "", next_url_parts.fragment))
462
+ return request.model_copy(update={"url": next_url_without_query, "params": next_url_params})
463
+
464
def _initial_paginated_request(self, request: HTTPRequest) -> HTTPRequest:
    """Add the starting pagination parameters to the first request.

    Nothing is changed when pagination is off or the mode is "next_url".
    Parameters already present in the request are never overwritten in
    "page" and "offset" mode.

    Raises:
        ValueError: In "offset" mode when no limit is available, or for an unknown mode.
    """
    if not self.paginate:
        return request

    mode = self.pagination_mode
    if mode == "next_url":
        return request

    if mode == "cursor":
        if self.cursor_start is None:
            return request
        return self._request_with_params(request, {self.cursor_param: self.cursor_start})

    if mode == "page":
        page_already_given = self.page_param in request.params
        return request if page_already_given else self._request_with_params(request, {self.page_param: self.page_start})

    if mode == "offset":
        offset_params = dict(request.params)
        if self.offset_param not in offset_params:
            offset_params[self.offset_param] = self.offset_start
        if self.limit is not None and self.limit_param not in offset_params:
            offset_params[self.limit_param] = self.limit
        # The step size of later pages is read back from this parameter.
        if self.limit_param not in offset_params:
            raise ValueError("Offset pagination requires a limit or existing limit parameter.")
        return request.model_copy(update={"params": offset_params})

    raise ValueError(f"Unsupported pagination mode: {self.pagination_mode}")
486
+
487
def _next_paginated_request(self, request: HTTPRequest, value: Any) -> Optional[HTTPRequest]:
    """Derive the request for the following page, or None when pagination is finished.

    Pagination ends in every mode when the page's result list is empty. In
    addition, "next_url" and "cursor" mode end when the response carries no
    (truthy) next link or cursor. "page" and "offset" mode have no other stop
    condition here; the caller's page cap bounds them.

    Raises:
        ValueError: For an unknown pagination mode.
    """
    page_is_empty = self._page_items(value) == []
    if page_is_empty:
        return None

    mode = self.pagination_mode
    if mode == "next_url":
        next_url = self._extract_field(value, self.next_url_field) if isinstance(value, dict) else None
        if not next_url:
            return None
        return self._next_url_request(request, next_url)

    if mode == "cursor":
        next_cursor = self._extract_field(value, self.next_cursor_field)
        if not next_cursor:
            return None
        return self._request_with_params(request, {self.cursor_param: next_cursor})

    if mode == "page":
        current_page = int(request.params.get(self.page_param, self.page_start))
        return self._request_with_params(request, {self.page_param: current_page + 1})

    if mode == "offset":
        step = int(request.params[self.limit_param])
        current_offset = int(request.params.get(self.offset_param, self.offset_start))
        return self._request_with_params(request, {self.offset_param: current_offset + step, self.limit_param: step})

    raise ValueError(f"Unsupported pagination mode: {self.pagination_mode}")
506
+
507
@Flow.call
def __call__(self, context: HTTPRequestContext) -> HTTPResult:
    """Execute the request described by this model and *context*.

    Fetches the first page and, when ``paginate`` is on, keeps following
    pages until no next request can be derived or ``max_pages`` is reached.

    Returns:
        An ``HTTPResult`` whose value is the merged pages (paginated) or the
        single decoded response; status, headers and URL come from the last
        response received.
    """
    request = self._initial_paginated_request(self.build_request(context))

    page_values = []
    collected_events = []
    attempt_total = 0
    page_count = 0
    last_started_at = None

    with httpx.Client(**self._client_kwargs()) as client:
        while True:
            response, attempts, events, last_started_at = self._request_once(client, request, last_started_at)
            attempt_total += attempts
            collected_events.extend(events)
            page_count += 1
            page_value = self._response_value(response)
            page_values.append(page_value)

            if not self.paginate or page_count >= self.max_pages:
                break
            following_request = self._next_paginated_request(request, page_value)
            if following_request is None:
                break
            request = following_request

        response_headers = dict(response.headers or {})
        return HTTPResult(
            value=self._merge_page_values(page_values) if self.paginate else page_values[-1],
            status_code=response.status_code,
            headers=response_headers,
            url=str(response.url),
            attempts=attempt_total,
            pages=page_count,
            rate_limit=self._rate_limit_headers(response_headers),
            retry_events=collected_events,
            retry_summary=self._retry_summary(collected_events, attempts=attempt_total, succeeded=True),
        )
@@ -0,0 +1,5 @@
1
+ from ccflow_http import * # noqa
2
+
3
+
4
def test_all():
    """Smoke test: the only real check is that the module-level star import succeeded."""
    assert True