etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,430 @@
1
+ """
2
+ :mod:`etlplus.api.retry_manager` module.
3
+
4
+ Retry policies and exponential backoff helpers.
5
+
6
+ This module centralizes retry behavior for HTTP requests, including policy
7
+ parsing and exponential backoff with jitter.
8
+
9
+ Examples
10
+ --------
11
+ Retry a request with exponential backoff::
12
+
13
+ >>> from etlplus.api.retry_manager import RetryManager
14
+ >>> policy = {"max_attempts": 3, "backoff": 0.25, "retry_on": [429]}
15
+ >>> mgr = RetryManager(policy=policy)
16
+ >>> mgr.get_sleep_time(1)
17
+ 0.123 # jittered value in [0, min(backoff, cap)]
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import random
23
+ import time
24
+ from collections.abc import Callable
25
+ from dataclasses import dataclass
26
+ from dataclasses import field
27
+ from typing import Any
28
+ from typing import ClassVar
29
+ from typing import Final
30
+ from typing import TypedDict
31
+
32
+ import requests # type: ignore[import]
33
+
34
+ from ..types import JSONData
35
+ from ..types import Sleeper
36
+ from ..utils import to_float
37
+ from ..utils import to_int
38
+ from ..utils import to_positive_int
39
+ from .errors import ApiAuthError
40
+ from .errors import ApiRequestError
41
+
42
+ # SECTION: EXPORTS ========================================================== #
43
+
44
+
45
+ __all__ = [
46
+ # Classes
47
+ 'RetryStrategy',
48
+ 'RetryManager',
49
+ # Typed Dicts
50
+ 'RetryPolicy',
51
+ # Type Aliases
52
+ 'RetryInput',
53
+ ]
54
+
55
+
56
+ # SECTION: CONSTANTS ======================================================== #
57
+
58
+
59
# HTTP status codes retried when a policy supplies no usable ``retry_on``
# entries: 429 plus the 502/503/504 gateway-style failures.
DEFAULT_RETRY_STATUS_CODES: Final[frozenset[int]] = frozenset(
    (429, 502, 503, 504),
)
67
+
68
+
69
+ # SECTION: TYPED DICTS ====================================================== #
70
+
71
+
72
class RetryPolicy(TypedDict, total=False):
    """
    User-facing retry configuration for HTTP requests.

    Every key may be omitted (``total=False``).

    Attributes
    ----------
    max_attempts : int, optional
        Total number of attempts allowed, counting the initial request.
    backoff : float, optional
        Base backoff in seconds. For attempt ``n`` the value
        ``backoff * 2**(n-1)`` is the upper bound (before capping) of the
        jittered sleep computed by :meth:`RetryManager.get_sleep_time`.
    retry_on : list[int], optional
        HTTP status codes for which a failed request is retried.

    Notes
    -----
    - The policy itself is plain data; normalization into concrete values
      is performed by :meth:`RetryStrategy.from_policy`, and sleeping and
      retry decisions are performed by :class:`RetryManager`.
    """

    max_attempts: int
    backoff: float
    retry_on: list[int]
98
+
99
+
100
+ # SECTION: TYPE ALIASES ===================================================== #
101
+
102
+
103
# Policy input accepted by :meth:`RetryStrategy.from_policy`; ``None`` is
# handled there the same way as an empty policy mapping.
type RetryInput = RetryPolicy | None
104
+
105
+
106
+ # SECTION: DATA CLASSES ===================================================== #
107
+
108
+
109
@dataclass(frozen=True, slots=True)
class RetryStrategy:
    """
    Immutable, normalized retry settings built from a :class:`RetryPolicy`.

    Attributes
    ----------
    max_attempts : int
        Total number of attempts allowed.
    backoff : float
        Base backoff in seconds.
    retry_on_codes : frozenset[int]
        HTTP status codes that trigger a retry.
    DEFAULT_ATTEMPTS : ClassVar[int]
        Fallback passed to ``to_positive_int`` for ``max_attempts``.
    DEFAULT_BACKOFF : ClassVar[float]
        Fallback used for ``backoff``.
    """

    # -- Attributes -- #

    max_attempts: int
    backoff: float
    retry_on_codes: frozenset[int]

    DEFAULT_ATTEMPTS: ClassVar[int] = 3
    DEFAULT_BACKOFF: ClassVar[float] = 0.5

    # -- Class Methods -- #

    @classmethod
    def from_policy(
        cls,
        policy: RetryInput,
        *,
        default_codes: frozenset[int] = DEFAULT_RETRY_STATUS_CODES,
    ) -> RetryStrategy:
        """
        Build a strategy from a possibly missing or partial policy.

        Parameters
        ----------
        policy : RetryInput
            Raw policy mapping; ``None`` (or any falsy value) is treated as
            an empty mapping.
        default_codes : frozenset[int], optional
            Status codes used when ``retry_on`` yields no positive integer
            codes.

        Returns
        -------
        RetryStrategy
            Strategy holding the normalized attempts, backoff, and codes.
        """
        settings = policy or {}

        max_attempts = to_positive_int(
            settings.get('max_attempts'),
            cls.DEFAULT_ATTEMPTS,
        )

        parsed_backoff = to_float(
            settings.get('backoff'),
            default=cls.DEFAULT_BACKOFF,
            minimum=0.0,
        )
        # NOTE(review): any falsy result (``None`` or ``0.0``) falls back to
        # the default, so an explicit zero backoff cannot be configured here
        # -- confirm this is intended.
        backoff = parsed_backoff if parsed_backoff else cls.DEFAULT_BACKOFF

        # Keep only entries that ``to_int`` converts to a positive integer.
        codes = {
            value
            for raw in (settings.get('retry_on') or [])
            if (value := to_int(raw)) is not None and value > 0
        }

        return cls(
            max_attempts=max_attempts,
            backoff=backoff,
            retry_on_codes=frozenset(codes or default_codes),
        )
158
+
159
+
160
+ # SECTION: CLASSES ========================================================== #
161
+
162
+
163
@dataclass(frozen=True, slots=True, kw_only=True)
class RetryManager:
    """
    Centralized retry logic for HTTP requests.

    Attributes
    ----------
    DEFAULT_STATUS_CODES : ClassVar[frozenset[int]]
        Default HTTP status codes considered retryable.
    DEFAULT_CAP : ClassVar[float]
        Default maximum sleep seconds for jittered backoff.
    policy : RetryPolicy
        Retry policy configuration.
    retry_network_errors : bool
        Whether to retry on network errors (timeouts, connection errors).
    cap : float
        Maximum sleep seconds for jittered backoff.
    sleeper : Sleeper
        Callable used to sleep between retry attempts. Defaults to
        :func:`time.sleep`.
    strategy : RetryStrategy
        Normalized view of the retry policy (backoff, attempts, codes).
        Derived from ``policy`` in ``__post_init__``; not an ``__init__``
        argument.
    """

    # -- Class Attributes -- #

    DEFAULT_STATUS_CODES: ClassVar[frozenset[int]] = DEFAULT_RETRY_STATUS_CODES
    DEFAULT_CAP: ClassVar[float] = 30.0

    # -- Instance Attributes -- #

    policy: RetryPolicy
    retry_network_errors: bool = False
    cap: float = DEFAULT_CAP
    sleeper: Sleeper = time.sleep
    strategy: RetryStrategy = field(init=False, repr=False)

    # -- Magic Methods (Object Lifecycle) -- #

    def __post_init__(self) -> None:
        """Derive ``strategy`` from ``policy`` once at construction."""
        # The dataclass is frozen, so the derived field has to be assigned
        # through ``object.__setattr__``.
        object.__setattr__(
            self,
            'strategy',
            RetryStrategy.from_policy(
                self.policy,
                default_codes=self.DEFAULT_STATUS_CODES,
            ),
        )

    # -- Properties -- #

    @property
    def backoff(self) -> float:
        """
        Backoff factor.

        Returns
        -------
        float
            Base backoff seconds taken from the normalized strategy.
        """
        return self.strategy.backoff

    @property
    def max_attempts(self) -> int:
        """
        Maximum number of attempts, including the first one.

        Returns
        -------
        int
            Maximum number of attempts taken from the normalized strategy.
        """
        return self.strategy.max_attempts

    @property
    def retry_on_codes(self) -> set[int]:
        """
        Set of HTTP status codes that should trigger a retry.

        Returns
        -------
        set[int]
            A new mutable copy of the strategy's retry status codes.
        """
        return set(self.strategy.retry_on_codes)

    # -- Instance Methods -- #

    def get_sleep_time(
        self,
        attempt: int,
    ) -> float:
        """
        Compute a jittered sleep time in seconds for ``attempt``.

        The value is drawn uniformly from ``[0, upper]`` where ``upper`` is
        ``min(backoff * 2**(attempt - 1), cap)``, never below zero.

        Parameters
        ----------
        attempt : int
            Attempt number; values below 1 are treated as 1.

        Returns
        -------
        float
            Sleep time in seconds (never negative).
        """
        attempt = max(1, attempt)
        # Use a float power with a clamped exponent: ``2.0 ** 1023`` is the
        # largest finite power of two, so very large attempt numbers
        # saturate (and are then bounded by ``cap``) instead of raising
        # ``OverflowError`` when a huge integer is converted to float.
        exp = self.backoff * 2.0 ** min(attempt - 1, 1023)
        # A negative cap must not yield a negative sleep, which
        # ``time.sleep`` rejects.
        upper = max(0.0, min(exp, self.cap))
        return random.uniform(0.0, upper)

    def run_with_retry(
        self,
        func: Callable[..., JSONData],
        url: str,
        **kwargs: Any,
    ) -> JSONData:
        """
        Execute ``func`` with exponential-backoff retries.

        Parameters
        ----------
        func : Callable[..., JSONData]
            Function to run with retry logic; called as
            ``func(url, **kwargs)``.
        url : str
            URL for the API request.
        **kwargs : Any
            Additional keyword arguments to pass to ``func``.

        Returns
        -------
        JSONData
            Response data from the API request.

        Raises
        ------
        ApiAuthError
            The terminal failure carried a 401 or 403 status.
        ApiRequestError
            The request failed with a non-retryable error, or still failed
            after the final allowed attempt.

        Notes
        -----
        Only :class:`requests.RequestException` is intercepted; any other
        exception raised by ``func`` propagates unchanged.
        """
        for attempt in range(1, self.max_attempts + 1):
            try:
                return func(url, **kwargs)
            except requests.RequestException as e:
                status = self._extract_status(e)
                exhausted = attempt == self.max_attempts
                if not self.should_retry(status, e) or exhausted:
                    # Always raises, so the sleep below runs only when
                    # another attempt will follow.
                    self._raise_terminal_error(url, attempt, status, e)
                self.sleeper(self.get_sleep_time(attempt))

        # ``range`` already covered all attempts; reaching this line would
        # indicate a logical error.
        raise ApiRequestError(  # pragma: no cover - defensive
            url=url,
            status=None,
            attempts=self.max_attempts,
            retried=True,
            retry_policy=self.policy,
            cause=None,
        )

    def should_retry(
        self,
        status: int | None,
        error: Exception,
    ) -> bool:
        """
        Determine whether a request should be retried.

        Parameters
        ----------
        status : int | None
            HTTP status code extracted from the failed response, if any.
        error : Exception
            The exception that was raised.

        Returns
        -------
        bool
            ``True`` when the request should be retried, ``False`` otherwise.
        """
        # HTTP status-based retry. Read the strategy's frozenset directly
        # rather than the ``retry_on_codes`` property, which copies the
        # codes into a new set on every access.
        if status is not None and status in self.strategy.retry_on_codes:
            return True

        # Network error retry (opt-in via ``retry_network_errors``).
        network_errors = (requests.Timeout, requests.ConnectionError)
        if self.retry_network_errors and isinstance(error, network_errors):
            return True

        return False

    # -- Internal Instance Methods -- #

    def _raise_terminal_error(
        self,
        url: str,
        attempt: int,
        status: int | None,
        error: requests.RequestException,
    ) -> None:
        """
        Raise the terminal error for a failed request.

        Called when the failure is not retryable or when the final attempt
        has failed. This method always raises and never returns normally.

        Parameters
        ----------
        url : str
            URL for the API request.
        attempt : int
            Number of the attempt that failed.
        status : int | None
            HTTP status code if available.
        error : requests.RequestException
            The exception that was raised; chained as the cause.

        Raises
        ------
        ApiAuthError
            The status code is 401 or 403.
        ApiRequestError
            Any other failure.
        """
        retried = attempt > 1
        if status in {401, 403}:
            raise ApiAuthError(
                url=url,
                status=status,
                attempts=attempt,
                retried=retried,
                retry_policy=self.policy,
                cause=error,
            ) from error

        raise ApiRequestError(
            url=url,
            status=status,
            attempts=attempt,
            retried=retried,
            retry_policy=self.policy,
            cause=error,
        ) from error

    # -- Internal Static Methods -- #

    @staticmethod
    def _extract_status(
        error: requests.RequestException,
    ) -> int | None:
        """
        Extract the HTTP status code from a RequestException.

        Parameters
        ----------
        error : requests.RequestException
            The exception from which to extract the status code.

        Returns
        -------
        int | None
            The ``status_code`` of ``error.response`` if both attributes
            exist, else ``None``.
        """
        response = getattr(error, 'response', None)
        return getattr(response, 'status_code', None)