etlplus 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +43 -0
- etlplus/__main__.py +22 -0
- etlplus/__version__.py +14 -0
- etlplus/api/README.md +237 -0
- etlplus/api/__init__.py +136 -0
- etlplus/api/auth.py +432 -0
- etlplus/api/config.py +633 -0
- etlplus/api/endpoint_client.py +885 -0
- etlplus/api/errors.py +170 -0
- etlplus/api/pagination/__init__.py +47 -0
- etlplus/api/pagination/client.py +188 -0
- etlplus/api/pagination/config.py +440 -0
- etlplus/api/pagination/paginator.py +775 -0
- etlplus/api/rate_limiting/__init__.py +38 -0
- etlplus/api/rate_limiting/config.py +343 -0
- etlplus/api/rate_limiting/rate_limiter.py +266 -0
- etlplus/api/request_manager.py +589 -0
- etlplus/api/retry_manager.py +430 -0
- etlplus/api/transport.py +325 -0
- etlplus/api/types.py +172 -0
- etlplus/cli/__init__.py +15 -0
- etlplus/cli/app.py +1367 -0
- etlplus/cli/handlers.py +775 -0
- etlplus/cli/main.py +616 -0
- etlplus/config/__init__.py +56 -0
- etlplus/config/connector.py +372 -0
- etlplus/config/jobs.py +311 -0
- etlplus/config/pipeline.py +339 -0
- etlplus/config/profile.py +78 -0
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/ddl.py +197 -0
- etlplus/enums.py +414 -0
- etlplus/extract.py +218 -0
- etlplus/file.py +657 -0
- etlplus/load.py +336 -0
- etlplus/mixins.py +62 -0
- etlplus/py.typed +0 -0
- etlplus/run.py +368 -0
- etlplus/run_helpers.py +843 -0
- etlplus/templates/__init__.py +5 -0
- etlplus/templates/ddl.sql.j2 +128 -0
- etlplus/templates/view.sql.j2 +69 -0
- etlplus/transform.py +1049 -0
- etlplus/types.py +227 -0
- etlplus/utils.py +638 -0
- etlplus/validate.py +493 -0
- etlplus/validation/__init__.py +44 -0
- etlplus/validation/utils.py +389 -0
- etlplus-0.5.4.dist-info/METADATA +616 -0
- etlplus-0.5.4.dist-info/RECORD +55 -0
- etlplus-0.5.4.dist-info/WHEEL +5 -0
- etlplus-0.5.4.dist-info/entry_points.txt +2 -0
- etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
- etlplus-0.5.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.api.retry_manager` module.
|
|
3
|
+
|
|
4
|
+
Retry policies and exponential backoff helpers.
|
|
5
|
+
|
|
6
|
+
This module centralizes retry behavior for HTTP requests, including policy
|
|
7
|
+
parsing and exponential backoff with jitter.
|
|
8
|
+
|
|
9
|
+
Examples
|
|
10
|
+
--------
|
|
11
|
+
Retry a request with exponential backoff::
|
|
12
|
+
|
|
13
|
+
>>> from etlplus.api.retry_manager import RetryManager
|
|
14
|
+
>>> policy = {"max_attempts": 3, "backoff": 0.25, "retry_on": [429]}
|
|
15
|
+
>>> mgr = RetryManager(policy=policy)
|
|
16
|
+
>>> mgr.get_sleep_time(1)
|
|
17
|
+
0.123 # jittered value in [0, min(backoff, cap)]
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import random
|
|
23
|
+
import time
|
|
24
|
+
from collections.abc import Callable
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
from dataclasses import field
|
|
27
|
+
from typing import Any
|
|
28
|
+
from typing import ClassVar
|
|
29
|
+
from typing import Final
|
|
30
|
+
from typing import TypedDict
|
|
31
|
+
|
|
32
|
+
import requests # type: ignore[import]
|
|
33
|
+
|
|
34
|
+
from ..types import JSONData
|
|
35
|
+
from ..types import Sleeper
|
|
36
|
+
from ..utils import to_float
|
|
37
|
+
from ..utils import to_int
|
|
38
|
+
from ..utils import to_positive_int
|
|
39
|
+
from .errors import ApiAuthError
|
|
40
|
+
from .errors import ApiRequestError
|
|
41
|
+
|
|
42
|
+
# SECTION: EXPORTS ========================================================== #
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Public API of this module; controls what ``from ... import *`` exposes.
__all__ = [
    # -- Classes -- #
    'RetryStrategy',
    'RetryManager',
    # -- Typed dicts -- #
    'RetryPolicy',
    # -- Type aliases -- #
    'RetryInput',
]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# SECTION: CONSTANTS ======================================================== #
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# HTTP status codes retried when a policy does not supply usable codes:
# 429 (Too Many Requests), 502 (Bad Gateway), 503 (Service Unavailable) and
# 504 (Gateway Timeout).
DEFAULT_RETRY_STATUS_CODES: Final[frozenset[int]] = frozenset(
    (429, 502, 503, 504),
)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# SECTION: TYPED DICTS ====================================================== #
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class RetryPolicy(TypedDict, total=False):
    """
    User-facing retry configuration for HTTP requests.

    Every key may be omitted (``total=False``); missing or unusable values
    are replaced with defaults by :meth:`RetryStrategy.from_policy`.

    Attributes
    ----------
    max_attempts : int, optional
        Total number of attempts, counting the initial call.
    backoff : float, optional
        Base delay in seconds. Before retrying after attempt ``n`` the
        un-jittered delay is ``backoff * 2**(n-1)``.
    retry_on : list[int], optional
        HTTP status codes for which a failed request is retried.

    Notes
    -----
    - The policy itself only carries data. Jitter and the sleep cap are
      applied by :class:`RetryManager`, which also decides retry eligibility
      from the HTTP status code.
    """

    # Total attempts, including the first one.
    max_attempts: int
    # Base backoff in seconds (doubles with each attempt).
    backoff: float
    # Status codes that trigger a retry.
    retry_on: list[int]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# SECTION: TYPE ALIASES ===================================================== #
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# Accepted retry configuration input: a ``RetryPolicy`` mapping, or ``None``
# when the caller supplies no policy (``RetryStrategy.from_policy`` treats
# ``None`` like an empty policy).
type RetryInput = RetryPolicy | None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# SECTION: DATA CLASSES ===================================================== #
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass(frozen=True, slots=True)
class RetryStrategy:
    """
    Immutable, normalized retry settings built from a :class:`RetryPolicy`.

    Attributes
    ----------
    max_attempts : int
        Total number of attempts, including the first call.
    backoff : float
        Base backoff in seconds.
    retry_on_codes : frozenset[int]
        HTTP status codes that qualify a failed request for a retry.
    DEFAULT_ATTEMPTS : ClassVar[int]
        Default passed to ``to_positive_int`` for ``max_attempts``.
    DEFAULT_BACKOFF : ClassVar[float]
        Default passed to ``to_float`` for ``backoff``; also used whenever
        the converted value is falsy.
    """

    # -- Attributes -- #

    max_attempts: int
    backoff: float
    retry_on_codes: frozenset[int]

    DEFAULT_ATTEMPTS: ClassVar[int] = 3
    DEFAULT_BACKOFF: ClassVar[float] = 0.5

    # -- Class Methods -- #

    @classmethod
    def from_policy(
        cls,
        policy: RetryInput,
        *,
        default_codes: frozenset[int] = DEFAULT_RETRY_STATUS_CODES,
    ) -> RetryStrategy:
        """
        Build a strategy from a possibly missing or partial policy.

        Parameters
        ----------
        policy : RetryInput
            Raw retry policy; ``None`` (or any falsy value) is treated as an
            empty policy.
        default_codes : frozenset[int], optional
            Status codes used when ``retry_on`` yields no positive integer
            code. Defaults to ``DEFAULT_RETRY_STATUS_CODES``.

        Returns
        -------
        RetryStrategy
            Strategy holding the normalized attempts, backoff, and codes.
        """
        settings = policy or {}

        attempt_limit = to_positive_int(
            settings.get('max_attempts'),
            cls.DEFAULT_ATTEMPTS,
        )

        base_delay = to_float(
            settings.get('backoff'),
            default=cls.DEFAULT_BACKOFF,
            minimum=0.0,
        )
        if not base_delay:
            # A falsy result (e.g. ``0.0``) falls back to the default backoff.
            base_delay = cls.DEFAULT_BACKOFF

        # Keep only entries that convert to a strictly positive integer.
        codes = {
            parsed
            for raw_code in (settings.get('retry_on') or [])
            if (parsed := to_int(raw_code)) is not None and parsed > 0
        }

        return cls(
            max_attempts=attempt_limit,
            backoff=base_delay,
            # No usable code supplied: retry on the default status codes.
            retry_on_codes=frozenset(codes or default_codes),
        )
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# SECTION: CLASSES ========================================================== #
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@dataclass(frozen=True, slots=True, kw_only=True)
class RetryManager:
    """
    Centralized retry logic for HTTP requests.

    Attributes
    ----------
    DEFAULT_STATUS_CODES : ClassVar[frozenset[int]]
        Default HTTP status codes considered retryable.
    DEFAULT_CAP : ClassVar[float]
        Default maximum sleep seconds for jittered backoff.
    policy : RetryPolicy
        Retry policy configuration.
    retry_network_errors : bool
        Whether to retry on network errors (timeouts, connection errors).
    cap : float
        Maximum sleep seconds for jittered backoff.
    sleeper : Sleeper
        Callable used to sleep between retry attempts. Defaults to
        :func:`time.sleep`.
    strategy : RetryStrategy
        Normalized view of the retry policy (backoff, attempts, codes).
        Derived from ``policy`` in ``__post_init__``; not an ``__init__``
        argument.
    """

    # -- Class Attributes -- #

    DEFAULT_STATUS_CODES: ClassVar[frozenset[int]] = DEFAULT_RETRY_STATUS_CODES
    DEFAULT_CAP: ClassVar[float] = 30.0

    # -- Instance Attributes-- #

    policy: RetryPolicy
    retry_network_errors: bool = False
    cap: float = DEFAULT_CAP
    sleeper: Sleeper = time.sleep
    strategy: RetryStrategy = field(init=False, repr=False)

    # -- Magic Methods (Object Lifecycle) -- #

    def __post_init__(self) -> None:
        """Derive :attr:`strategy` from :attr:`policy`."""
        # The dataclass is frozen, so regular attribute assignment is
        # blocked; ``object.__setattr__`` bypasses that for this one-time
        # initialization.
        object.__setattr__(
            self,
            'strategy',
            RetryStrategy.from_policy(
                self.policy,
                default_codes=self.DEFAULT_STATUS_CODES,
            ),
        )

    # -- Properties -- #

    @property
    def backoff(self) -> float:
        """
        Base backoff in seconds.

        Returns
        -------
        float
            Normalized base backoff taken from :attr:`strategy`.
        """
        return self.strategy.backoff

    @property
    def max_attempts(self) -> int:
        """
        Maximum number of attempts, including the first call.

        Returns
        -------
        int
            Normalized attempt limit taken from :attr:`strategy`.
        """
        return self.strategy.max_attempts

    @property
    def retry_on_codes(self) -> set[int]:
        """
        Set of HTTP status codes that should trigger a retry.

        Returns
        -------
        set[int]
            A new mutable copy of the strategy's retry status codes.
        """
        return set(self.strategy.retry_on_codes)

    # -- Instance Methods -- #

    def get_sleep_time(
        self,
        attempt: int,
    ) -> float:
        """
        Compute a jittered sleep duration for the given attempt.

        The un-jittered delay is ``backoff * 2**(attempt - 1)``, limited to
        :attr:`cap`. The result is drawn uniformly between ``0`` and that
        limited delay.

        Parameters
        ----------
        attempt : int
            Attempt number (1-based). Values below 1 are treated as 1.

        Returns
        -------
        float
            Sleep time in seconds.
        """
        attempt = max(1, attempt)
        try:
            exp = self.backoff * (2 ** (attempt - 1))
        except OverflowError:
            # ``2 ** k`` is an exact int; once it no longer fits in a float
            # the multiplication raises instead of saturating. Such a delay
            # is beyond any finite cap, so clamp straight to the cap.
            exp = self.cap
        upper = min(exp, self.cap)
        return random.uniform(0.0, upper)

    def run_with_retry(
        self,
        func: Callable[..., JSONData],
        url: str,
        **kwargs: Any,
    ) -> JSONData:
        """
        Execute ``func`` with exponential-backoff retries.

        ``func`` is invoked as ``func(url, **kwargs)`` up to
        :attr:`max_attempts` times. Only ``requests.RequestException`` is
        intercepted; any other exception propagates unchanged.

        Parameters
        ----------
        func : Callable[..., JSONData]
            Function to run with retry logic.
        url : str
            URL for the API request.
        **kwargs : Any
            Additional keyword arguments to pass to ``func``

        Returns
        -------
        JSONData
            Response data from the API request.

        Raises
        ------
        ApiRequestError
            Request failed with a non-retryable error or after exhausting
            all attempts.

        Notes
        -----
        Authentication failures propagate as :class:`ApiAuthError` from the
        internal ``_raise_terminal_error`` helper when the status code is 401
        or 403.
        """
        for attempt in range(1, self.max_attempts + 1):
            try:
                return func(url, **kwargs)
            except requests.RequestException as e:
                status = self._extract_status(e)
                exhausted = attempt == self.max_attempts
                if not self.should_retry(status, e) or exhausted:
                    # Always raises, so the sleep below is only reached when
                    # another attempt will follow.
                    self._raise_terminal_error(url, attempt, status, e)
                self.sleeper(self.get_sleep_time(attempt))

        # ``range`` already covered all attempts; reaching this line would
        # indicate a logical error.
        raise ApiRequestError(  # pragma: no cover - defensive
            url=url,
            status=None,
            attempts=self.max_attempts,
            retried=True,
            retry_policy=self.policy,
            cause=None,
        )

    def should_retry(
        self,
        status: int | None,
        error: Exception,
    ) -> bool:
        """
        Determine whether a request should be retried.

        Parameters
        ----------
        status : int | None
            HTTP status code extracted from the failed response, if any.
        error : Exception
            The exception that was raised.

        Returns
        -------
        bool
            ``True`` when ``status`` is one of :attr:`retry_on_codes`, or
            when :attr:`retry_network_errors` is enabled and ``error`` is a
            ``requests.Timeout`` or ``requests.ConnectionError``; ``False``
            otherwise.
        """
        # HTTP status-based retry
        if status is not None and status in self.retry_on_codes:
            return True

        # Network error retry
        if self.retry_network_errors:
            if isinstance(error, (requests.Timeout, requests.ConnectionError)):
                return True

        return False

    # -- Internal Instance Methods -- #

    def _raise_terminal_error(
        self,
        url: str,
        attempt: int,
        status: int | None,
        error: requests.RequestException,
    ) -> None:
        """
        Raise the terminal error for a failed request.

        Called when the failure is not retryable or when the last attempt
        has been used. This method never returns normally.

        Parameters
        ----------
        url : str
            URL for the API request.
        attempt : int
            Attempt number on which the failure occurred.
        status : int | None
            HTTP status code if available.
        error : requests.RequestException
            The exception that was raised; chained as the cause.

        Raises
        ------
        ApiAuthError
            When ``status`` is 401 or 403.
        ApiRequestError
            For every other failure.
        """
        # More than one attempt means at least one retry happened.
        retried = attempt > 1
        if status in {401, 403}:
            raise ApiAuthError(
                url=url,
                status=status,
                attempts=attempt,
                retried=retried,
                retry_policy=self.policy,
                cause=error,
            ) from error

        raise ApiRequestError(
            url=url,
            status=status,
            attempts=attempt,
            retried=retried,
            retry_policy=self.policy,
            cause=error,
        ) from error

    # -- Internal Static Methods -- #

    @staticmethod
    def _extract_status(
        error: requests.RequestException,
    ) -> int | None:
        """
        Extract the HTTP status code from a RequestException.

        Parameters
        ----------
        error : requests.RequestException
            The exception from which to extract the status code.

        Returns
        -------
        int | None
            ``error.response.status_code`` when both attributes exist, else
            ``None``.
        """
        # ``getattr`` with a default tolerates exceptions that carry no
        # response, or a response of ``None``.
        response = getattr(error, 'response', None)
        return getattr(response, 'status_code', None)
|