etlplus 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +43 -0
- etlplus/__main__.py +22 -0
- etlplus/__version__.py +14 -0
- etlplus/api/README.md +237 -0
- etlplus/api/__init__.py +136 -0
- etlplus/api/auth.py +432 -0
- etlplus/api/config.py +633 -0
- etlplus/api/endpoint_client.py +885 -0
- etlplus/api/errors.py +170 -0
- etlplus/api/pagination/__init__.py +47 -0
- etlplus/api/pagination/client.py +188 -0
- etlplus/api/pagination/config.py +440 -0
- etlplus/api/pagination/paginator.py +775 -0
- etlplus/api/rate_limiting/__init__.py +38 -0
- etlplus/api/rate_limiting/config.py +343 -0
- etlplus/api/rate_limiting/rate_limiter.py +266 -0
- etlplus/api/request_manager.py +589 -0
- etlplus/api/retry_manager.py +430 -0
- etlplus/api/transport.py +325 -0
- etlplus/api/types.py +172 -0
- etlplus/cli/__init__.py +15 -0
- etlplus/cli/app.py +1367 -0
- etlplus/cli/handlers.py +775 -0
- etlplus/cli/main.py +616 -0
- etlplus/config/__init__.py +56 -0
- etlplus/config/connector.py +372 -0
- etlplus/config/jobs.py +311 -0
- etlplus/config/pipeline.py +339 -0
- etlplus/config/profile.py +78 -0
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/ddl.py +197 -0
- etlplus/enums.py +414 -0
- etlplus/extract.py +218 -0
- etlplus/file.py +657 -0
- etlplus/load.py +336 -0
- etlplus/mixins.py +62 -0
- etlplus/py.typed +0 -0
- etlplus/run.py +368 -0
- etlplus/run_helpers.py +843 -0
- etlplus/templates/__init__.py +5 -0
- etlplus/templates/ddl.sql.j2 +128 -0
- etlplus/templates/view.sql.j2 +69 -0
- etlplus/transform.py +1049 -0
- etlplus/types.py +227 -0
- etlplus/utils.py +638 -0
- etlplus/validate.py +493 -0
- etlplus/validation/__init__.py +44 -0
- etlplus/validation/utils.py +389 -0
- etlplus-0.5.4.dist-info/METADATA +616 -0
- etlplus-0.5.4.dist-info/RECORD +55 -0
- etlplus-0.5.4.dist-info/WHEEL +5 -0
- etlplus-0.5.4.dist-info/entry_points.txt +2 -0
- etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
- etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/api/auth.py
ADDED
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.api.auth` module.
|
|
3
|
+
|
|
4
|
+
Bearer token authentication for REST APIs using the OAuth2 Client Credentials
|
|
5
|
+
flow.
|
|
6
|
+
|
|
7
|
+
Summary
|
|
8
|
+
-------
|
|
9
|
+
Use :class:`EndpointCredentialsBearer` with ``requests`` to add
|
|
10
|
+
``Authorization: Bearer <token>`` headers. Tokens are fetched and refreshed
|
|
11
|
+
on demand with a small clock skew to avoid edge-of-expiry races.
|
|
12
|
+
|
|
13
|
+
Notes
|
|
14
|
+
-----
|
|
15
|
+
- Tokens are refreshed when remaining lifetime < ``CLOCK_SKEW_SEC`` seconds.
|
|
16
|
+
- Network/HTTP errors are surfaced from ``requests`` with concise logging.
|
|
17
|
+
|
|
18
|
+
Examples
|
|
19
|
+
--------
|
|
20
|
+
Basic usage with ``requests.Session``
|
|
21
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
22
|
+
>>> from etlplus.api import EndpointCredentialsBearer
|
|
23
|
+
>>> auth = EndpointCredentialsBearer(
|
|
24
|
+
... token_url="https://auth.example.com/oauth2/token",
|
|
25
|
+
... client_id="id",
|
|
26
|
+
... client_secret="secret",
|
|
27
|
+
... scope="read",
|
|
28
|
+
... )
|
|
29
|
+
>>> import requests
|
|
30
|
+
>>> s = requests.Session()
|
|
31
|
+
>>> s.auth = auth
|
|
32
|
+
>>> r = s.get("https://api.example.com/v1/items")
|
|
33
|
+
>>> r.raise_for_status()
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
from __future__ import annotations
|
|
37
|
+
|
|
38
|
+
import logging
import math
import time
from collections.abc import Mapping
from dataclasses import dataclass
from types import MappingProxyType
from typing import Any
from typing import Protocol
from typing import TypedDict
from typing import cast

import requests  # type: ignore[import]
from requests import PreparedRequest  # type: ignore
from requests import Response  # type: ignore
from requests.auth import AuthBase  # type: ignore

from .types import Url
|
|
54
|
+
|
|
55
|
+
logger = logging.getLogger(__name__)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# SECTION: EXPORTS ========================================================== #
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
__all__ = ['EndpointCredentialsBearer']
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# SECTION: CONSTANTS ======================================================== #
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Safety margin (seconds) subtracted from the token expiry when deciding
# whether a cached token is still usable.
CLOCK_SKEW_SEC = 30
# Token lifetime (seconds) assumed when the token response carries no usable
# ``expires_in`` value.
DEFAULT_TOKEN_TTL = 3600
# Default timeout (seconds) applied to the token request.
DEFAULT_TOKEN_TIMEOUT = 15.0
# Maximum number of response-body characters included in log messages.
MAX_LOG_BODY = 500
# Read-only headers sent with the form-encoded token request.
FORM_HEADERS = MappingProxyType(
    {'Content-Type': 'application/x-www-form-urlencoded'},
)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# SECTION: TYPED DICTS ====================================================== #
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class _TokenResponse(TypedDict):
    """
    Minimal shape of an OAuth token response body.

    Only the two fields consumed by this module are modeled; any other keys
    present in the raw token response are not carried over.
    """

    # Bearer token string placed into the ``Authorization`` header.
    access_token: str
    # Token lifetime in seconds, as reported by the token endpoint (or the
    # module default when the endpoint value is unusable).
    expires_in: int | float
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class _TokenHttpClient(Protocol):
    """
    Structural type for any object offering a ``requests``-style ``post``.

    Both a ``requests.Session`` and the ``requests`` module itself are used
    through this interface when fetching tokens.
    """

    def post(
        self,
        url: Url,
        **kwargs: Any,
    ) -> Response:
        """
        Send an HTTP POST request and hand back the resulting response.

        Parameters
        ----------
        url : Url
            Target URL of the request.
        **kwargs : Any
            Extra keyword arguments forwarded to the client (for example
            payload, headers, or timeout).

        Returns
        -------
        Response
            The HTTP response returned by the client.
        """
        ...
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _response_excerpt(
    resp: Response | None,
) -> str:
    """
    Produce a log-friendly excerpt of a response body.

    Parameters
    ----------
    resp : Response | None
        The HTTP response to excerpt, or ``None`` when no response is
        available.

    Returns
    -------
    str
        The response text shortened by :func:`_truncate`; an empty string
        when ``resp`` is ``None``.
    """
    # Compare against ``None`` explicitly rather than relying on the
    # truthiness of the response object.
    body = '' if resp is None else resp.text
    return _truncate(body)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _truncate(
    text: str | None,
    *,
    limit: int = MAX_LOG_BODY,
) -> str:
    """
    Return ``text`` shortened to at most ``limit`` characters for logging.

    Parameters
    ----------
    text : str | None
        The text to truncate. ``None`` and the empty string both yield
        ``''``.
    limit : int, optional
        The maximum length of the returned string (default is
        ``MAX_LOG_BODY``). Negative values are treated as ``0``.

    Returns
    -------
    str
        The truncated text.
    """
    if not text:
        return ''
    # A negative limit would make the slice drop characters from the end
    # instead of capping the length, so clamp it to zero first.
    return text[:max(limit, 0)]
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# SECTION: CLASSES ========================================================== #
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@dataclass(slots=True, repr=False, eq=False, kw_only=True)
class EndpointCredentialsBearer(AuthBase):
    """
    Bearer token authentication via the OAuth2 Client Credentials flow.

    Summary
    -------
    Implements ``requests`` ``AuthBase`` to lazily obtain and refresh an
    access token, adding ``Authorization: Bearer <token>`` to outgoing
    requests. A small clock skew avoids edge-of-expiry races.

    Parameters
    ----------
    token_url : str
        OAuth2 token endpoint URL.
    client_id : str
        OAuth2 client ID.
    client_secret : str
        OAuth2 client secret.
    scope : str | None, optional
        Optional OAuth2 scope string. Default is ``None``.
    token : str | None, optional
        Pre-seeded access token. Default is ``None`` (fetch on first use).
    expiry : float, optional
        UNIX timestamp at which a pre-seeded ``token`` expires. Default is
        ``0.0``.
    timeout : float, optional
        Timeout in seconds for token requests. Default is
        ``DEFAULT_TOKEN_TIMEOUT``.
    session : requests.Session | None, optional
        Session used for token requests. Default is ``None``.

    Attributes
    ----------
    token_url : str
        OAuth2 token endpoint URL.
    client_id : str
        OAuth2 client ID.
    client_secret : str
        OAuth2 client secret.
    scope : str | None
        Optional OAuth2 scope string.
    token : str | None
        Current access token (``None`` until first successful request).
    expiry : float
        UNIX timestamp when the token expires.
    timeout : float
        Timeout in seconds for token requests (defaults to
        ``DEFAULT_TOKEN_TIMEOUT``).
    session : requests.Session | None
        Optional session used for token requests to leverage connection
        pooling and shared auth state. Falls back to the module-level
        ``requests`` functions when ``None``.

    Notes
    -----
    - All constructor arguments are keyword-only.
    - Tokens are refreshed when remaining lifetime < ``CLOCK_SKEW_SEC``.
    - Network/HTTP errors propagate as ``requests`` exceptions from
      ``_ensure_token``.
    - Missing ``access_token`` in a successful response raises
      ``RuntimeError``.
    - A missing, non-numeric, or non-finite ``expires_in`` falls back to
      ``DEFAULT_TOKEN_TTL``.
    """

    # -- Attributes -- #

    token_url: str
    client_id: str
    client_secret: str
    scope: str | None = None
    token: str | None = None
    expiry: float = 0.0
    timeout: float = DEFAULT_TOKEN_TIMEOUT
    session: requests.Session | None = None

    # -- Magic Methods (Object Behavior) -- #

    def __call__(
        self,
        r: PreparedRequest,
    ) -> PreparedRequest:
        """
        Attach an Authorization header to an outgoing request.

        Ensures a valid access token is available, refreshing when
        necessary, and sets ``Authorization: Bearer <token>`` on the
        provided request object.

        Parameters
        ----------
        r : PreparedRequest
            The request object that will be sent by ``requests``.

        Returns
        -------
        PreparedRequest
            The same request with the Authorization header set.
        """
        self._ensure_token()
        r.headers['Authorization'] = f'Bearer {self.token}'
        return r

    # -- Internal Instance Methods -- #

    def _ensure_token(self) -> None:
        """
        Fetch or refresh the bearer token if expired or missing.

        Uses the OAuth2 Client Credentials flow against ``token_url``.
        Applies a small clock skew to avoid edge-of-expiry races.

        Returns
        -------
        None
            This method mutates ``token`` and ``expiry`` in place.

        Notes
        -----
        Exceptions raised by the underlying HTTP call propagate directly.
        """
        if self._token_valid():
            return

        response = self._request_token()
        self.token = response['access_token']
        # Re-normalize here as well so an overridden ``_request_token`` that
        # returns a raw mapping cannot produce a NaN or infinite expiry.
        ttl = self._coerce_ttl(
            response.get('expires_in', DEFAULT_TOKEN_TTL),
        )
        # Negative lifetimes are clamped so the expiry is never in the past
        # relative to "now".
        self.expiry = time.time() + max(ttl, 0.0)

    def _http_client(self) -> _TokenHttpClient:
        """
        Return the configured HTTP session or the module-level client.

        Returns
        -------
        _TokenHttpClient
            ``session`` when one was provided, otherwise the ``requests``
            module.
        """
        # Test identity against ``None`` instead of truthiness so that a
        # session-like object which happens to be falsy is still honored.
        client = self.session if self.session is not None else requests
        return cast(_TokenHttpClient, client)

    def _parse_token_response(
        self,
        resp: Response,
    ) -> _TokenResponse:
        """
        Validate the JSON token response and return a typed mapping.

        Parameters
        ----------
        resp : Response
            The HTTP response from the token endpoint.

        Returns
        -------
        _TokenResponse
            Parsed token response mapping. ``expires_in`` is always a
            finite ``float``; unusable values are replaced by
            ``DEFAULT_TOKEN_TTL``.

        Raises
        ------
        ValueError
            When the response is not valid JSON or not a JSON object.
        RuntimeError
            When the response is missing the ``access_token`` field.
        """
        try:
            payload: Any = resp.json()
        except ValueError:
            logger.error(
                'Token response is not valid JSON. Body: %s',
                _truncate(resp.text),
            )
            raise

        if not isinstance(payload, Mapping):
            logger.error(
                'Token response is not a JSON object (type=%s)',
                type(payload).__name__,
            )
            raise ValueError('Token response must be a JSON object')

        token = payload.get('access_token')
        # Reject both a missing key and an empty/non-string value.
        if not isinstance(token, str) or not token:
            logger.error(
                'Token response missing "access_token". Keys: %s',
                list(payload.keys()),
            )
            raise RuntimeError('Missing access_token in token response')

        ttl = self._coerce_ttl(payload.get('expires_in', DEFAULT_TOKEN_TTL))

        return _TokenResponse(access_token=token, expires_in=ttl)

    def _request_token(self) -> _TokenResponse:
        """
        Execute the OAuth2 token request and parse the response.

        The client credentials are passed through the ``auth`` argument of
        the HTTP client, and the grant parameters are sent as form data.

        Returns
        -------
        _TokenResponse
            Parsed token response mapping.

        Raises
        ------
        requests.exceptions.Timeout
            On request timeout.
        requests.exceptions.SSLError
            On TLS/SSL errors.
        requests.exceptions.ConnectionError
            On network connection errors.
        requests.exceptions.HTTPError
            On HTTP errors (4xx/5xx responses).
        requests.exceptions.RequestException
            On network/HTTP errors during the token request.
        ValueError
            When the response body is not a valid JSON object.
        RuntimeError
            When the response is missing the ``access_token`` field.
        """
        client = self._http_client()
        # Handlers are ordered from most to least specific; each one logs a
        # concise message and re-raises the original exception unchanged.
        try:
            resp = client.post(
                self.token_url,
                data=self._token_payload(),
                auth=(self.client_id, self.client_secret),
                headers=self._token_headers(),
                timeout=self.timeout,
            )
            resp.raise_for_status()
        except requests.exceptions.Timeout:
            logger.error(
                'Token request timed out (url=%s)',
                self.token_url,
            )
            raise
        except requests.exceptions.SSLError:
            logger.error(
                'TLS/SSL error contacting token endpoint (url=%s)',
                self.token_url,
            )
            raise
        except requests.exceptions.ConnectionError:
            logger.error(
                'Network connection error (url=%s)',
                self.token_url,
            )
            raise
        except requests.exceptions.HTTPError as e:
            body = _response_excerpt(e.response)
            code = getattr(e.response, 'status_code', 'N/A')
            logger.error(
                'Token endpoint returned HTTP %s. Body: %s',
                code,
                body,
            )
            raise
        except requests.exceptions.RequestException:
            logger.exception(
                'Unexpected error requesting token (url=%s)',
                self.token_url,
            )
            raise

        return self._parse_token_response(resp)

    def _token_headers(self) -> Mapping[str, str]:
        """Return headers for the token request."""
        return FORM_HEADERS

    def _token_payload(self) -> dict[str, str]:
        """
        Build the minimal OAuth2 client credentials payload.

        Returns
        -------
        dict[str, str]
            Form fields for the token request; ``scope`` is included only
            when it is a non-blank string.
        """
        payload = {
            'grant_type': 'client_credentials',
        }
        if isinstance(self.scope, str) and self.scope.strip():
            payload['scope'] = self.scope
        return payload

    def _token_valid(self) -> bool:
        """
        Return ``True`` when the cached token is usable.

        Returns
        -------
        bool
            ``True`` when a token is present and the current time is
            earlier than ``expiry`` minus ``CLOCK_SKEW_SEC``.
        """
        return self.token is not None and time.time() < (
            self.expiry - CLOCK_SKEW_SEC
        )

    # -- Internal Static Methods -- #

    @staticmethod
    def _coerce_ttl(raw_ttl: Any) -> float:
        """
        Convert a raw ``expires_in`` value into a finite ``float``.

        Parameters
        ----------
        raw_ttl : Any
            The value taken from the token response.

        Returns
        -------
        float
            ``raw_ttl`` as a float, or ``DEFAULT_TOKEN_TTL`` when the value
            cannot be converted or is NaN/infinite.
        """
        try:
            ttl = float(raw_ttl)
        except (TypeError, ValueError, OverflowError):
            return float(DEFAULT_TOKEN_TTL)
        # NaN would make every expiry comparison false (refetching on each
        # request) and infinity would disable refresh entirely.
        return ttl if math.isfinite(ttl) else float(DEFAULT_TOKEN_TTL)
|