etlplus-0.5.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/api/auth.py ADDED
@@ -0,0 +1,432 @@
1
+ """
2
+ :mod:`etlplus.api.auth` module.
3
+
4
+ Bearer token authentication for REST APIs using the OAuth2 Client Credentials
5
+ flow.
6
+
7
+ Summary
8
+ -------
9
+ Use :class:`EndpointCredentialsBearer` with ``requests`` to add
10
+ ``Authorization: Bearer <token>`` headers. Tokens are fetched and refreshed
11
+ on demand with a small clock skew to avoid edge-of-expiry races.
12
+
13
+ Notes
14
+ -----
15
+ - Tokens are refreshed when remaining lifetime < ``CLOCK_SKEW_SEC`` seconds.
16
+ - Network/HTTP errors are surfaced from ``requests`` with concise logging.
17
+
18
+ Examples
19
+ --------
20
+ Basic usage with ``requests.Session``
21
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
22
+ >>> from etlplus.api import EndpointCredentialsBearer
23
+ >>> auth = EndpointCredentialsBearer(
24
+ ... token_url="https://auth.example.com/oauth2/token",
25
+ ... client_id="id",
26
+ ... client_secret="secret",
27
+ ... scope="read",
28
+ ... )
29
+ >>> import requests
30
+ >>> s = requests.Session()
31
+ >>> s.auth = auth
32
+ >>> r = s.get("https://api.example.com/v1/items")
33
+ >>> r.raise_for_status()
34
+ """
35
+
36
+ from __future__ import annotations
37
+
38
+ import logging
39
+ import time
40
+ from collections.abc import Mapping
41
+ from dataclasses import dataclass
42
+ from types import MappingProxyType
43
+ from typing import Any
44
+ from typing import Protocol
45
+ from typing import TypedDict
46
+ from typing import cast
47
+
48
+ import requests # type: ignore[import]
49
+ from requests import PreparedRequest # type: ignore
50
+ from requests import Response # type: ignore
51
+ from requests.auth import AuthBase # type: ignore
52
+
53
+ from .types import Url
54
+
55
+ logger = logging.getLogger(__name__)
56
+
57
+
58
+ # SECTION: EXPORTS ========================================================== #
59
+
60
+
61
+ __all__ = ['EndpointCredentialsBearer']
62
+
63
+
64
+ # SECTION: CONSTANTS ======================================================== #
65
+
66
+
67
# Safety margin, in seconds, subtracted from the stored expiry when deciding
# whether the cached token is still usable.
CLOCK_SKEW_SEC = 30

# Lifetime, in seconds, assumed when the token response carries no usable
# ``expires_in`` value.
DEFAULT_TOKEN_TTL = 3600

# Default timeout, in seconds, applied to the token request itself.
DEFAULT_TOKEN_TIMEOUT = 15.0

# Upper bound on the number of response-body characters written to logs.
MAX_LOG_BODY = 500

# Read-only header mapping sent with every token request; wrapping the dict
# in ``MappingProxyType`` prevents accidental mutation of the shared value.
FORM_HEADERS = MappingProxyType(
    {
        'Content-Type': 'application/x-www-form-urlencoded',
    },
)
74
+
75
+
76
+ # SECTION: TYPED DICTS ====================================================== #
77
+
78
+
79
+ class _TokenResponse(TypedDict):
80
+ """Minimal shape of an OAuth token response body."""
81
+
82
+ access_token: str
83
+ expires_in: int | float
84
+
85
+
86
class _TokenHttpClient(Protocol):
    """
    Structural type for any object offering a ``requests``-style ``post``.

    Both the ``requests`` module and a ``requests.Session`` instance are
    treated as satisfying this protocol by the token-fetching code.
    """

    def post(
        self,
        url: Url,
        **kwargs: Any,
    ) -> Response:
        """
        Send an HTTP POST to ``url`` and hand back the resulting response.

        Parameters
        ----------
        url : Url
            Target URL of the request.
        **kwargs : Any
            Extra keyword arguments forwarded to the client (for example
            the form payload, headers, credentials, or timeout).

        Returns
        -------
        Response
            The response object returned by the underlying client.
        """
        ...
109
+
110
+
111
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
112
+
113
+
114
def _response_excerpt(
    resp: Response | None,
) -> str:
    """
    Produce a log-safe excerpt of a response body.

    Parameters
    ----------
    resp : Response | None
        Response whose body should be excerpted, or ``None`` when no
        response object is available.

    Returns
    -------
    str
        The leading portion of ``resp.text`` as shortened by
        :func:`_truncate`, or an empty string when ``resp`` is ``None``.
    """
    # The check is an explicit ``None`` comparison, not a truthiness test,
    # so a response object is always excerpted regardless of its status.
    if resp is None:
        return _truncate('')
    return _truncate(resp.text)
131
+
132
+
133
def _truncate(
    text: str | None,
    *,
    limit: int = MAX_LOG_BODY,
) -> str:
    """
    Clip ``text`` to at most ``limit`` leading characters for logging.

    Parameters
    ----------
    text : str | None
        Text to shorten; ``None`` and the empty string are both accepted.
    limit : int, optional
        Number of leading characters to keep (default is
        ``MAX_LOG_BODY``).

    Returns
    -------
    str
        The clipped text, or an empty string when ``text`` is falsy.
    """
    return text[:limit] if text else ''
157
+
158
+
159
+ # SECTION: CLASSES ========================================================== #
160
+
161
+
162
@dataclass(slots=True, repr=False, eq=False, kw_only=True)
class EndpointCredentialsBearer(AuthBase):
    """
    Bearer token authentication via the OAuth2 Client Credentials flow.

    Summary
    -------
    Implements ``requests`` ``AuthBase`` to lazily obtain and refresh an
    access token, adding ``Authorization: Bearer <token>`` to outgoing
    requests. A small clock skew avoids edge-of-expiry races.

    Parameters
    ----------
    token_url : str
        OAuth2 token endpoint URL.
    client_id : str
        OAuth2 client ID.
    client_secret : str
        OAuth2 client secret.
    scope : str | None, optional
        Optional OAuth2 scope string.

    Attributes
    ----------
    token_url : str
        OAuth2 token endpoint URL.
    client_id : str
        OAuth2 client ID.
    client_secret : str
        OAuth2 client secret.
    scope : str | None
        Optional OAuth2 scope string.
    token : str | None
        Current access token (``None`` until first successful request).
    expiry : float
        UNIX timestamp when the token expires.
    timeout : float
        Timeout in seconds for token requests (defaults to
        ``DEFAULT_TOKEN_TIMEOUT``).
    session : requests.Session | None
        Optional session used for token requests to leverage connection
        pooling and shared auth state. Falls back to the module-level
        ``requests`` functions when ``None``.

    Notes
    -----
    - Tokens are refreshed when remaining lifetime < ``CLOCK_SKEW_SEC``.
    - Network/HTTP errors propagate as ``requests`` exceptions from
      ``_ensure_token``.
    - A token response that is not valid JSON, or not a JSON object,
      raises ``ValueError``.
    - Missing ``access_token`` in a successful response raises
      ``RuntimeError``.
    - A missing, ill-typed, or non-finite ``expires_in`` falls back to
      ``DEFAULT_TOKEN_TTL``; a negative value is clamped to zero.
    - The dataclass is declared with ``repr=False``, so no field-listing
      ``__repr__`` (which would include ``client_secret``) is generated.
    """

    # -- Attributes -- #

    token_url: str
    client_id: str
    client_secret: str
    scope: str | None = None
    token: str | None = None
    expiry: float = 0.0
    timeout: float = DEFAULT_TOKEN_TIMEOUT
    session: requests.Session | None = None

    # -- Magic Methods (Object Behavior) -- #

    def __call__(
        self,
        r: PreparedRequest,
    ) -> PreparedRequest:
        """
        Attach an Authorization header to an outgoing request.

        Ensures a valid access token is available, refreshing when
        necessary, and sets ``Authorization: Bearer <token>`` on the
        provided request object.

        Parameters
        ----------
        r : PreparedRequest
            The request object that will be sent by ``requests``.

        Returns
        -------
        PreparedRequest
            The same request with the Authorization header set.
        """
        self._ensure_token()
        r.headers['Authorization'] = f'Bearer {self.token}'
        return r

    # -- Internal Instance Methods -- #

    def _ensure_token(self) -> None:
        """
        Fetch or refresh the bearer token if expired or missing.

        Uses the OAuth2 Client Credentials flow against ``token_url``.
        Applies a small clock skew to avoid edge-of-expiry races.

        Returns
        -------
        None
            This method mutates ``token`` and ``expiry`` in place.

        Notes
        -----
        Exceptions raised by the underlying HTTP call propagate directly.
        """
        if self._token_valid():
            return

        response = self._request_token()
        self.token = response['access_token']
        ttl = float(response.get('expires_in', DEFAULT_TOKEN_TTL))
        # A negative lifetime is treated as "already expired", never as an
        # expiry in the past relative to now.
        self.expiry = time.time() + max(ttl, 0.0)

    def _http_client(self) -> _TokenHttpClient:
        """
        Return the configured HTTP session or the module-level client.

        Returns
        -------
        _TokenHttpClient
            ``session`` when one was supplied, otherwise the ``requests``
            module itself.
        """
        # Test identity, not truthiness: a supplied session object must be
        # honored even if it happens to evaluate as falsy.
        client = self.session if self.session is not None else requests
        return cast(_TokenHttpClient, client)

    def _parse_token_response(
        self,
        resp: Response,
    ) -> _TokenResponse:
        """
        Validate the JSON token response and return a typed mapping.

        Parameters
        ----------
        resp : Response
            The HTTP response from the token endpoint.

        Returns
        -------
        _TokenResponse
            Parsed token response mapping. ``expires_in`` is always a
            finite ``float``; ``DEFAULT_TOKEN_TTL`` is substituted when the
            server's value is missing, ill-typed, too large, or non-finite.

        Raises
        ------
        ValueError
            When the response is not valid JSON or not a JSON object.
        RuntimeError
            When the response is missing the ``access_token`` field.
        """
        # Function-scope import: only this method needs ``math``.
        import math

        try:
            payload: Any = resp.json()
        except ValueError:
            logger.error(
                'Token response is not valid JSON. Body: %s',
                _truncate(resp.text),
            )
            raise

        if not isinstance(payload, Mapping):
            logger.error(
                'Token response is not a JSON object (type=%s)',
                type(payload).__name__,
            )
            raise ValueError('Token response must be a JSON object')

        token = payload.get('access_token')
        if not isinstance(token, str) or not token:
            # Only the keys are logged so token material never reaches logs.
            logger.error(
                'Token response missing "access_token". Keys: %s',
                list(payload.keys()),
            )
            raise RuntimeError('Missing access_token in token response')

        raw_ttl = payload.get('expires_in', DEFAULT_TOKEN_TTL)
        try:
            ttl = float(raw_ttl)
        except (TypeError, ValueError, OverflowError):
            # ``OverflowError`` covers JSON integers too large for a float.
            ttl = float(DEFAULT_TOKEN_TTL)
        else:
            # ``inf`` would make the token never refresh and ``nan`` would
            # make every validity check fail; neither is a usable lifetime.
            if not math.isfinite(ttl):
                ttl = float(DEFAULT_TOKEN_TTL)

        return _TokenResponse(access_token=token, expires_in=ttl)

    def _request_token(self) -> _TokenResponse:
        """
        Execute the OAuth2 token request and parse the response.

        Returns
        -------
        _TokenResponse
            Parsed token response mapping.

        Raises
        ------
        requests.exceptions.Timeout
            On request timeout.
        requests.exceptions.SSLError
            On TLS/SSL errors.
        requests.exceptions.ConnectionError
            On network connection errors.
        requests.exceptions.HTTPError
            On HTTP errors (4xx/5xx responses).
        requests.exceptions.RequestException
            On network/HTTP errors during the token request.
        """
        client = self._http_client()
        # Handlers run from most to least specific; every branch logs a
        # short diagnostic and re-raises the original exception unchanged.
        try:
            resp = client.post(
                self.token_url,
                data=self._token_payload(),
                auth=(self.client_id, self.client_secret),
                headers=self._token_headers(),
                timeout=self.timeout,
            )
            resp.raise_for_status()
        except requests.exceptions.Timeout:
            logger.error(
                'Token request timed out (url=%s)',
                self.token_url,
            )
            raise
        except requests.exceptions.SSLError:
            logger.error(
                'TLS/SSL error contacting token endpoint (url=%s)',
                self.token_url,
            )
            raise
        except requests.exceptions.ConnectionError:
            logger.error(
                'Network connection error (url=%s)',
                self.token_url,
            )
            raise
        except requests.exceptions.HTTPError as e:
            body = _response_excerpt(e.response)
            code = getattr(e.response, 'status_code', 'N/A')
            logger.error(
                'Token endpoint returned HTTP %s. Body: %s',
                code,
                body,
            )
            raise
        except requests.exceptions.RequestException:
            logger.exception(
                'Unexpected error requesting token (url=%s)',
                self.token_url,
            )
            raise

        return self._parse_token_response(resp)

    def _token_headers(self) -> Mapping[str, str]:
        """Return headers for the token request."""
        return FORM_HEADERS

    def _token_payload(self) -> dict[str, str]:
        """
        Build the minimal OAuth2 client credentials payload.

        Returns
        -------
        dict[str, str]
            Form fields for the token request: always ``grant_type``, plus
            ``scope`` when a non-blank scope string is configured.
        """
        payload: dict[str, str] = {
            'grant_type': 'client_credentials',
        }
        # A blank or whitespace-only scope is omitted from the request.
        if isinstance(self.scope, str) and self.scope.strip():
            payload['scope'] = self.scope
        return payload

    def _token_valid(self) -> bool:
        """
        Return ``True`` when the cached token is usable.

        Returns
        -------
        bool
            ``True`` when a token is present and more than
            ``CLOCK_SKEW_SEC`` seconds remain before ``expiry``.
        """
        return self.token is not None and time.time() < (
            self.expiry - CLOCK_SKEW_SEC
        )