etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,325 @@
1
+ """
2
+ :mod:`etlplus.api.transport` module.
3
+
4
+ Configure ``requests`` ``HTTPAdapter`` instances with connection pooling and
5
+ optional ``urllib3`` retry behavior.
6
+
7
+ Summary
8
+ -------
9
+ ``build_http_adapter`` accepts a lightweight mapping and translates it into
10
+ an ``HTTPAdapter``. When a retry dict is provided, it is mapped to
11
+ ``urllib3.util.retry.Retry`` where available; otherwise, falls back to an
12
+ integer retry count or no retries.
13
+
14
+ Examples
15
+ --------
16
+ >>> from etlplus.api import build_http_adapter
17
+ >>> cfg = {
18
+ ... "pool_connections": 10,
19
+ ... "pool_maxsize": 10,
20
+ ... "pool_block": False,
21
+ ... "max_retries": {"total": 3, "backoff_factor": 0.5},
22
+ ... }
23
+ >>> adapter = build_http_adapter(cfg)
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ from collections.abc import Mapping
29
+ from collections.abc import Sequence
30
+ from typing import Any
31
+ from typing import TypedDict
32
+
33
+ import requests # type: ignore[import]
34
+ from requests.adapters import HTTPAdapter # type: ignore
35
+
36
+ from ..utils import to_maximum_int
37
+ from ..utils import to_positive_int
38
+
39
+ # SECTION: EXPORTS ========================================================== #
40
+
41
+
42
+ __all__ = [
43
+ # Classes
44
+ 'HTTPAdapterMountConfig',
45
+ 'HTTPAdapterRetryConfig',
46
+ # Functions
47
+ 'build_http_adapter',
48
+ 'build_session_with_adapters',
49
+ ]
50
+
51
+
52
+ # SECTION: TYPED DICTS ====================================================== #
53
+
54
+
55
+ class HTTPAdapterRetryConfig(TypedDict, total=False):
56
+ """
57
+ Retry configuration for urllib3 ``Retry``.
58
+
59
+ Used by requests' ``HTTPAdapter``.
60
+
61
+ Summary
62
+ -------
63
+ Keys mirror the ``Retry`` constructor where relevant. All keys are
64
+ optional; omit any you don't need. When converted downstream, collection-
65
+ valued fields are normalized to tuples/frozensets.
66
+
67
+ Attributes
68
+ ----------
69
+ total : int
70
+ Retry counters matching urllib3 semantics.
71
+ connect : int
72
+ Number of connection-related retries.
73
+ read : int
74
+ Number of read-related retries.
75
+ redirect : int
76
+ Number of redirect-related retries.
77
+ status : int
78
+ Number of status-related retries.
79
+ backoff_factor : float
80
+ Base factor for exponential backoff between attempts.
81
+ status_forcelist : list[int] | tuple[int, ...]
82
+ HTTP status codes that should always be retried.
83
+ allowed_methods : list[str] | set[str] | tuple[str, ...]
84
+ Idempotent HTTP methods eligible for retry.
85
+ raise_on_status : bool
86
+ Whether to raise after exhausting status-based retries.
87
+ respect_retry_after_header : bool
88
+ Honor ``Retry-After`` response headers when present.
89
+
90
+ Examples
91
+ --------
92
+ >>> retry_cfg: HTTPAdapterRetryConfig = {
93
+ ... 'total': 5,
94
+ ... 'backoff_factor': 0.5,
95
+ ... 'status_forcelist': [429, 503],
96
+ ... 'allowed_methods': ['GET'],
97
+ ... }
98
+ """
99
+
100
+ # -- Attributes -- #
101
+
102
+ total: int
103
+ connect: int
104
+ read: int
105
+ redirect: int
106
+ status: int
107
+ backoff_factor: float
108
+ status_forcelist: list[int] | tuple[int, ...]
109
+ allowed_methods: list[str] | set[str] | tuple[str, ...]
110
+ raise_on_status: bool
111
+ respect_retry_after_header: bool
112
+
113
+
114
class HTTPAdapterMountConfig(TypedDict, total=False):
    """
    Describe one ``HTTPAdapter`` to be mounted on a ``requests`` session.

    Summary
    -------
    Bundles the mount prefix with connection-pool sizing and an optional
    retry policy. Every key is optional. Pool values are handed to
    ``HTTPAdapter``; a retry mapping is turned into a ``Retry`` object when
    that is supported.

    Attributes
    ----------
    prefix : str
        URL prefix the adapter is mounted on, such as ``'https://'`` or a
        specific base URL.
    pool_connections : int
        How many urllib3 connection pools are cached.
    pool_maxsize : int
        Upper bound on connections kept per pool.
    pool_block : bool
        Block while waiting for a free connection rather than opening a
        new one.
    max_retries : int | HTTPAdapterRetryConfig
        Either a plain retry count given to ``HTTPAdapter`` or a mapping
        that is converted to ``Retry``.

    Examples
    --------
    >>> adapter_cfg: HTTPAdapterMountConfig = {
    ...     'prefix': 'https://',
    ...     'pool_connections': 10,
    ...     'pool_maxsize': 10,
    ...     'pool_block': False,
    ...     'max_retries': {
    ...         'total': 3,
    ...         'backoff_factor': 0.5,
    ...     },
    ... }
    """

    # -- Attributes -- #

    prefix: str
    pool_connections: int
    pool_maxsize: int
    pool_block: bool
    max_retries: int | HTTPAdapterRetryConfig
160
+
161
+
162
+ # SECTION: INTERNAL FUNCTIONS ============================================== #
163
+
164
+
165
def _build_retry_value(
    config: Mapping[str, Any],
) -> int | Any:
    """
    Turn a retry mapping into a value usable as ``max_retries``.

    Parameters
    ----------
    config : Mapping[str, Any]
        Mapping holding urllib3 ``Retry`` keyword arguments.

    Returns
    -------
    int | Any
        A ``Retry`` instance built from the recognized keys, ``0`` when no
        recognized key is present, or an integer derived from ``total``
        when ``urllib3`` cannot be imported.
    """
    try:
        from urllib3.util.retry import Retry  # type: ignore
    except ImportError:  # pragma: no cover - optional dependency
        # No Retry class to build; degrade to an integer retry count.
        return to_maximum_int(config.get('total'), 0)

    retry_kwargs = _normalize_retry_kwargs(config)
    if not retry_kwargs:
        return 0
    return Retry(**retry_kwargs)
189
+
190
+
191
+ def _normalize_retry_kwargs(
192
+ retries_cfg: Mapping[str, Any],
193
+ ) -> dict[str, Any]:
194
+ """Filter and normalize urllib3 ``Retry`` kwargs from a mapping."""
195
+ allowed_keys = {
196
+ 'total',
197
+ 'connect',
198
+ 'read',
199
+ 'redirect',
200
+ 'status',
201
+ 'backoff_factor',
202
+ 'status_forcelist',
203
+ 'allowed_methods',
204
+ 'raise_on_status',
205
+ 'respect_retry_after_header',
206
+ }
207
+ normalized: dict[str, Any] = {}
208
+ for key, value in retries_cfg.items():
209
+ if key not in allowed_keys:
210
+ continue
211
+ match key:
212
+ case 'status_forcelist' if isinstance(value, (list, tuple, set)):
213
+ normalized[key] = tuple(value)
214
+ case 'allowed_methods' if isinstance(
215
+ value,
216
+ (list, tuple, set, frozenset),
217
+ ):
218
+ normalized[key] = frozenset(value)
219
+ case _:
220
+ normalized[key] = value
221
+ return normalized
222
+
223
+
224
+ def _resolve_max_retries(
225
+ retries_cfg: object,
226
+ ) -> int | Any:
227
+ """
228
+ Normalize ``max_retries`` values accepted by ``HTTPAdapter``.
229
+
230
+ Parameters
231
+ ----------
232
+ retries_cfg : object
233
+ Raw ``max_retries`` configuration value.
234
+
235
+ Returns
236
+ -------
237
+ int | Any
238
+ Integer retry count or ``Retry`` instance.
239
+ """
240
+ match retries_cfg:
241
+ case int():
242
+ return retries_cfg
243
+ case Mapping():
244
+ try:
245
+ return _build_retry_value(retries_cfg)
246
+ except (TypeError, ValueError, AttributeError):
247
+ return to_maximum_int(retries_cfg.get('total'), 0)
248
+ case _:
249
+ return 0
250
+
251
+
252
+ # SECTION: FUNCTIONS ======================================================== #
253
+
254
+
255
def build_http_adapter(
    cfg: Mapping[str, Any],
) -> HTTPAdapter:
    """
    Create a requests ``HTTPAdapter`` from a configuration mapping.

    Recognized keys in ``cfg``:
    - pool_connections (int)
    - pool_maxsize (int)
    - pool_block (bool)
    - max_retries (int or dict matching urllib3 ``Retry`` args)

    A dict-valued ``max_retries`` is used to build an
    ``urllib3.util.retry.Retry`` instance from the keys it recognizes;
    other keys are ignored. If urllib3 is unavailable, falls back to no
    retries (0) or an integer value when provided.

    Parameters
    ----------
    cfg : Mapping[str, Any]
        Adapter configuration mapping.

    Returns
    -------
    HTTPAdapter
        Configured HTTPAdapter instance.
    """
    # Both pool sizes use 10 as the fallback passed to the int helper.
    return HTTPAdapter(
        pool_connections=to_positive_int(cfg.get('pool_connections'), 10),
        pool_maxsize=to_positive_int(cfg.get('pool_maxsize'), 10),
        pool_block=bool(cfg.get('pool_block', False)),
        max_retries=_resolve_max_retries(cfg.get('max_retries')),
    )
294
+
295
+
296
def build_session_with_adapters(
    adapters_cfg: Sequence[HTTPAdapterMountConfig],
) -> requests.Session:
    """
    Mount adapters described by ``adapters_cfg`` onto a new session.

    Ignores invalid adapter configurations so that a usable session is always
    returned. This includes entries that are not mappings at all, as well as
    entries whose values make adapter construction or mounting fail.

    Parameters
    ----------
    adapters_cfg : Sequence[HTTPAdapterMountConfig]
        Configuration mappings describing the adapter prefix, pooling
        values, and retry policy for each mounted adapter. ``None`` or an
        empty sequence yields a session with no extra adapters mounted.

    Returns
    -------
    requests.Session
        Configured session instance.
    """
    session = requests.Session()
    for cfg in adapters_cfg or ():
        try:
            # The prefix lookup lives inside the ``try`` so that an entry
            # without ``.get`` is skipped like any other invalid config.
            prefix = cfg.get('prefix', 'https://')
            session.mount(prefix, build_http_adapter(cfg))
        except (ValueError, TypeError, AttributeError):
            # Skip invalid adapter configs but still return a usable session.
            continue
    return session
etlplus/api/types.py ADDED
@@ -0,0 +1,172 @@
1
+ """
2
+ :mod:`etlplus.api.types` module.
3
+
4
+ HTTP-centric type aliases for :mod:`etlplus.api` helpers.
5
+
6
+ Notes
7
+ -----
8
+ - Keeps pagination, transport, and higher-level modules decoupled from
9
+ ``typing`` details.
10
+ - Uses ``Mapping`` inputs to accept both ``dict`` and mapping-like objects.
11
+
12
+ Examples
13
+ --------
14
+ >>> from etlplus.api import Url, Headers, Params
15
+ >>> url: Url = 'https://api.example.com/data'
16
+ >>> headers: Headers = {'Authorization': 'Bearer token'}
17
+ >>> params: Params = {'query': 'search term', 'limit': 50}
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from collections.abc import Callable
23
+ from dataclasses import dataclass
24
+ from typing import Any
25
+ from typing import cast
26
+
27
+ from ..types import JSONData
28
+ from ..types import StrAnyMap
29
+ from ..types import StrStrMap
30
+
31
+ # SECTION: EXPORTS ========================================================== #
32
+
33
+
34
+ __all__ = [
35
+ # Data Classes
36
+ 'RequestOptions',
37
+ # Type Aliases
38
+ 'FetchPageCallable',
39
+ 'Headers',
40
+ 'Params',
41
+ 'Url',
42
+ ]
43
+
44
+
45
+ # SECTION: CONSTANTS ======================================================== #
46
+
47
+
48
+ _UNSET = object()
49
+
50
+
51
+ # SECTION: DATA CLASSES ===================================================== #
52
+
53
+
54
+ @dataclass(frozen=True, kw_only=True, slots=True)
55
+ class RequestOptions:
56
+ """
57
+ Immutable snapshot of per-request options.
58
+
59
+ Attributes
60
+ ----------
61
+ params : Params | None
62
+ Query or body parameters.
63
+ headers : Headers | None
64
+ HTTP headers.
65
+ timeout : float | None
66
+ Request timeout in seconds.
67
+ """
68
+
69
+ # -- Attributes -- #
70
+
71
+ params: Params | None = None
72
+ headers: Headers | None = None
73
+ timeout: float | None = None
74
+
75
+ # -- Magic Methods (Object Lifecycle) -- #
76
+
77
+ def __post_init__(self) -> None:
78
+ if self.params:
79
+ object.__setattr__(self, 'params', dict(self.params))
80
+ if self.headers:
81
+ object.__setattr__(self, 'headers', dict(self.headers))
82
+
83
+ # -- Instance Methods -- #
84
+
85
+ def as_kwargs(self) -> dict[str, Any]:
86
+ """
87
+ Convert options into ``requests``-compatible kwargs.
88
+
89
+ Returns
90
+ -------
91
+ dict[str, Any]
92
+ Keyword arguments for ``requests`` methods.
93
+ """
94
+ kw: dict[str, Any] = {}
95
+ if self.params:
96
+ kw['params'] = dict(self.params)
97
+ if self.headers:
98
+ kw['headers'] = dict(self.headers)
99
+ if self.timeout is not None:
100
+ kw['timeout'] = self.timeout
101
+ return kw
102
+
103
+ def evolve(
104
+ self,
105
+ *,
106
+ params: Params | None | object = _UNSET,
107
+ headers: Headers | None | object = _UNSET,
108
+ timeout: float | None | object = _UNSET,
109
+ ) -> RequestOptions:
110
+ """
111
+ Return a copy with the provided fields replaced.
112
+
113
+ Parameters
114
+ ----------
115
+ params : Params | None | object, optional
116
+ Replacement params mapping. ``None`` clears params. When
117
+ omitted, the existing params are preserved.
118
+ headers : Headers | None | object, optional
119
+ Replacement headers mapping. ``None`` clears headers. When
120
+ omitted, the existing headers are preserved.
121
+ timeout : float | None | object, optional
122
+ Replacement timeout. ``None`` clears the timeout. When
123
+ omitted, the existing timeout is preserved.
124
+
125
+ Returns
126
+ -------
127
+ RequestOptions
128
+ New snapshot reflecting the provided overrides.
129
+ """
130
+ if params is _UNSET:
131
+ next_params = self.params
132
+ elif params is None:
133
+ next_params = None
134
+ else:
135
+ next_params = cast(dict, params)
136
+
137
+ if headers is _UNSET:
138
+ next_headers = self.headers
139
+ elif headers is None:
140
+ next_headers = None
141
+ else:
142
+ next_headers = cast(dict, headers)
143
+
144
+ if timeout is _UNSET:
145
+ next_timeout = self.timeout
146
+ else:
147
+ next_timeout = cast(float | None, timeout)
148
+
149
+ return RequestOptions(
150
+ params=next_params,
151
+ headers=next_headers,
152
+ timeout=next_timeout,
153
+ )
154
+
155
+
156
+ # SECTION: TYPE ALIASES ===================================================== #
157
+
158
+
159
+ # HTTP headers represented as a string-to-string mapping.
160
+ type Headers = StrStrMap
161
+
162
+ # Query or body parameters allowing arbitrary JSON-friendly values.
163
+ type Params = StrAnyMap
164
+
165
+ # Fully qualified resource locator consumed by transport helpers.
166
+ type Url = str
167
+
168
+ # Callable signature used by pagination helpers to fetch data pages.
169
+ type FetchPageCallable = Callable[
170
+ [Url, RequestOptions, int | None],
171
+ JSONData,
172
+ ]
@@ -0,0 +1,15 @@
1
+ """
2
+ :mod:`etlplus.cli` package.
3
+
4
+ This package defines the main command-line interface (CLI) command and
5
+ subcommands for ``etlplus``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from .main import main
11
+
12
+ # SECTION: EXPORTS ========================================================== #
13
+
14
+
15
+ __all__ = ['main']