etlplus 0.12.10__py3-none-any.whl → 0.14.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +1 -1
- etlplus/__init__.py +1 -26
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +36 -20
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +7 -6
- etlplus/{run_helpers.py → api/utils.py} +205 -153
- etlplus/cli/handlers.py +17 -7
- etlplus/config/jobs.py +14 -4
- etlplus/dag.py +103 -0
- etlplus/enums.py +0 -32
- etlplus/file/cfg.py +2 -2
- etlplus/file/conf.py +2 -2
- etlplus/file/dta.py +77 -0
- etlplus/file/enums.py +10 -4
- etlplus/file/hbs.py +78 -0
- etlplus/file/hdf5.py +78 -0
- etlplus/file/jinja2.py +78 -0
- etlplus/file/mat.py +78 -0
- etlplus/file/mustache.py +78 -0
- etlplus/file/nc.py +78 -0
- etlplus/file/numbers.py +75 -0
- etlplus/file/ods.py +79 -0
- etlplus/file/properties.py +13 -13
- etlplus/file/rda.py +78 -0
- etlplus/file/rds.py +78 -0
- etlplus/file/sas7bdat.py +78 -0
- etlplus/file/sav.py +77 -0
- etlplus/file/sylk.py +77 -0
- etlplus/file/toml.py +1 -1
- etlplus/file/vm.py +78 -0
- etlplus/file/wks.py +77 -0
- etlplus/file/xlsm.py +79 -0
- etlplus/file/xpt.py +78 -0
- etlplus/file/zsav.py +77 -0
- etlplus/{validation → ops}/README.md +2 -2
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +78 -94
- etlplus/{load.py → ops/load.py} +73 -93
- etlplus/{run.py → ops/run.py} +140 -110
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +80 -15
- etlplus/{validate.py → ops/validate.py} +19 -9
- etlplus/types.py +2 -2
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/METADATA +91 -60
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/RECORD +56 -35
- etlplus/validation/__init__.py +0 -44
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/WHEEL +0 -0
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/top_level.txt +0 -0
|
@@ -1,30 +1,13 @@
|
|
|
1
1
|
"""
|
|
2
|
-
:mod:`etlplus.
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
request/load environments, pagination configs, session objects, and endpoint
|
|
6
|
-
clients. Extracted to keep ``run.py`` focused on orchestration while enabling
|
|
7
|
-
reuse and testability.
|
|
8
|
-
|
|
9
|
-
Public (re-export safe) helpers:
|
|
10
|
-
- build_pagination_cfg(pagination, overrides)
|
|
11
|
-
- build_session(cfg)
|
|
12
|
-
- compose_api_request_env(cfg, source_obj, extract_opts)
|
|
13
|
-
- compose_api_target_env(cfg, target_obj, overrides)
|
|
14
|
-
- build_endpoint_client(base_url, base_path, endpoints, env)
|
|
15
|
-
- compute_rl_sleep_seconds(rate_limit, overrides)
|
|
16
|
-
- paginate_with_client(client, endpoint_key, params, headers,
|
|
17
|
-
timeout, pagination, sleep_seconds)
|
|
18
|
-
|
|
19
|
-
Notes
|
|
20
|
-
-----
|
|
21
|
-
These helpers intentionally accept permissive ``Any``/``Mapping`` inputs to
|
|
22
|
-
avoid tight coupling with config dataclasses while keeping runtime flexible.
|
|
2
|
+
:mod:`etlplus.api.utils` module.
|
|
3
|
+
|
|
4
|
+
Shared HTTP helpers for API clients that communicate with REST endpoints.
|
|
23
5
|
"""
|
|
24
6
|
|
|
25
7
|
from __future__ import annotations
|
|
26
8
|
|
|
27
9
|
import inspect
|
|
10
|
+
from collections.abc import Callable
|
|
28
11
|
from collections.abc import Mapping
|
|
29
12
|
from typing import Any
|
|
30
13
|
from typing import TypedDict
|
|
@@ -32,24 +15,34 @@ from typing import cast
|
|
|
32
15
|
|
|
33
16
|
import requests # type: ignore[import]
|
|
34
17
|
|
|
35
|
-
from
|
|
36
|
-
from
|
|
37
|
-
from .
|
|
38
|
-
from .
|
|
39
|
-
from .
|
|
40
|
-
from .
|
|
41
|
-
from .
|
|
42
|
-
from .
|
|
43
|
-
from .
|
|
44
|
-
from .
|
|
45
|
-
from .
|
|
46
|
-
from .
|
|
47
|
-
from .types import
|
|
18
|
+
from ..types import Timeout
|
|
19
|
+
from ..utils import coerce_dict
|
|
20
|
+
from .config import ApiConfig
|
|
21
|
+
from .config import EndpointConfig
|
|
22
|
+
from .endpoint_client import EndpointClient
|
|
23
|
+
from .enums import HttpMethod
|
|
24
|
+
from .pagination import PaginationConfig
|
|
25
|
+
from .pagination import PaginationConfigMap
|
|
26
|
+
from .rate_limiting import RateLimitConfig
|
|
27
|
+
from .rate_limiting import RateLimitConfigMap
|
|
28
|
+
from .rate_limiting import RateLimiter
|
|
29
|
+
from .retry_manager import RetryPolicy
|
|
30
|
+
from .types import Headers
|
|
31
|
+
from .types import Params
|
|
32
|
+
from .types import Url
|
|
33
|
+
|
|
34
|
+
# SECTION: CONSTANTS ======================================================== #
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
DEFAULT_TIMEOUT: float = 10.0
|
|
38
|
+
|
|
48
39
|
|
|
49
40
|
# SECTION: EXPORTS ========================================================== #
|
|
50
41
|
|
|
51
42
|
|
|
52
43
|
__all__ = [
|
|
44
|
+
# Constants
|
|
45
|
+
'DEFAULT_TIMEOUT',
|
|
53
46
|
# Functions
|
|
54
47
|
'build_endpoint_client',
|
|
55
48
|
'build_pagination_cfg',
|
|
@@ -58,6 +51,7 @@ __all__ = [
|
|
|
58
51
|
'compose_api_target_env',
|
|
59
52
|
'compute_rl_sleep_seconds',
|
|
60
53
|
'paginate_with_client',
|
|
54
|
+
'resolve_request',
|
|
61
55
|
# Typed Dicts
|
|
62
56
|
'ApiRequestEnv',
|
|
63
57
|
'ApiTargetEnv',
|
|
@@ -68,43 +62,83 @@ __all__ = [
|
|
|
68
62
|
# SECTION: TYPED DICTS ====================================================== #
|
|
69
63
|
|
|
70
64
|
|
|
71
|
-
class
|
|
72
|
-
"""
|
|
65
|
+
class BaseApiHttpEnv(TypedDict, total=False):
    """
    Shared HTTP request environment for API interactions.

    Declares the keys that source-side and target-side API operations have
    in common. The class is declared with ``total=False``, so every key is
    optional.

    Attributes
    ----------
    url : Url | None
        Request URL, or ``None``.
    headers : dict[str, str]
        Request headers.
    timeout : Timeout
        Request timeout.
    session : requests.Session | None
        Session object, or ``None``.
    """

    # -- Request details -- #
    url: Url | None
    headers: dict[str, str]
    timeout: Timeout

    # -- Session -- #
    session: requests.Session | None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ApiRequestEnv(BaseApiHttpEnv, total=False):
    """
    HTTP request environment composed for a REST API source.

    This is the mapping returned by :func:`compose_api_request_env` and
    consumed by the API extract branch. Its values are already merged from
    endpoint/API defaults and job-level overrides, keeping the original
    precedence and behavior. All keys are optional (``total=False``) and the
    keys of :class:`BaseApiHttpEnv` are inherited.

    Attributes
    ----------
    use_endpoints : bool
        Client-related flag.
    base_url : str | None
        Client base URL, or ``None``.
    base_path : str | None
        Client base path, or ``None``.
    endpoints_map : dict[str, str] | None
        Endpoint mapping for the client, or ``None``.
    endpoint_key : str | None
        Endpoint key, or ``None``.
    params : dict[str, Any]
        Request parameters.
    pagination : PaginationConfigMap | None
        Pagination configuration, or ``None``.
    sleep_seconds : float
        Sleep interval in seconds.
    retry : RetryPolicy | None
        Retry policy, or ``None``.
    retry_network_errors : bool
        Retry-on-network-error flag.
    """

    # -- Client -- #
    use_endpoints: bool
    base_url: str | None
    base_path: str | None
    endpoints_map: dict[str, str] | None
    endpoint_key: str | None

    # -- Request -- #
    params: dict[str, Any]
    pagination: PaginationConfigMap | None
    sleep_seconds: float

    # -- Reliability -- #
    retry: RetryPolicy | None
    retry_network_errors: bool
|
|
88
105
|
|
|
89
106
|
|
|
90
|
-
class ApiTargetEnv(
|
|
91
|
-
"""
|
|
107
|
+
class ApiTargetEnv(BaseApiHttpEnv, total=False):
    """
    HTTP request environment composed for a REST API target.

    This is the mapping returned by :func:`compose_api_target_env` and
    consumed by the API load branch. Values come from the target object, an
    optional API/endpoint reference, and job-level overrides, keeping the
    original precedence and behavior. All keys are optional
    (``total=False``) and the keys of :class:`BaseApiHttpEnv` are inherited.

    Attributes
    ----------
    method : str | None
        HTTP method name, or ``None``.

    Notes
    -----
    - Inherited values follow the original precedence:
        overrides -> target -> API profile defaults.
    - Pagination, rate limiting, and retry are not part of target
        composition because loads are single-request operations; only
        headers/timeout/session apply.
    """

    # -- Request -- #
    method: str | None
|
|
98
127
|
|
|
99
128
|
|
|
100
129
|
class SessionConfig(TypedDict, total=False):
|
|
101
|
-
"""
|
|
130
|
+
"""
|
|
131
|
+
Minimal session configuration schema accepted by the
|
|
132
|
+
:class:`requests.Session` runner.
|
|
133
|
+
|
|
134
|
+
Keys mirror common :class:`requests.Session` options; all are optional.
|
|
135
|
+
"""
|
|
102
136
|
|
|
103
137
|
headers: Mapping[str, Any]
|
|
104
138
|
params: Mapping[str, Any]
|
|
105
|
-
auth: Any
|
|
139
|
+
auth: Any # (user, pass) tuple or requests-compatible auth object
|
|
106
140
|
verify: bool | str
|
|
107
|
-
cert: Any
|
|
141
|
+
cert: Any # str or (cert, key)
|
|
108
142
|
proxies: Mapping[str, Any]
|
|
109
143
|
cookies: Mapping[str, Any]
|
|
110
144
|
trust_env: bool
|
|
@@ -113,7 +147,47 @@ class SessionConfig(TypedDict, total=False):
|
|
|
113
147
|
# SECTION: INTERNAL FUNCTIONS ============================================== #
|
|
114
148
|
|
|
115
149
|
|
|
116
|
-
|
|
150
|
+
def _build_session_optional(
|
|
151
|
+
cfg: SessionConfig | None,
|
|
152
|
+
) -> requests.Session | None:
|
|
153
|
+
"""
|
|
154
|
+
Return a configured session when *cfg* is a mapping.
|
|
155
|
+
|
|
156
|
+
Parameters
|
|
157
|
+
----------
|
|
158
|
+
cfg : SessionConfig | None
|
|
159
|
+
Session configuration mapping.
|
|
160
|
+
|
|
161
|
+
Returns
|
|
162
|
+
-------
|
|
163
|
+
requests.Session | None
|
|
164
|
+
Configured session or ``None``.
|
|
165
|
+
"""
|
|
166
|
+
if isinstance(cfg, Mapping):
|
|
167
|
+
return build_session(cast(SessionConfig, cfg))
|
|
168
|
+
return None
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _coalesce(
|
|
172
|
+
*args: Any,
|
|
173
|
+
) -> Any | None:
|
|
174
|
+
"""
|
|
175
|
+
Return the first non-``None`` value from ``args``.
|
|
176
|
+
|
|
177
|
+
Parameters
|
|
178
|
+
----------
|
|
179
|
+
*args : Any
|
|
180
|
+
Candidate values in descending precedence order.
|
|
181
|
+
|
|
182
|
+
Returns
|
|
183
|
+
-------
|
|
184
|
+
Any | None
|
|
185
|
+
The first non-``None`` value, or ``None`` if all are ``None``.
|
|
186
|
+
"""
|
|
187
|
+
for arg in args:
|
|
188
|
+
if arg is not None:
|
|
189
|
+
return arg
|
|
190
|
+
return None
|
|
117
191
|
|
|
118
192
|
|
|
119
193
|
def _get_api_cfg_and_endpoint(
|
|
@@ -217,37 +291,15 @@ def _merge_session_cfg_three(
|
|
|
217
291
|
api_sess = getattr(api_cfg, 'session', None)
|
|
218
292
|
ep_sess = getattr(ep, 'session', None)
|
|
219
293
|
merged: dict[str, Any] = {}
|
|
220
|
-
if isinstance(api_sess,
|
|
294
|
+
if isinstance(api_sess, Mapping):
|
|
221
295
|
merged.update(api_sess)
|
|
222
|
-
if isinstance(ep_sess,
|
|
296
|
+
if isinstance(ep_sess, Mapping):
|
|
223
297
|
merged.update(ep_sess)
|
|
224
|
-
if isinstance(source_session_cfg,
|
|
298
|
+
if isinstance(source_session_cfg, Mapping):
|
|
225
299
|
merged.update(source_session_cfg)
|
|
226
300
|
return cast(SessionConfig | None, (merged or None))
|
|
227
301
|
|
|
228
302
|
|
|
229
|
-
# -- Mapping Helpers -- #
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
def _copy_mapping(
|
|
233
|
-
mapping: Mapping[str, Any] | None,
|
|
234
|
-
) -> dict[str, Any]:
|
|
235
|
-
"""
|
|
236
|
-
Return a shallow copy of *mapping* or an empty dict.
|
|
237
|
-
|
|
238
|
-
Parameters
|
|
239
|
-
----------
|
|
240
|
-
mapping : Mapping[str, Any] | None
|
|
241
|
-
The mapping to copy.
|
|
242
|
-
|
|
243
|
-
Returns
|
|
244
|
-
-------
|
|
245
|
-
dict[str, Any]
|
|
246
|
-
A shallow copy of the mapping or an empty dict.
|
|
247
|
-
"""
|
|
248
|
-
return dict(mapping) if isinstance(mapping, Mapping) else {}
|
|
249
|
-
|
|
250
|
-
|
|
251
303
|
def _update_mapping(
|
|
252
304
|
target: dict[str, Any],
|
|
253
305
|
extra: Mapping[str, Any] | None,
|
|
@@ -266,37 +318,9 @@ def _update_mapping(
|
|
|
266
318
|
target.update(extra)
|
|
267
319
|
|
|
268
320
|
|
|
269
|
-
# -- Session -- #
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
def _build_session_optional(
|
|
273
|
-
cfg: SessionConfig | None,
|
|
274
|
-
) -> requests.Session | None:
|
|
275
|
-
"""
|
|
276
|
-
Return a configured session when *cfg* is a mapping.
|
|
277
|
-
|
|
278
|
-
Parameters
|
|
279
|
-
----------
|
|
280
|
-
cfg : SessionConfig | None
|
|
281
|
-
Session configuration mapping.
|
|
282
|
-
|
|
283
|
-
Returns
|
|
284
|
-
-------
|
|
285
|
-
requests.Session | None
|
|
286
|
-
Configured session or ``None``.
|
|
287
|
-
"""
|
|
288
|
-
|
|
289
|
-
if isinstance(cfg, dict):
|
|
290
|
-
return build_session(cfg)
|
|
291
|
-
return None
|
|
292
|
-
|
|
293
|
-
|
|
294
321
|
# SECTION: FUNCTIONS ======================================================== #
|
|
295
322
|
|
|
296
323
|
|
|
297
|
-
# -- API Environment Composition -- #
|
|
298
|
-
|
|
299
|
-
|
|
300
324
|
def build_endpoint_client(
|
|
301
325
|
*,
|
|
302
326
|
base_url: str,
|
|
@@ -323,15 +347,7 @@ def build_endpoint_client(
|
|
|
323
347
|
EndpointClient
|
|
324
348
|
The constructed endpoint client.
|
|
325
349
|
"""
|
|
326
|
-
|
|
327
|
-
# propagate here by preferring the class on the run module if present.
|
|
328
|
-
try:
|
|
329
|
-
from . import run as run_mod # local import to avoid cycles
|
|
330
|
-
|
|
331
|
-
ClientClass = getattr(run_mod, 'EndpointClient', EndpointClient)
|
|
332
|
-
except (ImportError, AttributeError): # pragma: no cover - fallback path
|
|
333
|
-
ClientClass = EndpointClient
|
|
334
|
-
return ClientClass(
|
|
350
|
+
return EndpointClient(
|
|
335
351
|
base_url=base_url,
|
|
336
352
|
base_path=base_path,
|
|
337
353
|
endpoints=endpoints,
|
|
@@ -369,21 +385,19 @@ def compose_api_request_env(
|
|
|
369
385
|
Mapping[str, Any] | None,
|
|
370
386
|
getattr(source_obj, 'query_params', None),
|
|
371
387
|
)
|
|
372
|
-
params: dict[str, Any] =
|
|
388
|
+
params: dict[str, Any] = coerce_dict(source_params)
|
|
373
389
|
source_headers = cast(
|
|
374
390
|
Mapping[str, str] | None,
|
|
375
391
|
getattr(source_obj, 'headers', None),
|
|
376
392
|
)
|
|
377
|
-
headers: dict[str, str] =
|
|
393
|
+
headers: dict[str, str] = cast(dict[str, str], coerce_dict(source_headers))
|
|
378
394
|
pagination = getattr(source_obj, 'pagination', None)
|
|
379
395
|
rate_limit = getattr(source_obj, 'rate_limit', None)
|
|
380
396
|
retry: RetryPolicy | None = cast(
|
|
381
397
|
RetryPolicy | None,
|
|
382
398
|
getattr(source_obj, 'retry', None),
|
|
383
399
|
)
|
|
384
|
-
retry_network_errors =
|
|
385
|
-
getattr(source_obj, 'retry_network_errors', False),
|
|
386
|
-
)
|
|
400
|
+
retry_network_errors = getattr(source_obj, 'retry_network_errors', None)
|
|
387
401
|
session_cfg = cast(
|
|
388
402
|
SessionConfig | None,
|
|
389
403
|
getattr(source_obj, 'session', None),
|
|
@@ -405,33 +419,33 @@ def compose_api_request_env(
|
|
|
405
419
|
session_cfg,
|
|
406
420
|
force_url=True,
|
|
407
421
|
)
|
|
408
|
-
ep_params: dict[str, Any] =
|
|
422
|
+
ep_params: dict[str, Any] = coerce_dict(
|
|
409
423
|
cast(Mapping[str, Any] | None, getattr(ep, 'query_params', None)),
|
|
410
424
|
)
|
|
411
425
|
_update_mapping(ep_params, params)
|
|
412
426
|
params = ep_params
|
|
413
|
-
pagination = (
|
|
414
|
-
pagination
|
|
415
|
-
|
|
416
|
-
|
|
427
|
+
pagination = _coalesce(
|
|
428
|
+
pagination,
|
|
429
|
+
ep.pagination,
|
|
430
|
+
api_cfg.effective_pagination_defaults(),
|
|
417
431
|
)
|
|
418
|
-
rate_limit = (
|
|
419
|
-
rate_limit
|
|
420
|
-
|
|
421
|
-
|
|
432
|
+
rate_limit = _coalesce(
|
|
433
|
+
rate_limit,
|
|
434
|
+
ep.rate_limit,
|
|
435
|
+
api_cfg.effective_rate_limit_defaults(),
|
|
422
436
|
)
|
|
423
437
|
retry = cast(
|
|
424
438
|
RetryPolicy | None,
|
|
425
|
-
(
|
|
426
|
-
retry
|
|
427
|
-
|
|
428
|
-
|
|
439
|
+
_coalesce(
|
|
440
|
+
retry,
|
|
441
|
+
getattr(ep, 'retry', None),
|
|
442
|
+
getattr(api_cfg, 'retry', None),
|
|
429
443
|
),
|
|
430
444
|
)
|
|
431
|
-
retry_network_errors = (
|
|
432
|
-
retry_network_errors
|
|
433
|
-
|
|
434
|
-
|
|
445
|
+
retry_network_errors = _coalesce(
|
|
446
|
+
retry_network_errors,
|
|
447
|
+
getattr(ep, 'retry_network_errors', None),
|
|
448
|
+
getattr(api_cfg, 'retry_network_errors', None),
|
|
435
449
|
)
|
|
436
450
|
use_client_endpoints = True
|
|
437
451
|
client_base_url = api_cfg.base_url
|
|
@@ -466,8 +480,10 @@ def compose_api_request_env(
|
|
|
466
480
|
retry = rty_ov
|
|
467
481
|
if rne_ov is not None:
|
|
468
482
|
retry_network_errors = bool(rne_ov)
|
|
469
|
-
if isinstance(sess_ov,
|
|
470
|
-
base_cfg: dict[str, Any] = dict(
|
|
483
|
+
if isinstance(sess_ov, Mapping):
|
|
484
|
+
base_cfg: dict[str, Any] = dict(
|
|
485
|
+
cast(Mapping[str, Any], session_cfg or {}),
|
|
486
|
+
)
|
|
471
487
|
base_cfg.update(sess_ov)
|
|
472
488
|
session_cfg = cast(SessionConfig, base_cfg)
|
|
473
489
|
pag_cfg: PaginationConfigMap | None = build_pagination_cfg(
|
|
@@ -488,7 +504,7 @@ def compose_api_request_env(
|
|
|
488
504
|
'pagination': pag_cfg,
|
|
489
505
|
'sleep_seconds': sleep_s,
|
|
490
506
|
'retry': retry,
|
|
491
|
-
'retry_network_errors': retry_network_errors,
|
|
507
|
+
'retry_network_errors': bool(retry_network_errors),
|
|
492
508
|
'session': sess_obj,
|
|
493
509
|
}
|
|
494
510
|
|
|
@@ -524,8 +540,14 @@ def compose_api_target_env(
|
|
|
524
540
|
str | None,
|
|
525
541
|
ov.get('method') or getattr(target_obj, 'method', 'post'),
|
|
526
542
|
)
|
|
527
|
-
headers =
|
|
528
|
-
|
|
543
|
+
headers = cast(
|
|
544
|
+
dict[str, str],
|
|
545
|
+
coerce_dict(
|
|
546
|
+
cast(
|
|
547
|
+
Mapping[str, str] | None,
|
|
548
|
+
getattr(target_obj, 'headers', None),
|
|
549
|
+
),
|
|
550
|
+
),
|
|
529
551
|
)
|
|
530
552
|
_update_mapping(headers, cast(Mapping[str, str] | None, ov.get('headers')))
|
|
531
553
|
timeout: Timeout = (
|
|
@@ -558,9 +580,6 @@ def compose_api_target_env(
|
|
|
558
580
|
}
|
|
559
581
|
|
|
560
582
|
|
|
561
|
-
# -- Pagination -- #
|
|
562
|
-
|
|
563
|
-
|
|
564
583
|
def build_pagination_cfg(
|
|
565
584
|
pagination: PaginationConfig | None,
|
|
566
585
|
overrides: Mapping[str, Any] | None,
|
|
@@ -667,9 +686,6 @@ def build_pagination_cfg(
|
|
|
667
686
|
return cast(PaginationConfigMap, cfg)
|
|
668
687
|
|
|
669
688
|
|
|
670
|
-
# -- Pagination Invocation -- #
|
|
671
|
-
|
|
672
|
-
|
|
673
689
|
def paginate_with_client(
|
|
674
690
|
client: Any,
|
|
675
691
|
endpoint_key: str,
|
|
@@ -727,9 +743,6 @@ def paginate_with_client(
|
|
|
727
743
|
return client.paginate(endpoint_key, **kw_pag)
|
|
728
744
|
|
|
729
745
|
|
|
730
|
-
# -- Rate Limit -- #
|
|
731
|
-
|
|
732
|
-
|
|
733
746
|
def compute_rl_sleep_seconds(
|
|
734
747
|
rate_limit: RateLimitConfig | Mapping[str, Any] | None,
|
|
735
748
|
overrides: Mapping[str, Any] | None,
|
|
@@ -782,9 +795,6 @@ def compute_rl_sleep_seconds(
|
|
|
782
795
|
)
|
|
783
796
|
|
|
784
797
|
|
|
785
|
-
# -- Session -- #
|
|
786
|
-
|
|
787
|
-
|
|
788
798
|
def build_session(
|
|
789
799
|
cfg: SessionConfig | None,
|
|
790
800
|
) -> requests.Session:
|
|
@@ -805,12 +815,12 @@ def build_session(
|
|
|
805
815
|
if not cfg:
|
|
806
816
|
return s
|
|
807
817
|
headers = cfg.get('headers')
|
|
808
|
-
if isinstance(headers,
|
|
818
|
+
if isinstance(headers, Mapping):
|
|
809
819
|
s.headers.update(headers)
|
|
810
820
|
params = cfg.get('params')
|
|
811
|
-
if isinstance(params,
|
|
821
|
+
if isinstance(params, Mapping):
|
|
812
822
|
try:
|
|
813
|
-
s.params = params
|
|
823
|
+
s.params = dict(params)
|
|
814
824
|
except (AttributeError, TypeError):
|
|
815
825
|
pass
|
|
816
826
|
auth = cfg.get('auth')
|
|
@@ -825,12 +835,12 @@ def build_session(
|
|
|
825
835
|
if cert is not None:
|
|
826
836
|
s.cert = cert # type: ignore[assignment]
|
|
827
837
|
proxies = cfg.get('proxies')
|
|
828
|
-
if isinstance(proxies,
|
|
838
|
+
if isinstance(proxies, Mapping):
|
|
829
839
|
s.proxies.update(proxies)
|
|
830
840
|
cookies = cfg.get('cookies')
|
|
831
|
-
if isinstance(cookies,
|
|
841
|
+
if isinstance(cookies, Mapping):
|
|
832
842
|
try:
|
|
833
|
-
s.cookies.update(cookies)
|
|
843
|
+
s.cookies.update(cast(Mapping[str, Any], cookies))
|
|
834
844
|
except (TypeError, ValueError):
|
|
835
845
|
pass
|
|
836
846
|
if 'trust_env' in cfg:
|
|
@@ -841,3 +851,45 @@ def build_session(
|
|
|
841
851
|
pass
|
|
842
852
|
|
|
843
853
|
return s
|
|
854
|
+
|
|
855
|
+
|
|
856
|
+
def resolve_request(
    method: HttpMethod | str,
    *,
    session: Any | None = None,
    timeout: Timeout = None,
) -> tuple[Callable[..., requests.Response], float, HttpMethod]:
    """
    Resolve a request callable and effective timeout for an HTTP method.

    Parameters
    ----------
    method : HttpMethod | str
        HTTP method to execute. Passed through :meth:`HttpMethod.coerce`.
    session : Any | None, optional
        Requests-compatible session object. Defaults to module-level
        ``requests`` when ``None``.
    timeout : Timeout, optional
        Timeout in seconds for the request. Uses ``DEFAULT_TIMEOUT`` when
        ``None``.

    Returns
    -------
    tuple[Callable[..., requests.Response], float, HttpMethod]
        Tuple of (callable, timeout_seconds, resolved_method).

    Raises
    ------
    TypeError
        If the session object does not expose the requested HTTP method as
        a callable attribute.
    """
    http_method = HttpMethod.coerce(method)
    request_timeout = DEFAULT_TIMEOUT if timeout is None else timeout
    # Test identity, not truthiness: a caller-supplied session object that
    # happens to evaluate falsy (e.g. defines ``__bool__``/``__len__``) must
    # still be used instead of being silently replaced by ``requests``.
    requester = requests if session is None else session
    # The callable is looked up by the method's enum value.
    request_callable = getattr(requester, http_method.value, None)
    if not callable(request_callable):
        raise TypeError(
            'Session object must supply a callable '
            f'"{http_method.value}" method',
        )
    return request_callable, request_timeout, http_method
|
etlplus/cli/handlers.py
CHANGED
|
@@ -18,15 +18,16 @@ from ..config import PipelineConfig
|
|
|
18
18
|
from ..config import load_pipeline_config
|
|
19
19
|
from ..database import load_table_spec
|
|
20
20
|
from ..database import render_tables
|
|
21
|
-
from ..extract import extract
|
|
22
21
|
from ..file import File
|
|
23
|
-
from ..
|
|
24
|
-
from ..
|
|
25
|
-
from ..
|
|
22
|
+
from ..file import FileFormat
|
|
23
|
+
from ..ops import extract
|
|
24
|
+
from ..ops import load
|
|
25
|
+
from ..ops import run
|
|
26
|
+
from ..ops import transform
|
|
27
|
+
from ..ops import validate
|
|
28
|
+
from ..ops.validate import FieldRules
|
|
26
29
|
from ..types import JSONData
|
|
27
30
|
from ..types import TemplateKey
|
|
28
|
-
from ..validate import FieldRules
|
|
29
|
-
from ..validate import validate
|
|
30
31
|
from . import io as cli_io
|
|
31
32
|
|
|
32
33
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -569,8 +570,17 @@ def transform_handler(
|
|
|
569
570
|
|
|
570
571
|
data = transform(payload, cast(TransformOperations, operations_payload))
|
|
571
572
|
|
|
573
|
+
# TODO: Generalize to handle non-file targets.
|
|
572
574
|
if target and target != '-':
|
|
573
|
-
|
|
575
|
+
# Convert target to Path and target_format to FileFormat if needed
|
|
576
|
+
file_path = Path(target)
|
|
577
|
+
file_format = None
|
|
578
|
+
if target_format is not None:
|
|
579
|
+
try:
|
|
580
|
+
file_format = FileFormat(target_format)
|
|
581
|
+
except ValueError:
|
|
582
|
+
file_format = None # or handle error as appropriate
|
|
583
|
+
File(file_path, file_format=file_format).write(data)
|
|
574
584
|
print(f'Data transformed and saved to {target}')
|
|
575
585
|
return 0
|
|
576
586
|
|
etlplus/config/jobs.py
CHANGED
|
@@ -34,10 +34,7 @@ __all__ = [
|
|
|
34
34
|
]
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
# SECTION:
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
# SECTION: CLASSES ========================================================== #
|
|
37
|
+
# SECTION: DATA CLASSES ===================================================== #
|
|
41
38
|
|
|
42
39
|
|
|
43
40
|
@dataclass(kw_only=True, slots=True)
|
|
@@ -100,6 +97,8 @@ class JobConfig:
|
|
|
100
97
|
Unique job name.
|
|
101
98
|
description : str | None
|
|
102
99
|
Optional human-friendly description.
|
|
100
|
+
depends_on : list[str]
|
|
101
|
+
Optional job dependency list. Dependencies must refer to other jobs.
|
|
103
102
|
extract : ExtractRef | None
|
|
104
103
|
Extraction reference.
|
|
105
104
|
validate : ValidationRef | None
|
|
@@ -114,6 +113,7 @@ class JobConfig:
|
|
|
114
113
|
|
|
115
114
|
name: str
|
|
116
115
|
description: str | None = None
|
|
116
|
+
depends_on: list[str] = field(default_factory=list)
|
|
117
117
|
extract: ExtractRef | None = None
|
|
118
118
|
validate: ValidationRef | None = None
|
|
119
119
|
transform: TransformRef | None = None
|
|
@@ -149,9 +149,19 @@ class JobConfig:
|
|
|
149
149
|
if description is not None and not isinstance(description, str):
|
|
150
150
|
description = str(description)
|
|
151
151
|
|
|
152
|
+
depends_raw = data.get('depends_on')
|
|
153
|
+
depends_on: list[str] = []
|
|
154
|
+
if isinstance(depends_raw, str):
|
|
155
|
+
depends_on = [depends_raw]
|
|
156
|
+
elif isinstance(depends_raw, list):
|
|
157
|
+
for entry in depends_raw:
|
|
158
|
+
if isinstance(entry, str):
|
|
159
|
+
depends_on.append(entry)
|
|
160
|
+
|
|
152
161
|
return cls(
|
|
153
162
|
name=name,
|
|
154
163
|
description=description,
|
|
164
|
+
depends_on=depends_on,
|
|
155
165
|
extract=ExtractRef.from_obj(data.get('extract')),
|
|
156
166
|
validate=ValidationRef.from_obj(data.get('validate')),
|
|
157
167
|
transform=TransformRef.from_obj(data.get('transform')),
|