etlplus 0.15.0__py3-none-any.whl → 0.16.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +25 -3
- etlplus/__init__.py +2 -0
- etlplus/api/README.md +31 -0
- etlplus/api/__init__.py +14 -14
- etlplus/api/auth.py +10 -7
- etlplus/api/config.py +8 -13
- etlplus/api/endpoint_client.py +20 -20
- etlplus/api/errors.py +4 -4
- etlplus/api/pagination/__init__.py +6 -6
- etlplus/api/pagination/config.py +12 -10
- etlplus/api/pagination/paginator.py +6 -7
- etlplus/api/rate_limiting/__init__.py +2 -2
- etlplus/api/rate_limiting/config.py +14 -14
- etlplus/api/rate_limiting/rate_limiter.py +3 -3
- etlplus/api/request_manager.py +4 -4
- etlplus/api/retry_manager.py +8 -8
- etlplus/api/transport.py +11 -11
- etlplus/api/types.py +131 -11
- etlplus/api/utils.py +50 -50
- etlplus/cli/commands.py +93 -60
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +43 -26
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +2 -2
- etlplus/cli/state.py +4 -7
- etlplus/{workflow/pipeline.py → config.py} +62 -99
- etlplus/connector/__init__.py +43 -0
- etlplus/connector/api.py +161 -0
- etlplus/connector/connector.py +26 -0
- etlplus/connector/core.py +132 -0
- etlplus/connector/database.py +122 -0
- etlplus/connector/enums.py +52 -0
- etlplus/connector/file.py +120 -0
- etlplus/connector/types.py +40 -0
- etlplus/connector/utils.py +122 -0
- etlplus/database/ddl.py +2 -2
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/enums.py +36 -200
- etlplus/file/_imports.py +1 -0
- etlplus/file/_io.py +52 -4
- etlplus/file/accdb.py +3 -2
- etlplus/file/arrow.py +3 -2
- etlplus/file/avro.py +3 -2
- etlplus/file/bson.py +3 -2
- etlplus/file/cbor.py +3 -2
- etlplus/file/cfg.py +3 -2
- etlplus/file/conf.py +3 -2
- etlplus/file/core.py +11 -8
- etlplus/file/csv.py +3 -2
- etlplus/file/dat.py +3 -2
- etlplus/file/dta.py +3 -2
- etlplus/file/duckdb.py +3 -2
- etlplus/file/enums.py +1 -1
- etlplus/file/feather.py +3 -2
- etlplus/file/fwf.py +3 -2
- etlplus/file/gz.py +3 -2
- etlplus/file/hbs.py +3 -2
- etlplus/file/hdf5.py +3 -2
- etlplus/file/ini.py +3 -2
- etlplus/file/ion.py +3 -2
- etlplus/file/jinja2.py +3 -2
- etlplus/file/json.py +5 -16
- etlplus/file/log.py +3 -2
- etlplus/file/mat.py +3 -2
- etlplus/file/mdb.py +3 -2
- etlplus/file/msgpack.py +3 -2
- etlplus/file/mustache.py +3 -2
- etlplus/file/nc.py +3 -2
- etlplus/file/ndjson.py +3 -2
- etlplus/file/numbers.py +3 -2
- etlplus/file/ods.py +3 -2
- etlplus/file/orc.py +3 -2
- etlplus/file/parquet.py +3 -2
- etlplus/file/pb.py +3 -2
- etlplus/file/pbf.py +3 -2
- etlplus/file/properties.py +3 -2
- etlplus/file/proto.py +3 -2
- etlplus/file/psv.py +3 -2
- etlplus/file/rda.py +3 -2
- etlplus/file/rds.py +3 -2
- etlplus/file/sas7bdat.py +3 -2
- etlplus/file/sav.py +3 -2
- etlplus/file/sqlite.py +3 -2
- etlplus/file/stub.py +1 -0
- etlplus/file/sylk.py +3 -2
- etlplus/file/tab.py +3 -2
- etlplus/file/toml.py +3 -2
- etlplus/file/tsv.py +3 -2
- etlplus/file/txt.py +4 -3
- etlplus/file/vm.py +3 -2
- etlplus/file/wks.py +3 -2
- etlplus/file/xls.py +3 -2
- etlplus/file/xlsm.py +3 -2
- etlplus/file/xlsx.py +3 -2
- etlplus/file/xml.py +9 -3
- etlplus/file/xpt.py +3 -2
- etlplus/file/yaml.py +5 -16
- etlplus/file/zip.py +3 -2
- etlplus/file/zsav.py +3 -2
- etlplus/ops/__init__.py +1 -0
- etlplus/ops/enums.py +173 -0
- etlplus/ops/extract.py +222 -23
- etlplus/ops/load.py +155 -36
- etlplus/ops/run.py +92 -107
- etlplus/ops/transform.py +48 -29
- etlplus/ops/types.py +147 -0
- etlplus/ops/utils.py +11 -40
- etlplus/ops/validate.py +16 -16
- etlplus/types.py +6 -102
- etlplus/utils.py +163 -29
- etlplus/workflow/README.md +0 -24
- etlplus/workflow/__init__.py +2 -15
- etlplus/workflow/dag.py +23 -1
- etlplus/workflow/jobs.py +83 -39
- etlplus/workflow/profile.py +4 -2
- {etlplus-0.15.0.dist-info → etlplus-0.16.6.dist-info}/METADATA +4 -4
- etlplus-0.16.6.dist-info/RECORD +143 -0
- {etlplus-0.15.0.dist-info → etlplus-0.16.6.dist-info}/WHEEL +1 -1
- etlplus/config/README.md +0 -50
- etlplus/config/__init__.py +0 -33
- etlplus/config/types.py +0 -140
- etlplus/dag.py +0 -103
- etlplus/workflow/connector.py +0 -373
- etlplus/workflow/types.py +0 -115
- etlplus/workflow/utils.py +0 -120
- etlplus-0.15.0.dist-info/RECORD +0 -139
- {etlplus-0.15.0.dist-info → etlplus-0.16.6.dist-info}/entry_points.txt +0 -0
- {etlplus-0.15.0.dist-info → etlplus-0.16.6.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.15.0.dist-info → etlplus-0.16.6.dist-info}/top_level.txt +0 -0
etlplus/cli/handlers.py
CHANGED

@@ -14,6 +14,7 @@ from typing import Any
 from typing import Literal
 from typing import cast
 
+from .. import Config
 from ..database import load_table_spec
 from ..database import render_tables
 from ..file import File
@@ -23,11 +24,9 @@ from ..ops import load
 from ..ops import run
 from ..ops import transform
 from ..ops import validate
-from ..ops.validate import
+from ..ops.validate import FieldRulesDict
 from ..types import JSONData
 from ..types import TemplateKey
-from ..workflow import PipelineConfig
-from ..workflow import load_pipeline_config
 from . import io as cli_io
 
 # SECTION: EXPORTS ========================================================== #
@@ -73,14 +72,14 @@ def _collect_table_specs(
         specs.append(dict(load_table_spec(Path(spec_path))))
 
     if config_path:
-        cfg =
+        cfg = Config.from_yaml(config_path, substitute=True)
         specs.extend(getattr(cfg, 'table_schemas', []))
 
     return specs
 
 
 def _check_sections(
-    cfg:
+    cfg: Config,
     *,
     jobs: bool,
     pipelines: bool,
@@ -93,7 +92,7 @@ def _check_sections(
 
     Parameters
     ----------
-    cfg :
+    cfg : Config
         The loaded pipeline configuration.
     jobs : bool
        Whether to include job metadata.
@@ -121,23 +120,26 @@ def _check_sections(
     if targets:
         sections['targets'] = [tgt.name for tgt in cfg.targets]
     if transforms:
-
-
-
+        if isinstance(cfg.transforms, Mapping):
+            sections['transforms'] = list(cfg.transforms)
+        else:
+            sections['transforms'] = [
+                getattr(trf, 'name', None) for trf in cfg.transforms
+            ]
     if not sections:
         sections['jobs'] = _pipeline_summary(cfg)['jobs']
     return sections
 
 
 def _pipeline_summary(
-    cfg:
+    cfg: Config,
 ) -> dict[str, Any]:
     """
     Return a human-friendly snapshot of a pipeline config.
 
     Parameters
     ----------
-    cfg :
+    cfg : Config
         The loaded pipeline configuration.
 
     Returns
@@ -157,6 +159,29 @@ def _pipeline_summary(
     }
 
 
+def _write_file_payload(
+    payload: JSONData,
+    target: str,
+    *,
+    format_hint: str | None,
+) -> None:
+    """
+    Write a JSON-like payload to a file path using an optional format hint.
+
+    Parameters
+    ----------
+    payload : JSONData
+        The structured data to write.
+    target : str
+        File path to write to.
+    format_hint : str | None
+        Optional format hint for :class:`FileFormat`.
+    """
+    file_path = Path(target)
+    file_format = FileFormat.coerce(format_hint) if format_hint else None
+    File(file_path, file_format=file_format).write(payload)
+
+
 # SECTION: FUNCTIONS ======================================================== #
 
 
@@ -203,7 +228,7 @@ def check_handler(
         Zero on success.
 
     """
-    cfg =
+    cfg = Config.from_yaml(config, substitute=substitute)
     if summary:
         cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
         return 0
@@ -479,7 +504,7 @@ def run_handler(
         Name of the job to run. If not provided, runs the entire pipeline.
         Default is ``None``.
     pipeline : str | None, optional
-        Alias for
+        Alias for *job*. Default is ``None``.
     pretty : bool, optional
         Whether to pretty-print output. Default is ``True``.
 
@@ -488,7 +513,7 @@
     int
         Zero on success.
     """
-    cfg =
+    cfg = Config.from_yaml(config, substitute=True)
 
     job_name = job or pipeline
     if job_name:
@@ -572,15 +597,7 @@ def transform_handler(
 
     # TODO: Generalize to handle non-file targets.
     if target and target != '-':
-
-        file_path = Path(target)
-        file_format = None
-        if target_format is not None:
-            try:
-                file_format = FileFormat(target_format)
-            except ValueError:
-                file_format = None  # or handle error as appropriate
-        File(file_path, file_format=file_format).write(data)
+        _write_file_payload(data, target, format_hint=target_format)
         print(f'Data transformed and saved to {target}')
         return 0
 
@@ -644,7 +661,7 @@ def validate_handler(
     if not isinstance(rules_payload, dict):
         raise ValueError('rules must resolve to a mapping of field rules')
 
-    field_rules = cast(Mapping[str,
+    field_rules = cast(Mapping[str, FieldRulesDict], rules_payload)
     result = validate(payload, field_rules)
 
     if target and target != '-':
@@ -653,11 +670,11 @@
         cli_io.write_json_output(
             validated_data,
             target,
-            success_message='
+            success_message='ValidationDict result saved to',
         )
     else:
         print(
-            f'
+            f'ValidationDict failed, no data to save for {target}',
             file=sys.stderr,
         )
     else:
etlplus/cli/io.py
CHANGED

@@ -71,7 +71,7 @@ def emit_or_write(
     success_message: str,
 ) -> None:
     """
-    Emit JSON or persist to disk based on
+    Emit JSON or persist to disk based on *output_path*.
 
     Parameters
     ----------
@@ -122,7 +122,7 @@ def materialize_file_payload(
     format_explicit: bool,
 ) -> JSONData | object:
     """
-    Return structured payloads when
+    Return structured payloads when *source* references a file.
 
     Parameters
     ----------
etlplus/cli/main.py
CHANGED

@@ -44,13 +44,13 @@ def _emit_context_help(
     Returns
     -------
     bool
-        ``True`` when help was emitted, ``False`` when
+        ``True`` when help was emitted, ``False`` when *ctx* was ``None``.
     """
     if ctx is None:
         return False
 
     with contextlib.redirect_stdout(sys.stderr):
-        ctx.get_help()
+        print(ctx.get_help())
     return True
 
 
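The print(ctx.get_help()) fix is behavioral, not cosmetic: in Click, which Typer wraps, Context.get_help() returns the rendered help page as a string rather than printing it, so the old bare expression statement produced no output at all. A minimal sketch of the intended pattern, with a plain help_text string standing in for the Click context:

import contextlib
import sys

def emit_help(help_text: str) -> None:
    # Keep stdout clean for machine-readable output; help goes to stderr.
    with contextlib.redirect_stdout(sys.stderr):
        print(help_text)

emit_help('Usage: etlplus [OPTIONS] COMMAND [ARGS]...')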
etlplus/cli/state.py
CHANGED

@@ -15,6 +15,7 @@ from typing import Final
 
 import typer
 
+from ..utils import normalize_str
 from .constants import DATA_CONNECTORS
 
 # SECTION: EXPORTS ========================================================== #
@@ -322,14 +323,10 @@ def validate_choice(
     typer.BadParameter
         If the input value is not in the set of valid choices.
     """
-    v = str(value or '')
-    normalized_choices = {c
+    v = normalize_str(str(value or ''))
+    normalized_choices = {normalize_str(c): c for c in choices}
     if v in normalized_choices:
-
-        for choice in choices:
-            if choice.lower() == v:
-                return choice
-        return v
+        return normalized_choices[v]
     allowed = ', '.join(sorted(choices))
     raise typer.BadParameter(
         f"Invalid {label} '{value}'. Choose from: {allowed}",
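The validate_choice rewrite replaces a lowercase-compare loop with a single dict lookup keyed by normalize_str. The exact behavior of etlplus.utils.normalize_str is not shown in this diff; the sketch below assumes simple case folding plus trimming, which matches the choice.lower() == v comparison it replaces:

def normalize_str(s: str) -> str:
    # Stand-in for etlplus.utils.normalize_str (assumed semantics).
    return s.strip().lower()

def resolve_choice(value: str | None, choices: list[str]) -> str | None:
    v = normalize_str(str(value or ''))
    normalized_choices = {normalize_str(c): c for c in choices}
    # The dict maps normalized spellings back to the canonical choice,
    # so callers always receive the spelling declared in `choices`.
    return normalized_choices.get(v)

assert resolve_choice(' CSV ', ['csv', 'json']) == 'csv'
assert resolve_choice('xml', ['csv', 'json']) is None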
etlplus/{workflow/pipeline.py → config.py}
RENAMED

@@ -1,7 +1,7 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.config` module.
 
-
+Configuration model and helpers for job pipeline orchestration.
 
 Notes
 -----
@@ -16,6 +16,7 @@ Notes
 from __future__ import annotations
 
 import os
+from collections.abc import Callable
 from collections.abc import Mapping
 from dataclasses import dataclass
 from dataclasses import field
@@ -23,152 +24,111 @@ from pathlib import Path
 from typing import Any
 from typing import Self
 
-from
-from ..file import File
-from ..file import FileFormat
-from ..types import StrAnyMap
-from ..utils import coerce_dict
-from ..utils import maybe_mapping
+from .api import ApiConfig
 from .connector import Connector
 from .connector import parse_connector
-from .
-from .
+from .file import File
+from .file import FileFormat
+from .types import StrAnyMap
+from .utils import coerce_dict
 from .utils import deep_substitute
+from .utils import maybe_mapping
+from .workflow.jobs import JobConfig
+from .workflow.profile import ProfileConfig
 
 # SECTION: EXPORTS ========================================================== #
 
 
 __all__ = [
     # Data Classes
-    '
-    # Functions
-    'load_pipeline_config',
+    'Config',
 ]
 
 
 # SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
-def
-    raw: StrAnyMap,
-) -> list[JobConfig]:
-    """
-    Return a list of ``JobConfig`` objects parsed from the mapping.
-
-    Parameters
-    ----------
-    raw : StrAnyMap
-        Raw pipeline mapping.
-
-    Returns
-    -------
-    list[JobConfig]
-        Parsed job configurations.
-    """
-    jobs: list[JobConfig] = []
-    for job_raw in raw.get('jobs', []) or []:
-        job_cfg = JobConfig.from_obj(job_raw)
-        if job_cfg is not None:
-            jobs.append(job_cfg)
-
-    return jobs
-
-
-def _build_sources(
+def _build_connectors(
     raw: StrAnyMap,
+    *,
+    key: str,
 ) -> list[Connector]:
     """
-
+    Parse connector entries from a list under ``raw[key]``.
 
     Parameters
     ----------
     raw : StrAnyMap
         Raw pipeline mapping.
+    key : str
+        Key pointing to connector entries (e.g., ``"sources"``).
 
     Returns
     -------
     list[Connector]
-        Parsed
+        Parsed connector instances.
     """
-    return
+    return list(
+        _collect_parsed(raw.get(key, []) or [], _parse_connector_entry),
+    )
 
 
-def
-
-
+def _collect_parsed[T](
+    items: Any,
+    parser: Callable[[Any], T | None],
+) -> list[T]:
     """
-
+    Collect parsed items from ``raw[key]`` using a tolerant parser.
 
     Parameters
     ----------
-
-
+    items : Any
+        List-like payload to parse.
+    parser : Callable[[Any], T | None]
+        Parser that returns an instance or ``None`` for invalid entries.
 
     Returns
     -------
-    list[
-        Parsed
+    list[T]
+        Parsed items, excluding invalid entries.
     """
-
+    parsed_items: list[T] = []
+    for entry in items or []:
+        parsed = parser(entry)
+        if parsed is not None:
+            parsed_items.append(parsed)
+    return parsed_items
 
 
-def
-
-
-) -> list[Connector]:
+def _parse_connector_entry(
+    obj: Any,
+) -> Connector | None:
     """
-
-
-    Unknown or malformed entries are skipped to preserve permissiveness.
+    Parse a connector mapping into a concrete connector instance.
 
     Parameters
     ----------
-
-
-    key : str
-        List-containing top-level key ("sources" or "targets").
+    obj : Any
+        Candidate connector mapping.
 
     Returns
     -------
-
-
+    Connector | None
+        Parsed connector instance or ``None`` when invalid.
     """
-
-
-
-
-
-
-        except TypeError:
-            # Skip unsupported types or malformed entries
-            continue
-
-    return items
-
-
-# SECTION: FUNCTIONS ======================================================== #
-
-
-def load_pipeline_config(
-    path: Path | str,
-    *,
-    substitute: bool = False,
-    env: Mapping[str, str] | None = None,
-) -> PipelineConfig:
-    """
-    Load a pipeline YAML file into a ``PipelineConfig`` instance.
-
-    Delegates to ``PipelineConfig.from_yaml`` for construction and optional
-    variable substitution.
-    """
-    return PipelineConfig.from_yaml(path, substitute=substitute, env=env)
+    if not (entry := maybe_mapping(obj)):
+        return None
+    try:
+        return parse_connector(entry)
+    except TypeError:
+        return None
 
 
 # SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
-class
+class Config:
     """
     Configuration for the data processing pipeline.
 
@@ -231,7 +191,7 @@ class PipelineConfig:
         env: Mapping[str, str] | None = None,
     ) -> Self:
         """
-        Parse a YAML file into a ``
+        Parse a YAML file into a ``Config`` instance.
 
         Parameters
         ----------
@@ -279,7 +239,7 @@ class PipelineConfig:
         raw: StrAnyMap,
     ) -> Self:
         """
-        Parse a mapping into a ``
+        Parse a mapping into a ``Config`` instance.
 
         Parameters
         ----------
@@ -311,17 +271,20 @@ class PipelineConfig:
         file_systems = coerce_dict(raw.get('file_systems'))
 
         # Sources
-        sources =
+        sources = _build_connectors(raw, key='sources')
 
         # Validations/Transforms
         validations = coerce_dict(raw.get('validations'))
         transforms = coerce_dict(raw.get('transforms'))
 
         # Targets
-        targets =
+        targets = _build_connectors(raw, key='targets')
 
         # Jobs
-        jobs =
+        jobs: list[JobConfig] = _collect_parsed(
+            raw.get('jobs', []) or [],
+            JobConfig.from_obj,
+        )
 
         # Table schemas (optional, tolerant pass-through structures).
         table_schemas: list[dict[str, Any]] = []
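Two things are worth noting in this rename. First, the module-level load_pipeline_config wrapper is gone; Config.from_yaml is now the single entry point. Second, _collect_parsed[T] uses PEP 695 type-parameter syntax (def f[T](...)), which requires Python 3.12+. A self-contained sketch of the tolerant-parsing pattern it implements, with a hypothetical parse_int standing in for JobConfig.from_obj or _parse_connector_entry:

from collections.abc import Callable
from typing import Any

def collect_parsed[T](items: Any, parser: Callable[[Any], T | None]) -> list[T]:
    # Invalid entries yield None from the parser and are skipped silently,
    # preserving the permissive behavior of the old per-section helpers.
    parsed_items: list[T] = []
    for entry in items or []:
        if (parsed := parser(entry)) is not None:
            parsed_items.append(parsed)
    return parsed_items

def parse_int(obj: Any) -> int | None:
    # Hypothetical parser: returns None instead of raising on bad input.
    try:
        return int(obj)
    except (TypeError, ValueError):
        return None

assert collect_parsed(['1', 'x', 2, None], parse_int) == [1, 2]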
etlplus/connector/__init__.py
ADDED

@@ -0,0 +1,43 @@
+"""
+:mod:`etlplus.connector` package.
+
+Connector configuration types and enums.
+"""
+
+from __future__ import annotations
+
+from .api import ConnectorApi
+from .api import ConnectorApiConfigDict
+from .connector import Connector
+from .core import ConnectorBase
+from .core import ConnectorProtocol
+from .database import ConnectorDb
+from .database import ConnectorDbConfigDict
+from .enums import DataConnectorType
+from .file import ConnectorFile
+from .file import ConnectorFileConfigDict
+from .types import ConnectorType
+from .utils import parse_connector
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Data Classes
+    'ConnectorApi',
+    'ConnectorDb',
+    'ConnectorFile',
+    # Enums
+    'DataConnectorType',
+    # Functions
+    'parse_connector',
+    # Type Aliases
+    'Connector',
+    'ConnectorBase',
+    'ConnectorProtocol',
+    'ConnectorType',
+    # Typed Dicts
+    'ConnectorApiConfigDict',
+    'ConnectorDbConfigDict',
+    'ConnectorFileConfigDict',
+]
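This package replaces the removed etlplus/workflow/connector.py (see the file list above). A hedged usage sketch of the re-exported surface; the dispatch rule of parse_connector (mapping the 'type' key to a concrete connector class) is inferred from _parse_connector_entry in config.py and the enum names, not shown explicitly in this diff:

from etlplus.connector import ConnectorApi, parse_connector

conn = parse_connector({
    'name': 'users',  # required by the *.from_obj constructors
    'type': 'api',    # presumably selects ConnectorApi
    'url': 'https://api.example.com/v1/users',  # hypothetical URL
})
print(type(conn).__name__)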
etlplus/connector/api.py
ADDED

@@ -0,0 +1,161 @@
+"""
+:mod:`etlplus.connector.api` module.
+
+API connector configuration dataclass.
+
+Notes
+-----
+- TypedDicts in this module are intentionally ``total=False`` and are not
+  enforced at runtime.
+- :meth:`*.from_obj` constructors accept :class:`Mapping[str, Any]` and
+  perform tolerant parsing and light casting. This keeps the runtime
+  permissive while improving autocomplete and static analysis for
+  contributors.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from dataclasses import field
+from typing import Any
+from typing import Self
+from typing import TypedDict
+from typing import overload
+
+from ..api import PaginationConfig
+from ..api import PaginationConfigDict
+from ..api import RateLimitConfig
+from ..api import RateLimitConfigDict
+from ..types import StrAnyMap
+from ..types import StrStrMap
+from ..utils import cast_str_dict
+from ..utils import coerce_dict
+from ..utils import maybe_mapping
+from .core import ConnectorBase
+from .enums import DataConnectorType
+from .types import ConnectorType
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    'ConnectorApi',
+    'ConnectorApiConfigDict',
+]
+
+
+# SECTION: TYPED DICTS ====================================================== #
+
+
+class ConnectorApiConfigDict(TypedDict, total=False):
+    """
+    Shape accepted by :meth:`ConnectorApi.from_obj` (all keys optional).
+
+    See Also
+    --------
+    - :meth:`etlplus.connector.api.ConnectorApi.from_obj`
+    """
+
+    name: str
+    type: ConnectorType
+    url: str
+    method: str
+    headers: StrStrMap
+    query_params: StrAnyMap
+    pagination: PaginationConfigDict
+    rate_limit: RateLimitConfigDict
+    api: str
+    endpoint: str
+
+
+# SECTION: DATA CLASSES ===================================================== #
+
+
+@dataclass(kw_only=True, slots=True)
+class ConnectorApi(ConnectorBase):
+    """
+    Configuration for an API-based data connector.
+
+    Attributes
+    ----------
+    type : ConnectorType
+        Connector kind, always ``'api'``.
+    url : str | None
+        Direct absolute URL (when not using ``service``/``endpoint`` refs).
+    method : str | None
+        Optional HTTP method; typically omitted for sources (defaults to
+        GET) and used for targets (e.g., ``'post'``).
+    headers : dict[str, str]
+        Additional request headers.
+    query_params : dict[str, Any]
+        Default query parameters.
+    pagination : PaginationConfig | None
+        Pagination settings (optional).
+    rate_limit : RateLimitConfig | None
+        Rate limiting settings (optional).
+    api : str | None
+        Service reference into the pipeline ``apis`` block (a.k.a.
+        ``service``).
+    endpoint : str | None
+        Endpoint name within the referenced service.
+    """
+
+    # -- Attributes -- #
+
+    type: ConnectorType = DataConnectorType.API
+
+    # Direct form
+    url: str | None = None
+    # Optional HTTP method; typically omitted for sources (defaults to GET
+    # at runtime) and used for targets (e.g., 'post', 'put').
+    method: str | None = None
+    headers: dict[str, str] = field(default_factory=dict)
+    query_params: dict[str, Any] = field(default_factory=dict)
+    pagination: PaginationConfig | None = None
+    rate_limit: RateLimitConfig | None = None
+
+    # Reference form (to top-level APIs/endpoints)
+    api: str | None = None
+    endpoint: str | None = None
+
+    # -- Class Methods -- #
+
+    @classmethod
+    @overload
+    def from_obj(cls, obj: ConnectorApiConfigDict) -> Self: ...
+
+    @classmethod
+    @overload
+    def from_obj(cls, obj: StrAnyMap) -> Self: ...
+
+    @classmethod
+    def from_obj(
+        cls,
+        obj: StrAnyMap,
+    ) -> Self:
+        """
+        Parse a mapping into a ``ConnectorApi`` instance.
+
+        Parameters
+        ----------
+        obj : StrAnyMap
+            Mapping with at least ``name``.
+
+        Returns
+        -------
+        Self
+            Parsed connector instance.
+        """
+        name = cls._require_name(obj, kind='Api')
+        headers = cast_str_dict(maybe_mapping(obj.get('headers')))
+
+        return cls(
+            name=name,
+            url=obj.get('url'),
+            method=obj.get('method'),
+            headers=headers,
+            query_params=coerce_dict(obj.get('query_params')),
+            pagination=PaginationConfig.from_obj(obj.get('pagination')),
+            rate_limit=RateLimitConfig.from_obj(obj.get('rate_limit')),
+            api=obj.get('api') or obj.get('service'),
+            endpoint=obj.get('endpoint'),
+        )
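To make the two construction forms concrete, here is a sketch of ConnectorApi.from_obj on both shapes, with hypothetical names and URLs; the 'service' alias for 'api' follows directly from the obj.get('api') or obj.get('service') line above:

from etlplus.connector.api import ConnectorApi

# Direct form: an absolute URL plus optional method/headers.
direct = ConnectorApi.from_obj({
    'name': 'users-api',
    'url': 'https://api.example.com/v1/users',  # hypothetical URL
    'method': 'post',
    'headers': {'Accept': 'application/json'},
})

# Reference form: point at a service/endpoint defined in the pipeline's
# top-level `apis` block instead of hard-coding a URL.
ref = ConnectorApi.from_obj({
    'name': 'users-api',
    'service': 'crm',  # accepted as an alias for 'api'
    'endpoint': 'users',
})
assert ref.api == 'crm' and ref.endpoint == 'users'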