etlplus 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +37 -0
- etlplus/__init__.py +1 -26
- etlplus/api/README.md +51 -3
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +39 -28
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +96 -6
- etlplus/{run_helpers.py → api/utils.py} +209 -153
- etlplus/cli/README.md +40 -0
- etlplus/cli/commands.py +76 -43
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +40 -12
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +4 -7
- etlplus/database/README.md +48 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/database/schema.py +1 -1
- etlplus/enums.py +1 -157
- etlplus/file/README.md +105 -0
- etlplus/file/__init__.py +25 -0
- etlplus/file/_imports.py +141 -0
- etlplus/file/_io.py +160 -0
- etlplus/file/accdb.py +78 -0
- etlplus/file/arrow.py +78 -0
- etlplus/file/avro.py +176 -0
- etlplus/file/bson.py +77 -0
- etlplus/file/cbor.py +78 -0
- etlplus/file/cfg.py +79 -0
- etlplus/file/conf.py +80 -0
- etlplus/file/core.py +322 -0
- etlplus/file/csv.py +79 -0
- etlplus/file/dat.py +78 -0
- etlplus/file/dta.py +77 -0
- etlplus/file/duckdb.py +78 -0
- etlplus/file/enums.py +343 -0
- etlplus/file/feather.py +111 -0
- etlplus/file/fwf.py +77 -0
- etlplus/file/gz.py +123 -0
- etlplus/file/hbs.py +78 -0
- etlplus/file/hdf5.py +78 -0
- etlplus/file/ini.py +79 -0
- etlplus/file/ion.py +78 -0
- etlplus/file/jinja2.py +78 -0
- etlplus/file/json.py +98 -0
- etlplus/file/log.py +78 -0
- etlplus/file/mat.py +78 -0
- etlplus/file/mdb.py +78 -0
- etlplus/file/msgpack.py +78 -0
- etlplus/file/mustache.py +78 -0
- etlplus/file/nc.py +78 -0
- etlplus/file/ndjson.py +108 -0
- etlplus/file/numbers.py +75 -0
- etlplus/file/ods.py +79 -0
- etlplus/file/orc.py +111 -0
- etlplus/file/parquet.py +113 -0
- etlplus/file/pb.py +78 -0
- etlplus/file/pbf.py +77 -0
- etlplus/file/properties.py +78 -0
- etlplus/file/proto.py +77 -0
- etlplus/file/psv.py +79 -0
- etlplus/file/rda.py +78 -0
- etlplus/file/rds.py +78 -0
- etlplus/file/sas7bdat.py +78 -0
- etlplus/file/sav.py +77 -0
- etlplus/file/sqlite.py +78 -0
- etlplus/file/stub.py +84 -0
- etlplus/file/sylk.py +77 -0
- etlplus/file/tab.py +81 -0
- etlplus/file/toml.py +78 -0
- etlplus/file/tsv.py +80 -0
- etlplus/file/txt.py +102 -0
- etlplus/file/vm.py +78 -0
- etlplus/file/wks.py +77 -0
- etlplus/file/xls.py +88 -0
- etlplus/file/xlsm.py +79 -0
- etlplus/file/xlsx.py +99 -0
- etlplus/file/xml.py +185 -0
- etlplus/file/xpt.py +78 -0
- etlplus/file/yaml.py +95 -0
- etlplus/file/zip.py +175 -0
- etlplus/file/zsav.py +77 -0
- etlplus/ops/README.md +50 -0
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +81 -99
- etlplus/{load.py → ops/load.py} +78 -101
- etlplus/{run.py → ops/run.py} +159 -127
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +53 -17
- etlplus/{validate.py → ops/validate.py} +22 -12
- etlplus/templates/README.md +46 -0
- etlplus/types.py +5 -4
- etlplus/utils.py +136 -2
- etlplus/workflow/README.md +52 -0
- etlplus/{config → workflow}/__init__.py +10 -23
- etlplus/{config → workflow}/connector.py +58 -44
- etlplus/workflow/dag.py +105 -0
- etlplus/{config → workflow}/jobs.py +105 -32
- etlplus/{config → workflow}/pipeline.py +59 -51
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/METADATA +210 -17
- etlplus-0.9.2.dist-info/RECORD +134 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/WHEEL +1 -1
- etlplus/config/types.py +0 -204
- etlplus/config/utils.py +0 -120
- etlplus/file.py +0 -657
- etlplus/validation/__init__.py +0 -44
- etlplus-0.9.1.dist-info/RECORD +0 -65
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/top_level.txt +0 -0
etlplus/workflow/dag.py
ADDED
```diff
@@ -0,0 +1,105 @@
+"""
+:mod:`etlplus.workflow.dag` module.
+
+Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
+:attr:`depends_on`.
+"""
+
+from __future__ import annotations
+
+from collections import deque
+from dataclasses import dataclass
+
+from .jobs import JobConfig
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Errors
+    'DagError',
+    # Functions
+    'topological_sort_jobs',
+]
+
+
+# SECTION: ERRORS =========================================================== #
+
+
+@dataclass(slots=True)
+class DagError(ValueError):
+    """
+    Raised when the job dependency graph is invalid.
+
+    Attributes
+    ----------
+    message : str
+        Error message.
+    """
+
+    # -- Attributes -- #
+
+    message: str
+
+    # -- Magic Methods (Object Representation) -- #
+
+    def __str__(self) -> str:
+        return self.message
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def topological_sort_jobs(
+    jobs: list[JobConfig],
+) -> list[JobConfig]:
+    """
+    Return jobs in topological order based on :attr:`depends_on`.
+
+    Parameters
+    ----------
+    jobs : list[JobConfig]
+        List of job configurations to sort.
+
+    Returns
+    -------
+    list[JobConfig]
+        Jobs sorted in topological order.
+
+    Raises
+    ------
+    DagError
+        If a dependency is missing, self-referential, or when a cycle is
+        detected.
+    """
+    index = {job.name: job for job in jobs}
+    edges: dict[str, set[str]] = {name: set() for name in index}
+    indegree: dict[str, int] = {name: 0 for name in index}
+
+    for job in jobs:
+        for dep in job.depends_on:
+            if dep not in index:
+                raise DagError(
+                    f'Unknown dependency "{dep}" in job "{job.name}"',
+                )
+            if dep == job.name:
+                raise DagError(f'Job "{job.name}" depends on itself')
+            if job.name not in edges[dep]:
+                edges[dep].add(job.name)
+                indegree[job.name] += 1
+
+    queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
+    ordered: list[str] = []
+
+    while queue:
+        name = queue.popleft()
+        ordered.append(name)
+        for child in sorted(edges[name]):
+            indegree[child] -= 1
+            if indegree[child] == 0:
+                queue.append(child)
+
+    if len(ordered) != len(jobs):
+        raise DagError('Dependency cycle detected')
+
+    return [index[name] for name in ordered]
```
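The new module is Kahn's algorithm with alphabetically sorted queues, so the output order is deterministic for a given job set. A minimal usage sketch (job names are hypothetical; `JobConfig` is keyword-only per the `jobs.py` diff below):

```python
from etlplus.workflow.dag import DagError
from etlplus.workflow.dag import topological_sort_jobs
from etlplus.workflow.jobs import JobConfig

jobs = [
    JobConfig(name='load', depends_on=['transform']),
    JobConfig(name='extract'),
    JobConfig(name='transform', depends_on=['extract']),
]

# Dependencies always sort ahead of dependents; ties break alphabetically.
print([job.name for job in topological_sort_jobs(jobs)])
# -> ['extract', 'transform', 'load']

# Invalid graphs raise DagError rather than looping or returning partials.
try:
    topological_sort_jobs([JobConfig(name='a', depends_on=['a'])])
except DagError as exc:
    print(exc)  # Job "a" depends on itself
```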
etlplus/{config → workflow}/jobs.py

```diff
@@ -1,12 +1,12 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.jobs` module.
 
 Data classes modeling job orchestration references (extract, validate,
 transform, load).
 
 Notes
 -----
-- Lightweight references used inside
+- Lightweight references used inside :class:`PipelineConfig` to avoid storing
   large nested structures.
 - All attributes are simple and optional where appropriate, keeping parsing
   tolerant.
@@ -19,6 +19,7 @@ from dataclasses import field
 from typing import Any
 from typing import Self
 
+from ..types import StrAnyMap
 from ..utils import coerce_dict
 from ..utils import maybe_mapping
 
@@ -26,6 +27,7 @@ from ..utils import maybe_mapping
 
 
 __all__ = [
+    # Data Classes
     'ExtractRef',
     'JobConfig',
     'LoadRef',
@@ -34,10 +36,76 @@ __all__ = [
 ]
 
 
-# SECTION:
+# SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
-
+def _coerce_optional_str(value: Any) -> str | None:
+    """
+    Normalize optional string values, coercing non-strings when needed.
+
+    Parameters
+    ----------
+    value : Any
+        Optional value to normalize.
+
+    Returns
+    -------
+    str | None
+        ``None`` when ``value`` is ``None``; otherwise a string value.
+    """
+    if value is None:
+        return None
+    return value if isinstance(value, str) else str(value)
+
+
+def _parse_depends_on(
+    value: Any,
+) -> list[str]:
+    """
+    Normalize dependency declarations into a string list.
+
+    Parameters
+    ----------
+    value : Any
+        Input dependency specification (string or list of strings).
+
+    Returns
+    -------
+    list[str]
+        Normalized dependency list.
+    """
+    if isinstance(value, str):
+        return [value]
+    if isinstance(value, list):
+        return [entry for entry in value if isinstance(entry, str)]
+    return []
+
+
+def _require_str(
+    # data: dict[str, Any],
+    data: StrAnyMap,
+    key: str,
+) -> str | None:
+    """
+    Extract a required string field from a mapping.
+
+    Parameters
+    ----------
+    data : StrAnyMap
+        Mapping containing the target field.
+    key : str
+        Field name to extract.
+
+    Returns
+    -------
+    str | None
+        The string value when present and valid; otherwise ``None``.
+    """
+    value = data.get(key)
+    return value if isinstance(value, str) else None
+
+
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
@@ -65,12 +133,13 @@ class ExtractRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into an :class:`ExtractRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`source` and optional :attr:`options`.
 
         Returns
         -------
@@ -80,8 +149,8 @@ class ExtractRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        source = data
-        if
+        source = _require_str(data, 'source')
+        if source is None:
             return None
         return cls(
             source=source,
@@ -100,6 +169,8 @@ class JobConfig:
         Unique job name.
     description : str | None
         Optional human-friendly description.
+    depends_on : list[str]
+        Optional job dependency list. Dependencies must refer to other jobs.
     extract : ExtractRef | None
         Extraction reference.
     validate : ValidationRef | None
@@ -114,6 +185,7 @@ class JobConfig:
 
     name: str
     description: str | None = None
+    depends_on: list[str] = field(default_factory=list)
     extract: ExtractRef | None = None
     validate: ValidationRef | None = None
     transform: TransformRef | None = None
@@ -126,7 +198,8 @@ class JobConfig:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`JobConfig` instance.
 
         Parameters
         ----------
@@ -141,17 +214,18 @@ class JobConfig:
         data = maybe_mapping(obj)
         if not data:
             return None
-        name = data
-        if
+        name = _require_str(data, 'name')
+        if name is None:
             return None
 
-        description = data.get('description')
-
-
+        description = _coerce_optional_str(data.get('description'))
+
+        depends_on = _parse_depends_on(data.get('depends_on'))
 
         return cls(
             name=name,
             description=description,
+            depends_on=depends_on,
             extract=ExtractRef.from_obj(data.get('extract')),
             validate=ValidationRef.from_obj(data.get('validate')),
             transform=TransformRef.from_obj(data.get('transform')),
@@ -184,12 +258,13 @@ class LoadRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`LoadRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`target` and optional :attr:`overrides`.
 
         Returns
         -------
@@ -199,8 +274,8 @@ class LoadRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        target = data
-        if
+        target = _require_str(data, 'target')
+        if target is None:
             return None
         return cls(
             target=target,
@@ -230,12 +305,13 @@ class TransformRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`TransformRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`pipeline`.
 
         Returns
         -------
@@ -245,8 +321,8 @@ class TransformRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        pipeline = data
-        if
+        pipeline = _require_str(data, 'pipeline')
+        if pipeline is None:
             return None
         return cls(pipeline=pipeline)
 
@@ -280,12 +356,13 @@ class ValidationRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`ValidationRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`ruleset` plus optional metadata.
 
         Returns
         -------
@@ -295,15 +372,11 @@ class ValidationRef:
         data = maybe_mapping(obj)
         if not data:
             return None
-        ruleset = data
-        if
+        ruleset = _require_str(data, 'ruleset')
+        if ruleset is None:
             return None
-        severity = data.get('severity')
-
-            severity = str(severity)
-        phase = data.get('phase')
-        if phase is not None and not isinstance(phase, str):
-            phase = str(phase)
+        severity = _coerce_optional_str(data.get('severity'))
+        phase = _coerce_optional_str(data.get('phase'))
         return cls(
             ruleset=ruleset,
             severity=severity,
```
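The new helpers make `from_obj` uniformly tolerant: a missing or malformed required field yields `None` instead of an exception, and `depends_on` accepts either a scalar string or a list. A sketch of the implied semantics (inputs here are hypothetical):

```python
from etlplus.workflow.jobs import JobConfig

# A scalar depends_on is promoted to a one-element list.
job = JobConfig.from_obj({'name': 'load', 'depends_on': 'transform'})
assert job is not None and job.depends_on == ['transform']

# Non-string entries are silently dropped from list declarations.
job = JobConfig.from_obj({'name': 'load', 'depends_on': ['transform', 42]})
assert job is not None and job.depends_on == ['transform']

# A missing (or non-string) 'name' makes from_obj return None, not raise.
assert JobConfig.from_obj({'depends_on': ['transform']}) is None
```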
etlplus/{config → workflow}/pipeline.py

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.pipeline` module.
 
 Pipeline configuration model and helpers for job orchestration.
 
@@ -16,6 +16,7 @@ Notes
 from __future__ import annotations
 
 import os
+from collections.abc import Callable
 from collections.abc import Mapping
 from dataclasses import dataclass
 from dataclasses import field
@@ -24,72 +25,90 @@ from typing import Any
 from typing import Self
 
 from ..api import ApiConfig
-from ..enums import FileFormat
 from ..file import File
+from ..file import FileFormat
 from ..types import StrAnyMap
 from ..utils import coerce_dict
+from ..utils import deep_substitute
 from ..utils import maybe_mapping
 from .connector import Connector
 from .connector import parse_connector
 from .jobs import JobConfig
 from .profile import ProfileConfig
-from .utils import deep_substitute
 
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = [
+__all__ = [
+    # Data Classes
+    'PipelineConfig',
+    # Functions
+    'load_pipeline_config',
+]
 
 
-
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _collect_parsed[T](
     raw: StrAnyMap,
-
+    key: str,
+    parser: Callable[[Any], T | None],
+) -> list[T]:
     """
-
+    Collect parsed items from ``raw[key]`` using a tolerant parser.
 
     Parameters
     ----------
     raw : StrAnyMap
         Raw pipeline mapping.
+    key : str
+        Key pointing to a list-like payload.
+    parser : Callable[[Any], T | None]
+        Parser that returns an instance or ``None`` for invalid entries.
 
     Returns
     -------
-    list[
-        Parsed
+    list[T]
+        Parsed items, excluding invalid entries.
     """
-
-    for
-
-    if
-
-
-    return jobs
+    items: list[T] = []
+    for entry in raw.get(key, []) or []:
+        parsed = parser(entry)
+        if parsed is not None:
+            items.append(parsed)
+    return items
 
 
-def
-
-) ->
+def _parse_connector_entry(
+    obj: Any,
+) -> Connector | None:
     """
-
+    Parse a connector mapping into a concrete connector instance.
 
     Parameters
     ----------
-
-
+    obj : Any
+        Candidate connector mapping.
 
     Returns
     -------
-
-        Parsed
+    Connector | None
+        Parsed connector instance or ``None`` when invalid.
     """
-
+    if not (entry := maybe_mapping(obj)):
+        return None
+    try:
+        return parse_connector(entry)
+    except TypeError:
+        return None
 
 
-def
+def _build_sources(
     raw: StrAnyMap,
 ) -> list[Connector]:
     """
-    Return a list of
+    Return a list of source connectors parsed from the mapping.
 
     Parameters
     ----------
@@ -99,43 +118,32 @@ def _build_targets(
     Returns
     -------
     list[Connector]
-        Parsed
+        Parsed source connectors.
     """
-    return
+    return list(
+        _collect_parsed(raw, 'sources', _parse_connector_entry),
+    )
 
 
-def
+def _build_targets(
     raw: StrAnyMap,
-    key: str,
 ) -> list[Connector]:
     """
-    Return
-
-    Unknown or malformed entries are skipped to preserve permissiveness.
+    Return a list of target connectors parsed from the mapping.
 
     Parameters
     ----------
     raw : StrAnyMap
         Raw pipeline mapping.
-    key : str
-        List-containing top-level key ("sources" or "targets").
 
     Returns
     -------
     list[Connector]
-
+        Parsed target connectors.
     """
-
-
-
-            continue
-        try:
-            items.append(parse_connector(entry))
-        except TypeError:
-            # Skip unsupported types or malformed entries
-            continue
-
-    return items
+    return list(
+        _collect_parsed(raw, 'targets', _parse_connector_entry),
+    )
 
 
 # SECTION: FUNCTIONS ======================================================== #
@@ -156,7 +164,7 @@ def load_pipeline_config(
     return PipelineConfig.from_yaml(path, substitute=substitute, env=env)
 
 
-# SECTION: CLASSES
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
@@ -246,7 +254,7 @@ class PipelineConfig:
         TypeError
             If the YAML root is not a mapping/object.
         """
-        raw = File(Path(path), FileFormat.YAML).
+        raw = File(Path(path), FileFormat.YAML).read()
         if not isinstance(raw, dict):
             raise TypeError('Pipeline YAML must have a mapping/object root')
 
@@ -313,7 +321,7 @@ class PipelineConfig:
         targets = _build_targets(raw)
 
         # Jobs
-        jobs =
+        jobs = _collect_parsed(raw, 'jobs', JobConfig.from_obj)
 
         # Table schemas (optional, tolerant pass-through structures).
         table_schemas: list[dict[str, Any]] = []
```
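`_collect_parsed` uses PEP 695 type-parameter syntax (`def _collect_parsed[T](...)`, so Python 3.12+), letting one tolerant helper back `sources`, `targets`, and `jobs` alike. A self-contained sketch of the pattern (names are illustrative, not etlplus API):

```python
from collections.abc import Callable
from typing import Any


def collect_parsed[T](
    raw: dict[str, Any],
    key: str,
    parser: Callable[[Any], T | None],
) -> list[T]:
    # Missing keys and None payloads both degrade to an empty iteration.
    items: list[T] = []
    for entry in raw.get(key, []) or []:
        parsed = parser(entry)
        if parsed is not None:
            items.append(parsed)
    return items


raw = {'jobs': [{'name': 'extract'}, 'not-a-mapping', {'id': 3}]}
names = collect_parsed(
    raw,
    'jobs',
    lambda obj: obj.get('name') if isinstance(obj, dict) else None,
)
print(names)  # ['extract'] -- invalid entries are skipped, never raised
```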
etlplus/{config → workflow}/profile.py

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.profile` module.
 
 Profile model for pipeline-level defaults and environment.
 
@@ -22,10 +22,13 @@ from ..utils import cast_str_dict
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = [
+__all__ = [
+    # Data Classes
+    'ProfileConfig',
+]
 
 
-# SECTION: CLASSES
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
@@ -53,7 +56,7 @@ class ProfileConfig:
         cls,
         obj: StrAnyMap | None,
     ) -> Self:
-        """Parse a mapping into a
+        """Parse a mapping into a :class:`ProfileConfig` instance.
 
         Parameters
         ----------
@@ -64,7 +67,7 @@ class ProfileConfig:
         -------
         Self
             Parsed profile configuration; non-mapping input yields a default
-            instance. All
+            instance. All :attr:`env` values are coerced to strings.
 
         """
        if not isinstance(obj, Mapping):
            return cls()
```