etlplus 0.12.12__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +2 -2
- etlplus/__init__.py +1 -26
- etlplus/api/README.md +2 -2
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +36 -20
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +7 -6
- etlplus/{run_helpers.py → api/utils.py} +209 -153
- etlplus/cli/README.md +2 -2
- etlplus/cli/handlers.py +19 -9
- etlplus/config/README.md +31 -33
- etlplus/config/__init__.py +9 -32
- etlplus/config/types.py +0 -64
- etlplus/dag.py +103 -0
- etlplus/database/README.md +2 -2
- etlplus/enums.py +0 -32
- etlplus/file/README.md +2 -2
- etlplus/file/enums.py +1 -1
- etlplus/{validation → ops}/README.md +2 -2
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +78 -94
- etlplus/{load.py → ops/load.py} +73 -93
- etlplus/{run.py → ops/run.py} +153 -118
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +80 -15
- etlplus/{validate.py → ops/validate.py} +19 -9
- etlplus/templates/README.md +2 -2
- etlplus/types.py +2 -2
- etlplus/workflow/README.md +52 -0
- etlplus/workflow/__init__.py +43 -0
- etlplus/{config → workflow}/connector.py +17 -16
- etlplus/workflow/dag.py +105 -0
- etlplus/{config → workflow}/jobs.py +31 -15
- etlplus/{config → workflow}/pipeline.py +11 -3
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/METADATA +91 -60
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/RECORD +49 -43
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/WHEEL +1 -1
- etlplus/validation/__init__.py +0 -44
- /etlplus/{config → workflow}/utils.py +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/entry_points.txt +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/top_level.txt +0 -0

etlplus/{validate.py → ops/validate.py}
RENAMED

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.ops.validate` module.
 
 Validate dicts and lists of dicts using simple, schema-like rules.
 
@@ -34,11 +34,11 @@ from typing import Final
 from typing import Literal
 from typing import TypedDict
 
+from ..types import JSONData
+from ..types import Record
+from ..types import StrAnyMap
+from ..types import StrPath
 from .load import load_data
-from .types import JSONData
-from .types import Record
-from .types import StrAnyMap
-from .types import StrPath
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -279,11 +279,15 @@ def _type_matches(
     bool
         ``True`` if the value matches the expected type; ``False`` if not.
     """
-
-
-
+    if expected == 'number':
+        return _is_number(value)
+    if expected == 'integer':
+        return isinstance(value, int) and not isinstance(value, bool)
+    if expected == 'boolean':
+        return isinstance(value, bool)
 
-
+    py_type = TYPE_MAP.get(expected)
+    return isinstance(value, py_type) if py_type else False
 
 
 def _validate_record(
@@ -330,6 +334,9 @@ def _validate_record(
 # SECTION: FUNCTIONS ======================================================== #
 
 
+# -- Helpers -- #
+
+
 def validate_field(
     value: Any,
     rules: StrAnyMap | FieldRules,
@@ -425,6 +432,9 @@ def validate_field(
     return {'valid': len(errors) == 0, 'errors': errors}
 
 
+# -- Orchestration -- #
+
+
 def validate(
     source: StrPath | JSONData,
     rules: RulesMap | None = None,
```
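The rewritten `_type_matches` above is what `validate_field` leans on for type rules. A minimal sketch of the behaviour, assuming a rules mapping with a `'type'` key (the full `FieldRules` schema is not part of this hunk):

```python
# Minimal sketch, assuming a {'type': ...} rule is accepted by validate_field;
# the exact FieldRules keys are not shown in this diff.
from etlplus.ops.validate import validate_field

print(validate_field(3.14, {'type': 'number'}))   # expected: {'valid': True, 'errors': []}
print(validate_field(True, {'type': 'integer'}))  # bools are rejected for 'integer'
print(validate_field('yes', {'type': 'boolean'})) # expected to report a type error
```

The `valid`/`errors` result shape matches the `return {'valid': len(errors) == 0, 'errors': errors}` context line above.
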
etlplus/templates/README.md
CHANGED

```diff
@@ -1,4 +1,4 @@
-# etlplus.templates
+# `etlplus.templates` Subpackage
 
 Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpers.
 
@@ -8,7 +8,7 @@ Documentation for the `etlplus.templates` subpackage: SQL and DDL template helpe
 
 Back to project overview: see the top-level [README](../../README.md).
 
-- [etlplus.templates
+- [`etlplus.templates` Subpackage](#etlplus-templates-subpackage)
 - [Available Templates](#available-templates)
 - [Rendering Templates](#rendering-templates)
 - [Example: Rendering a DDL Template](#example-rendering-a-ddl-template)
```
etlplus/types.py
CHANGED

```diff
@@ -193,8 +193,8 @@ type AggregateSpec = StrAnyMap
 
 # -- Pipelines-- #
 
-# Unified pipeline step spec consumed by :mod:`etlplus.transform`.
-type StepSpec = FilterSpec | MapSpec | SelectSpec | SortSpec
+# Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
+type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
 
 # Collections of steps
 
```
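With `AggregateSpec` now part of the union, a transform pipeline can mix aggregate steps with the existing kinds. A purely illustrative sketch; the dict keys below are hypothetical, since the individual spec shapes are all `StrAnyMap` aliases defined elsewhere in `etlplus.types`:

```python
from etlplus.types import StepSpec

# Hypothetical step mappings; only the StepSpec union itself comes from the diff.
steps: list[StepSpec] = [
    {'filter': {'field': 'status', 'equals': 'active'}},
    {'aggregate': {'group_by': ['region'], 'metric': 'sum(amount)'}},
]
print(len(steps))
```
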

etlplus/workflow/README.md
ADDED

````diff
@@ -0,0 +1,52 @@
+# `etlplus.workflow` Subpackage
+
+Documentation for the `etlplus.workflow` subpackage: configuration helpers for connectors,
+pipelines, jobs, and profiles.
+
+- Provides classes and utilities for managing ETL pipeline configuration
+- Supports YAML/JSON config loading and validation
+- Includes helpers for connectors, jobs, pipelines, and profiles
+- Exposes type definitions for config schemas
+
+Back to project overview: see the top-level [README](../../README.md).
+
+- [`etlplus.workflow` Subpackage](#etlplusworkflow-subpackage)
+- [Supported Configuration Types](#supported-configuration-types)
+- [Loading and Validating Configs](#loading-and-validating-configs)
+- [Example: Loading a Pipeline Config](#example-loading-a-pipeline-config)
+- [See Also](#see-also)
+
+## Supported Configuration Types
+
+- **Connector**: Connection details for databases, files, or APIs
+- **Job**: ETL job definitions and scheduling
+- **Pipeline**: End-to-end pipeline configuration
+- **Profile**: User or environment-specific settings
+
+## Loading and Validating Configs
+
+Use the provided classes to load and validate configuration files:
+
+```python
+from etlplus.workflow import PipelineConfig
+
+cfg = PipelineConfig.from_yaml("pipeline.yml")
+```
+
+- Supports YAML and JSON formats
+- Validates against expected schema
+
+## Example: Loading a Pipeline Config
+
+```python
+from etlplus.workflow import PipelineConfig
+
+pipeline = PipelineConfig.from_yaml("configs/pipeline.yml")
+print(pipeline)
+```
+
+## See Also
+
+- Top-level CLI and library usage in the main [README](../../README.md)
+- Config type definitions in [types.py](types.py)
+- Config utilities in [utils.py](utils.py)
````

etlplus/workflow/__init__.py
ADDED

```diff
@@ -0,0 +1,43 @@
+"""
+:mod:`etlplus.workflow` package.
+
+Job workflow helpers.
+"""
+
+from __future__ import annotations
+
+from .connector import Connector
+from .connector import ConnectorApi
+from .connector import ConnectorDb
+from .connector import ConnectorFile
+from .connector import parse_connector
+from .dag import topological_sort_jobs
+from .jobs import ExtractRef
+from .jobs import JobConfig
+from .jobs import LoadRef
+from .jobs import TransformRef
+from .jobs import ValidationRef
+from .pipeline import PipelineConfig
+from .pipeline import load_pipeline_config
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Data Classes
+    'ConnectorApi',
+    'ConnectorDb',
+    'ConnectorFile',
+    'ExtractRef',
+    'JobConfig',
+    'LoadRef',
+    'PipelineConfig',
+    'TransformRef',
+    'ValidationRef',
+    # Functions
+    'load_pipeline_config',
+    'parse_connector',
+    'topological_sort_jobs',
+    # Type Aliases
+    'Connector',
+]
```
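Because the new `__init__` re-exports the public API, callers can import workflow helpers from the subpackage root rather than the old `etlplus.config` paths; for example:

```python
from etlplus.workflow import JobConfig, parse_connector, topological_sort_jobs

job = JobConfig(name='nightly_sync')
print(job.depends_on)  # [] by default (see the jobs.py changes below)
```
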

etlplus/{config → workflow}/connector.py
RENAMED

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.connector` module.
 
 A module defining configuration types for data source/target connectors in ETL
 pipelines. A "connector" is any I/O endpoint:
@@ -11,18 +11,19 @@ pipelines. A "connector" is any I/O endpoint:
 
 Examples
 --------
-- Use
-
-- Use the
-
-
+- Use :class:`ConnectorApi`/:class:`ConnectorFile`/:class:`ConnectorDb` when
+  you want the concrete dataclasses.
+- Use the :class:`Connector` union for typing a value that can be any
+  connector.
+- Use :func:`parse_connector(obj)` to construct a connector instance from a
+  generic mapping that includes a *type* key.
 
 Notes
 -----
 - TypedDict shapes are editor hints; runtime parsing remains permissive
-
+  (from_obj accepts Mapping[str, Any]).
 - TypedDicts referenced in :mod:`etlplus.config.types` remain editor hints.
-
+  Runtime parsing stays permissive and tolerant.
 
 See Also
 --------
@@ -59,7 +60,7 @@ if TYPE_CHECKING: # Editor-only typing hints to avoid runtime imports
 
 
 __all__ = [
-    # Classes
+    # Data Classes
     'ConnectorApi',
     'ConnectorDb',
     'ConnectorFile',
@@ -83,12 +84,12 @@ class ConnectorApi:
     name : str
         Unique connector name.
     type : ConnectorType
-        Connector kind literal, always ``
+        Connector kind literal, always ``'api'``.
     url : str | None
        Direct absolute URL (when not using ``service``/``endpoint`` refs).
     method : str | None
         Optional HTTP method; typically omitted for sources (defaults to
-        GET) and used for targets (e.g., ``
+        GET) and used for targets (e.g., ``'post'``).
     headers : dict[str, str]
         Additional request headers.
     query_params : dict[str, Any]
@@ -111,7 +112,7 @@ class ConnectorApi:
 
     # Direct form
     url: str | None = None
-    # Optional HTTP method; typically omitted for sources (defaults to GET
+    # Optional HTTP method; typically omitted for sources (defaults to GET)
     # at runtime) and used for targets (e.g., 'post', 'put').
     method: str | None = None
     headers: dict[str, str] = field(default_factory=dict)
@@ -185,7 +186,7 @@ class ConnectorDb:
     name : str
         Unique connector name.
     type : ConnectorType
-        Connector kind literal, always ``
+        Connector kind literal, always ``'database'``.
     connection_string : str | None
         Connection string/DSN for the database.
     query : str | None
@@ -193,7 +194,7 @@ class ConnectorDb:
     table : str | None
         Target/source table name (optional).
     mode : str | None
-        Load mode hint (e.g., ``
+        Load mode hint (e.g., ``'append'``, ``'replace'``) — future use.
     """
 
     # -- Attributes -- #
@@ -262,9 +263,9 @@ class ConnectorFile:
     name : str
         Unique connector name.
     type : ConnectorType
-        Connector kind literal, always ``
+        Connector kind literal, always ``'file'``.
     format : str | None
-        File format (e.g., ``
+        File format (e.g., ``'json'``, ``'csv'``).
     path : str | None
         File path or URI.
     options : dict[str, Any]
```
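As the updated docstrings describe, `parse_connector` picks the concrete dataclass from the mapping's `type` key. A hedged sketch using only field names documented above (`name`, `type`, `format`, `path`):

```python
from etlplus.workflow import parse_connector

raw = {
    'name': 'users_csv',
    'type': 'file',     # selects ConnectorFile
    'format': 'csv',
    'path': 'data/users.csv',
}
connector = parse_connector(raw)
print(type(connector).__name__)  # expected: ConnectorFile
```
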
etlplus/workflow/dag.py
ADDED

```diff
@@ -0,0 +1,105 @@
+"""
+:mod:`etlplus.workflow.dag` module.
+
+Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
+:attr:`depends_on`.
+"""
+
+from __future__ import annotations
+
+from collections import deque
+from dataclasses import dataclass
+
+from .jobs import JobConfig
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Errors
+    'DagError',
+    # Functions
+    'topological_sort_jobs',
+]
+
+
+# SECTION: ERRORS =========================================================== #
+
+
+@dataclass(slots=True)
+class DagError(ValueError):
+    """
+    Raised when the job dependency graph is invalid.
+
+    Attributes
+    ----------
+    message : str
+        Error message.
+    """
+
+    # -- Attributes -- #
+
+    message: str
+
+    # -- Magic Methods (Object Representation) -- #
+
+    def __str__(self) -> str:
+        return self.message
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def topological_sort_jobs(
+    jobs: list[JobConfig],
+) -> list[JobConfig]:
+    """
+    Return jobs in topological order based on :attr:`depends_on`.
+
+    Parameters
+    ----------
+    jobs : list[JobConfig]
+        List of job configurations to sort.
+
+    Returns
+    -------
+    list[JobConfig]
+        Jobs sorted in topological order.
+
+    Raises
+    ------
+    DagError
+        If a dependency is missing, self-referential, or when a cycle is
+        detected.
+    """
+    index = {job.name: job for job in jobs}
+    edges: dict[str, set[str]] = {name: set() for name in index}
+    indegree: dict[str, int] = {name: 0 for name in index}
+
+    for job in jobs:
+        for dep in job.depends_on:
+            if dep not in index:
+                raise DagError(
+                    f'Unknown dependency "{dep}" in job "{job.name}"',
+                )
+            if dep == job.name:
+                raise DagError(f'Job "{job.name}" depends on itself')
+            if job.name not in edges[dep]:
+                edges[dep].add(job.name)
+                indegree[job.name] += 1
+
+    queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
+    ordered: list[str] = []
+
+    while queue:
+        name = queue.popleft()
+        ordered.append(name)
+        for child in sorted(edges[name]):
+            indegree[child] -= 1
+            if indegree[child] == 0:
+                queue.append(child)
+
+    if len(ordered) != len(jobs):
+        raise DagError('Dependency cycle detected')
+
+    return [index[name] for name in ordered]
```
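A short usage sketch for the new helper, built only from fields shown in this diff (`name`, `depends_on`):

```python
from etlplus.workflow import JobConfig, topological_sort_jobs
from etlplus.workflow.dag import DagError

jobs = [
    JobConfig(name='load', depends_on=['transform']),
    JobConfig(name='extract'),
    JobConfig(name='transform', depends_on=['extract']),
]
print([job.name for job in topological_sort_jobs(jobs)])
# ['extract', 'transform', 'load']

try:
    topological_sort_jobs([JobConfig(name='a', depends_on=['a'])])
except DagError as exc:
    print(exc)  # Job "a" depends on itself
```

Ties are broken alphabetically because ready jobs are pulled from a sorted deque, so the ordering is deterministic for a given config.
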

etlplus/{config → workflow}/jobs.py
RENAMED

```diff
@@ -1,12 +1,12 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.jobs` module.
 
 Data classes modeling job orchestration references (extract, validate,
 transform, load).
 
 Notes
 -----
-- Lightweight references used inside
+- Lightweight references used inside :class:`PipelineConfig` to avoid storing
   large nested structures.
 - All attributes are simple and optional where appropriate, keeping parsing
   tolerant.
@@ -26,6 +26,7 @@ from ..utils import maybe_mapping
 
 
 __all__ = [
+    # Data Classes
     'ExtractRef',
     'JobConfig',
     'LoadRef',
@@ -34,10 +35,7 @@ __all__ = [
 ]
 
 
-# SECTION:
-
-
-# SECTION: CLASSES ========================================================== #
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
@@ -65,12 +63,13 @@ class ExtractRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into an :class:`ExtractRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`source` and optional :attr:`options`.
 
         Returns
         -------
@@ -100,6 +99,8 @@ class JobConfig:
         Unique job name.
     description : str | None
         Optional human-friendly description.
+    depends_on : list[str]
+        Optional job dependency list. Dependencies must refer to other jobs.
     extract : ExtractRef | None
         Extraction reference.
     validate : ValidationRef | None
@@ -114,6 +115,7 @@ class JobConfig:
 
     name: str
     description: str | None = None
+    depends_on: list[str] = field(default_factory=list)
     extract: ExtractRef | None = None
     validate: ValidationRef | None = None
     transform: TransformRef | None = None
@@ -126,7 +128,8 @@ class JobConfig:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`JobConfig` instance.
 
         Parameters
         ----------
@@ -149,9 +152,19 @@ class JobConfig:
         if description is not None and not isinstance(description, str):
             description = str(description)
 
+        depends_raw = data.get('depends_on')
+        depends_on: list[str] = []
+        if isinstance(depends_raw, str):
+            depends_on = [depends_raw]
+        elif isinstance(depends_raw, list):
+            for entry in depends_raw:
+                if isinstance(entry, str):
+                    depends_on.append(entry)
+
         return cls(
             name=name,
             description=description,
+            depends_on=depends_on,
             extract=ExtractRef.from_obj(data.get('extract')),
             validate=ValidationRef.from_obj(data.get('validate')),
             transform=TransformRef.from_obj(data.get('transform')),
@@ -184,12 +197,13 @@ class LoadRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`LoadRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`target` and optional :attr:`overrides`.
 
         Returns
         -------
@@ -230,12 +244,13 @@ class TransformRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`TransformRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`pipeline`.
 
         Returns
         -------
@@ -280,12 +295,13 @@ class ValidationRef:
         cls,
         obj: Any,
     ) -> Self | None:
-        """
+        """
+        Parse a mapping into a :class:`ValidationRef` instance.
 
         Parameters
         ----------
         obj : Any
-            Mapping with
+            Mapping with :attr:`ruleset` plus optional metadata.
 
         Returns
         -------
```
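The new `depends_on` handling in `JobConfig.from_obj` accepts either a single string or a list and drops non-string entries; a minimal sketch, assuming the mapping carries at least a `name` key:

```python
from etlplus.workflow import JobConfig

job = JobConfig.from_obj({'name': 'load_users', 'depends_on': 'extract_users'})
print(job.depends_on)  # ['extract_users']

job = JobConfig.from_obj({'name': 'report', 'depends_on': ['load_users', 42]})
print(job.depends_on)  # ['load_users'] (the non-string entry is dropped)
```
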

etlplus/{config → workflow}/pipeline.py
RENAMED

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.pipeline` module.
 
 Pipeline configuration model and helpers for job orchestration.
 
@@ -38,7 +38,15 @@ from .utils import deep_substitute
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = [
+__all__ = [
+    # Data Classes
+    'PipelineConfig',
+    # Functions
+    'load_pipeline_config',
+]
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
 def _build_jobs(
@@ -156,7 +164,7 @@ def load_pipeline_config(
     return PipelineConfig.from_yaml(path, substitute=substitute, env=env)
 
 
-# SECTION: CLASSES
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
```
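`load_pipeline_config` stays a thin wrapper over `PipelineConfig.from_yaml`, as the context line in the last hunk shows; a hedged sketch with parameter names inferred from that call:

```python
from etlplus.workflow import load_pipeline_config

# substitute/env mirror the PipelineConfig.from_yaml(path, substitute=..., env=...)
# call shown above; the env mapping here is illustrative.
cfg = load_pipeline_config('configs/pipeline.yml', substitute=True, env={'ENV': 'dev'})
print(cfg)
```
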

etlplus/{config → workflow}/profile.py
RENAMED

```diff
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.
+:mod:`etlplus.workflow.profile` module.
 
 Profile model for pipeline-level defaults and environment.
 
@@ -22,10 +22,13 @@ from ..utils import cast_str_dict
 # SECTION: EXPORTS ========================================================== #
 
 
-__all__ = [
+__all__ = [
+    # Data Classes
+    'ProfileConfig',
+]
 
 
-# SECTION: CLASSES
+# SECTION: DATA CLASSES ===================================================== #
 
 
 @dataclass(kw_only=True, slots=True)
@@ -53,7 +56,7 @@ class ProfileConfig:
         cls,
         obj: StrAnyMap | None,
     ) -> Self:
-        """Parse a mapping into a
+        """Parse a mapping into a :class:`ProfileConfig` instance.
 
         Parameters
         ----------
@@ -64,7 +67,7 @@ class ProfileConfig:
         -------
         Self
             Parsed profile configuration; non-mapping input yields a default
-            instance. All
+            instance. All :attr:`env` values are coerced to strings.
         """
         if not isinstance(obj, Mapping):
             return cls()
```
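Per the updated docstring, non-mapping input yields a default `ProfileConfig` and `env` values are coerced to strings; a small sketch (the `env` attribute name comes from the docstring, not from code shown here):

```python
from etlplus.workflow.profile import ProfileConfig

profile = ProfileConfig.from_obj({'env': {'PORT': 5432}})
print(profile.env)  # expected: {'PORT': '5432'}

print(ProfileConfig.from_obj(None))  # falls back to a default instance
```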
|