etlplus 0.16.2__py3-none-any.whl → 0.16.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +22 -0
- etlplus/__init__.py +2 -0
- etlplus/cli/commands.py +22 -22
- etlplus/cli/handlers.py +8 -9
- etlplus/{workflow/pipeline.py → config.py} +17 -37
- etlplus/enums.py +35 -167
- etlplus/ops/__init__.py +1 -0
- etlplus/ops/enums.py +173 -0
- etlplus/ops/run.py +3 -3
- etlplus/ops/transform.py +16 -16
- etlplus/ops/types.py +147 -0
- etlplus/types.py +2 -100
- etlplus/workflow/README.md +0 -24
- etlplus/workflow/__init__.py +0 -4
- etlplus/workflow/jobs.py +0 -2
- {etlplus-0.16.2.dist-info → etlplus-0.16.4.dist-info}/METADATA +1 -1
- {etlplus-0.16.2.dist-info → etlplus-0.16.4.dist-info}/RECORD +21 -19
- {etlplus-0.16.2.dist-info → etlplus-0.16.4.dist-info}/WHEEL +0 -0
- {etlplus-0.16.2.dist-info → etlplus-0.16.4.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.2.dist-info → etlplus-0.16.4.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.2.dist-info → etlplus-0.16.4.dist-info}/top_level.txt +0 -0
etlplus/README.md
CHANGED
|
@@ -31,6 +31,28 @@ assert validate(filtered, {"age": {"type": "number", "min": 0}})["valid"]
|
|
|
31
31
|
load(filtered, "file", "output.json", file_format="json")
|
|
32
32
|
```
|
|
33
33
|
|
|
34
|
+
## Loading and Validating Configs
|
|
35
|
+
|
|
36
|
+
Use the provided classes to load and validate configuration files:
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from etlplus import Config
|
|
40
|
+
|
|
41
|
+
cfg = Config.from_yaml("pipeline.yml")
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
- Supports YAML and JSON formats
|
|
45
|
+
- Validates against expected schema
|
|
46
|
+
|
|
47
|
+
## Example: Loading a Pipeline Config
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from etlplus import Config
|
|
51
|
+
|
|
52
|
+
config = Config.from_yaml("configs/pipeline.yml")
|
|
53
|
+
print(config)
|
|
54
|
+
```
|
|
55
|
+
|
|
34
56
|
## See Also
|
|
35
57
|
|
|
36
58
|
- [Top-level project README](../README.md)
|
etlplus/__init__.py
CHANGED
|
@@ -5,6 +5,7 @@ Top-level facade for the ETLPlus toolkit.
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from .__version__ import __version__
|
|
8
|
+
from .config import Config
|
|
8
9
|
|
|
9
10
|
__author__ = 'ETLPlus Team'
|
|
10
11
|
|
|
@@ -15,4 +16,5 @@ __author__ = 'ETLPlus Team'
|
|
|
15
16
|
__all__ = [
|
|
16
17
|
'__author__',
|
|
17
18
|
'__version__',
|
|
19
|
+
'Config',
|
|
18
20
|
]
|
etlplus/cli/commands.py
CHANGED
|
@@ -62,6 +62,16 @@ __all__ = ['app']
|
|
|
62
62
|
# SECTION: TYPE ALIASES ==================================================== #
|
|
63
63
|
|
|
64
64
|
|
|
65
|
+
ConfigOption = Annotated[
|
|
66
|
+
str,
|
|
67
|
+
typer.Option(
|
|
68
|
+
...,
|
|
69
|
+
'--config',
|
|
70
|
+
metavar='PATH',
|
|
71
|
+
help='Path to YAML-formatted configuration file.',
|
|
72
|
+
),
|
|
73
|
+
]
|
|
74
|
+
|
|
65
75
|
JobOption = Annotated[
|
|
66
76
|
str | None,
|
|
67
77
|
typer.Option(
|
|
@@ -97,16 +107,6 @@ OutputOption = Annotated[
|
|
|
97
107
|
),
|
|
98
108
|
]
|
|
99
109
|
|
|
100
|
-
PipelineConfigOption = Annotated[
|
|
101
|
-
str,
|
|
102
|
-
typer.Option(
|
|
103
|
-
...,
|
|
104
|
-
'--config',
|
|
105
|
-
metavar='PATH',
|
|
106
|
-
help='Path to pipeline YAML configuration file.',
|
|
107
|
-
),
|
|
108
|
-
]
|
|
109
|
-
|
|
110
110
|
PipelineOption = Annotated[
|
|
111
111
|
str | None,
|
|
112
112
|
typer.Option(
|
|
@@ -407,7 +407,7 @@ def _root(
|
|
|
407
407
|
@app.command('check')
|
|
408
408
|
def check_cmd(
|
|
409
409
|
ctx: typer.Context,
|
|
410
|
-
config:
|
|
410
|
+
config: ConfigOption,
|
|
411
411
|
jobs: JobsOption = False,
|
|
412
412
|
pipelines: PipelinesOption = False,
|
|
413
413
|
sources: SourcesOption = False,
|
|
@@ -422,20 +422,20 @@ def check_cmd(
|
|
|
422
422
|
----------
|
|
423
423
|
ctx : typer.Context
|
|
424
424
|
The Typer context.
|
|
425
|
-
config :
|
|
425
|
+
config : ConfigOption
|
|
426
426
|
Path to pipeline YAML configuration file.
|
|
427
|
-
jobs :
|
|
427
|
+
jobs : JobsOption, optional
|
|
428
428
|
List available job names and exit. Default is ``False``.
|
|
429
|
-
pipelines :
|
|
429
|
+
pipelines : PipelinesOption, optional
|
|
430
430
|
List ETL pipelines. Default is ``False``.
|
|
431
|
-
sources :
|
|
431
|
+
sources : SourcesOption, optional
|
|
432
432
|
List data sources. Default is ``False``.
|
|
433
|
-
summary :
|
|
433
|
+
summary : SummaryOption, optional
|
|
434
434
|
Show pipeline summary (name, version, sources, targets, jobs). Default
|
|
435
435
|
is ``False``.
|
|
436
|
-
targets :
|
|
436
|
+
targets : TargetsOption, optional
|
|
437
437
|
List data targets. Default is ``False``.
|
|
438
|
-
transforms :
|
|
438
|
+
transforms : TransformsOption, optional
|
|
439
439
|
List data transforms. Default is ``False``.
|
|
440
440
|
|
|
441
441
|
Returns
|
|
@@ -725,7 +725,7 @@ def render_cmd(
|
|
|
725
725
|
@app.command('run')
|
|
726
726
|
def run_cmd(
|
|
727
727
|
ctx: typer.Context,
|
|
728
|
-
config:
|
|
728
|
+
config: ConfigOption,
|
|
729
729
|
job: JobOption = None,
|
|
730
730
|
pipeline: PipelineOption = None,
|
|
731
731
|
) -> int:
|
|
@@ -736,11 +736,11 @@ def run_cmd(
|
|
|
736
736
|
----------
|
|
737
737
|
ctx : typer.Context
|
|
738
738
|
The Typer context.
|
|
739
|
-
config :
|
|
739
|
+
config : ConfigOption
|
|
740
740
|
Path to pipeline YAML configuration file.
|
|
741
|
-
job :
|
|
741
|
+
job : JobOption, optional
|
|
742
742
|
Name of the job to run. Default is ``None``.
|
|
743
|
-
pipeline :
|
|
743
|
+
pipeline : PipelineOption, optional
|
|
744
744
|
Name of the pipeline to run. Default is ``None``.
|
|
745
745
|
|
|
746
746
|
Returns
|
etlplus/cli/handlers.py
CHANGED
|
@@ -14,6 +14,7 @@ from typing import Any
|
|
|
14
14
|
from typing import Literal
|
|
15
15
|
from typing import cast
|
|
16
16
|
|
|
17
|
+
from .. import Config
|
|
17
18
|
from ..database import load_table_spec
|
|
18
19
|
from ..database import render_tables
|
|
19
20
|
from ..file import File
|
|
@@ -26,8 +27,6 @@ from ..ops import validate
|
|
|
26
27
|
from ..ops.validate import FieldRules
|
|
27
28
|
from ..types import JSONData
|
|
28
29
|
from ..types import TemplateKey
|
|
29
|
-
from ..workflow import PipelineConfig
|
|
30
|
-
from ..workflow import load_pipeline_config
|
|
31
30
|
from . import io as cli_io
|
|
32
31
|
|
|
33
32
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -73,14 +72,14 @@ def _collect_table_specs(
|
|
|
73
72
|
specs.append(dict(load_table_spec(Path(spec_path))))
|
|
74
73
|
|
|
75
74
|
if config_path:
|
|
76
|
-
cfg =
|
|
75
|
+
cfg = Config.from_yaml(config_path, substitute=True)
|
|
77
76
|
specs.extend(getattr(cfg, 'table_schemas', []))
|
|
78
77
|
|
|
79
78
|
return specs
|
|
80
79
|
|
|
81
80
|
|
|
82
81
|
def _check_sections(
|
|
83
|
-
cfg:
|
|
82
|
+
cfg: Config,
|
|
84
83
|
*,
|
|
85
84
|
jobs: bool,
|
|
86
85
|
pipelines: bool,
|
|
@@ -93,7 +92,7 @@ def _check_sections(
|
|
|
93
92
|
|
|
94
93
|
Parameters
|
|
95
94
|
----------
|
|
96
|
-
cfg :
|
|
95
|
+
cfg : Config
|
|
97
96
|
The loaded pipeline configuration.
|
|
98
97
|
jobs : bool
|
|
99
98
|
Whether to include job metadata.
|
|
@@ -133,14 +132,14 @@ def _check_sections(
|
|
|
133
132
|
|
|
134
133
|
|
|
135
134
|
def _pipeline_summary(
|
|
136
|
-
cfg:
|
|
135
|
+
cfg: Config,
|
|
137
136
|
) -> dict[str, Any]:
|
|
138
137
|
"""
|
|
139
138
|
Return a human-friendly snapshot of a pipeline config.
|
|
140
139
|
|
|
141
140
|
Parameters
|
|
142
141
|
----------
|
|
143
|
-
cfg :
|
|
142
|
+
cfg : Config
|
|
144
143
|
The loaded pipeline configuration.
|
|
145
144
|
|
|
146
145
|
Returns
|
|
@@ -229,7 +228,7 @@ def check_handler(
|
|
|
229
228
|
Zero on success.
|
|
230
229
|
|
|
231
230
|
"""
|
|
232
|
-
cfg =
|
|
231
|
+
cfg = Config.from_yaml(config, substitute=substitute)
|
|
233
232
|
if summary:
|
|
234
233
|
cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
|
|
235
234
|
return 0
|
|
@@ -514,7 +513,7 @@ def run_handler(
|
|
|
514
513
|
int
|
|
515
514
|
Zero on success.
|
|
516
515
|
"""
|
|
517
|
-
cfg =
|
|
516
|
+
cfg = Config.from_yaml(config, substitute=True)
|
|
518
517
|
|
|
519
518
|
job_name = job or pipeline
|
|
520
519
|
if job_name:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
|
-
:mod:`etlplus.
|
|
2
|
+
:mod:`etlplus.config` module.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
Configuration model and helpers for job pipeline orchestration.
|
|
5
5
|
|
|
6
6
|
Notes
|
|
7
7
|
-----
|
|
@@ -24,26 +24,24 @@ from pathlib import Path
|
|
|
24
24
|
from typing import Any
|
|
25
25
|
from typing import Self
|
|
26
26
|
|
|
27
|
-
from
|
|
28
|
-
from
|
|
29
|
-
from
|
|
30
|
-
from
|
|
31
|
-
from
|
|
32
|
-
from
|
|
33
|
-
from
|
|
34
|
-
from
|
|
35
|
-
from
|
|
36
|
-
from .jobs import JobConfig
|
|
37
|
-
from .profile import ProfileConfig
|
|
27
|
+
from .api import ApiConfig
|
|
28
|
+
from .connector import Connector
|
|
29
|
+
from .connector import parse_connector
|
|
30
|
+
from .file import File
|
|
31
|
+
from .file import FileFormat
|
|
32
|
+
from .types import StrAnyMap
|
|
33
|
+
from .utils import coerce_dict
|
|
34
|
+
from .utils import deep_substitute
|
|
35
|
+
from .utils import maybe_mapping
|
|
36
|
+
from .workflow.jobs import JobConfig
|
|
37
|
+
from .workflow.profile import ProfileConfig
|
|
38
38
|
|
|
39
39
|
# SECTION: EXPORTS ========================================================== #
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
__all__ = [
|
|
43
43
|
# Data Classes
|
|
44
|
-
'
|
|
45
|
-
# Functions
|
|
46
|
-
'load_pipeline_config',
|
|
44
|
+
'Config',
|
|
47
45
|
]
|
|
48
46
|
|
|
49
47
|
|
|
@@ -126,29 +124,11 @@ def _parse_connector_entry(
|
|
|
126
124
|
return None
|
|
127
125
|
|
|
128
126
|
|
|
129
|
-
# SECTION: FUNCTIONS ======================================================== #
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
def load_pipeline_config(
|
|
133
|
-
path: Path | str,
|
|
134
|
-
*,
|
|
135
|
-
substitute: bool = False,
|
|
136
|
-
env: Mapping[str, str] | None = None,
|
|
137
|
-
) -> PipelineConfig:
|
|
138
|
-
"""
|
|
139
|
-
Load a pipeline YAML file into a ``PipelineConfig`` instance.
|
|
140
|
-
|
|
141
|
-
Delegates to ``PipelineConfig.from_yaml`` for construction and optional
|
|
142
|
-
variable substitution.
|
|
143
|
-
"""
|
|
144
|
-
return PipelineConfig.from_yaml(path, substitute=substitute, env=env)
|
|
145
|
-
|
|
146
|
-
|
|
147
127
|
# SECTION: DATA CLASSES ===================================================== #
|
|
148
128
|
|
|
149
129
|
|
|
150
130
|
@dataclass(kw_only=True, slots=True)
|
|
151
|
-
class
|
|
131
|
+
class Config:
|
|
152
132
|
"""
|
|
153
133
|
Configuration for the data processing pipeline.
|
|
154
134
|
|
|
@@ -211,7 +191,7 @@ class PipelineConfig:
|
|
|
211
191
|
env: Mapping[str, str] | None = None,
|
|
212
192
|
) -> Self:
|
|
213
193
|
"""
|
|
214
|
-
Parse a YAML file into a ``
|
|
194
|
+
Parse a YAML file into a ``Config`` instance.
|
|
215
195
|
|
|
216
196
|
Parameters
|
|
217
197
|
----------
|
|
@@ -259,7 +239,7 @@ class PipelineConfig:
|
|
|
259
239
|
raw: StrAnyMap,
|
|
260
240
|
) -> Self:
|
|
261
241
|
"""
|
|
262
|
-
Parse a mapping into a ``
|
|
242
|
+
Parse a mapping into a ``Config`` instance.
|
|
263
243
|
|
|
264
244
|
Parameters
|
|
265
245
|
----------
|
etlplus/enums.py
CHANGED
|
@@ -1,18 +1,14 @@
|
|
|
1
1
|
"""
|
|
2
2
|
:mod:`etlplus.enums` module.
|
|
3
3
|
|
|
4
|
-
Shared enumeration
|
|
4
|
+
Shared enumeration base class.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
import enum
|
|
10
|
-
import operator as _op
|
|
11
|
-
from statistics import fmean
|
|
12
10
|
from typing import Self
|
|
13
11
|
|
|
14
|
-
from .types import AggregateFunc
|
|
15
|
-
from .types import OperatorFunc
|
|
16
12
|
from .types import StrStrMap
|
|
17
13
|
|
|
18
14
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -20,10 +16,7 @@ from .types import StrStrMap
|
|
|
20
16
|
|
|
21
17
|
__all__ = [
|
|
22
18
|
# Enums
|
|
23
|
-
'AggregateName',
|
|
24
19
|
'CoercibleStrEnum',
|
|
25
|
-
'OperatorName',
|
|
26
|
-
'PipelineStep',
|
|
27
20
|
]
|
|
28
21
|
|
|
29
22
|
|
|
@@ -41,6 +34,7 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
41
34
|
Notes
|
|
42
35
|
-----
|
|
43
36
|
- Values are normalized via ``str(value).strip().casefold()``.
|
|
37
|
+
- If value matching fails, the raw string is tried as a member name.
|
|
44
38
|
- Error messages enumerate allowed values for easier debugging.
|
|
45
39
|
"""
|
|
46
40
|
|
|
@@ -56,7 +50,13 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
56
50
|
Returns
|
|
57
51
|
-------
|
|
58
52
|
StrStrMap
|
|
59
|
-
A mapping of alias
|
|
53
|
+
A mapping of alias strings to their corresponding enum member
|
|
54
|
+
values or names.
|
|
55
|
+
|
|
56
|
+
Notes
|
|
57
|
+
-----
|
|
58
|
+
- Alias keys are normalized via ``str(key).strip().casefold()``.
|
|
59
|
+
- Alias values should be member values or member names.
|
|
60
60
|
"""
|
|
61
61
|
return {}
|
|
62
62
|
|
|
@@ -80,7 +80,7 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
80
80
|
Parameters
|
|
81
81
|
----------
|
|
82
82
|
value : Self | str | object
|
|
83
|
-
An existing enum member or a
|
|
83
|
+
An existing enum member or a string-like value to normalize.
|
|
84
84
|
|
|
85
85
|
Returns
|
|
86
86
|
-------
|
|
@@ -95,10 +95,26 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
95
95
|
if isinstance(value, cls):
|
|
96
96
|
return value
|
|
97
97
|
try:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
98
|
+
raw = str(value).strip()
|
|
99
|
+
normalized = raw.casefold()
|
|
100
|
+
aliases = {
|
|
101
|
+
str(key).strip().casefold(): alias
|
|
102
|
+
for key, alias in cls.aliases().items()
|
|
103
|
+
}
|
|
104
|
+
resolved = aliases.get(normalized)
|
|
105
|
+
if resolved is None:
|
|
106
|
+
try:
|
|
107
|
+
return cls(normalized) # type: ignore[arg-type]
|
|
108
|
+
except (ValueError, TypeError):
|
|
109
|
+
return cls[raw] # type: ignore[index]
|
|
110
|
+
if isinstance(resolved, cls):
|
|
111
|
+
return resolved
|
|
112
|
+
try:
|
|
113
|
+
return cls(resolved) # type: ignore[arg-type]
|
|
114
|
+
except (ValueError, TypeError):
|
|
115
|
+
# Allow aliases to reference member names.
|
|
116
|
+
return cls[resolved] # type: ignore[index]
|
|
117
|
+
except (ValueError, TypeError, KeyError) as e:
|
|
102
118
|
allowed = ', '.join(cls.choices())
|
|
103
119
|
raise ValueError(
|
|
104
120
|
f'Invalid {cls.__name__} value: {value!r}. Allowed: {allowed}',
|
|
@@ -107,15 +123,15 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
107
123
|
@classmethod
|
|
108
124
|
def try_coerce(
|
|
109
125
|
cls,
|
|
110
|
-
value: object,
|
|
126
|
+
value: Self | str | object,
|
|
111
127
|
) -> Self | None:
|
|
112
128
|
"""
|
|
113
|
-
|
|
129
|
+
Attempt to coerce a value into the enum; return ``None`` on failure.
|
|
114
130
|
|
|
115
131
|
Parameters
|
|
116
132
|
----------
|
|
117
|
-
value : object
|
|
118
|
-
An existing enum member or a
|
|
133
|
+
value : Self | str | object
|
|
134
|
+
An existing enum member or a string-like value to normalize.
|
|
119
135
|
|
|
120
136
|
Returns
|
|
121
137
|
-------
|
|
@@ -124,153 +140,5 @@ class CoercibleStrEnum(enum.StrEnum):
|
|
|
124
140
|
"""
|
|
125
141
|
try:
|
|
126
142
|
return cls.coerce(value)
|
|
127
|
-
except ValueError:
|
|
143
|
+
except (ValueError, TypeError, KeyError):
|
|
128
144
|
return None
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
# SECTION: ENUMS ============================================================ #
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
class AggregateName(CoercibleStrEnum):
|
|
135
|
-
"""Supported aggregations with helpers."""
|
|
136
|
-
|
|
137
|
-
# -- Constants -- #
|
|
138
|
-
|
|
139
|
-
AVG = 'avg'
|
|
140
|
-
COUNT = 'count'
|
|
141
|
-
MAX = 'max'
|
|
142
|
-
MIN = 'min'
|
|
143
|
-
SUM = 'sum'
|
|
144
|
-
|
|
145
|
-
# -- Class Methods -- #
|
|
146
|
-
|
|
147
|
-
@property
|
|
148
|
-
def func(self) -> AggregateFunc:
|
|
149
|
-
"""
|
|
150
|
-
Get the aggregation function for this aggregation type.
|
|
151
|
-
|
|
152
|
-
Returns
|
|
153
|
-
-------
|
|
154
|
-
AggregateFunc
|
|
155
|
-
The aggregation function corresponding to this aggregation type.
|
|
156
|
-
"""
|
|
157
|
-
if self is AggregateName.COUNT:
|
|
158
|
-
return lambda xs, n: n
|
|
159
|
-
if self is AggregateName.MAX:
|
|
160
|
-
return lambda xs, n: (max(xs) if xs else None)
|
|
161
|
-
if self is AggregateName.MIN:
|
|
162
|
-
return lambda xs, n: (min(xs) if xs else None)
|
|
163
|
-
if self is AggregateName.SUM:
|
|
164
|
-
return lambda xs, n: sum(xs)
|
|
165
|
-
|
|
166
|
-
# AVG
|
|
167
|
-
return lambda xs, n: (fmean(xs) if xs else 0.0)
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
class OperatorName(CoercibleStrEnum):
|
|
171
|
-
"""Supported comparison operators with helpers."""
|
|
172
|
-
|
|
173
|
-
# -- Constants -- #
|
|
174
|
-
|
|
175
|
-
EQ = 'eq'
|
|
176
|
-
NE = 'ne'
|
|
177
|
-
GT = 'gt'
|
|
178
|
-
GTE = 'gte'
|
|
179
|
-
LT = 'lt'
|
|
180
|
-
LTE = 'lte'
|
|
181
|
-
IN = 'in'
|
|
182
|
-
CONTAINS = 'contains'
|
|
183
|
-
|
|
184
|
-
# -- Getters -- #
|
|
185
|
-
|
|
186
|
-
@property
|
|
187
|
-
def func(self) -> OperatorFunc:
|
|
188
|
-
"""
|
|
189
|
-
Get the comparison function for this operator.
|
|
190
|
-
|
|
191
|
-
Returns
|
|
192
|
-
-------
|
|
193
|
-
OperatorFunc
|
|
194
|
-
The comparison function corresponding to this operator.
|
|
195
|
-
"""
|
|
196
|
-
match self:
|
|
197
|
-
case OperatorName.EQ:
|
|
198
|
-
return _op.eq
|
|
199
|
-
case OperatorName.NE:
|
|
200
|
-
return _op.ne
|
|
201
|
-
case OperatorName.GT:
|
|
202
|
-
return _op.gt
|
|
203
|
-
case OperatorName.GTE:
|
|
204
|
-
return _op.ge
|
|
205
|
-
case OperatorName.LT:
|
|
206
|
-
return _op.lt
|
|
207
|
-
case OperatorName.LTE:
|
|
208
|
-
return _op.le
|
|
209
|
-
case OperatorName.IN:
|
|
210
|
-
return lambda a, b: a in b
|
|
211
|
-
case OperatorName.CONTAINS:
|
|
212
|
-
return lambda a, b: b in a
|
|
213
|
-
|
|
214
|
-
# -- Class Methods -- #
|
|
215
|
-
|
|
216
|
-
@classmethod
|
|
217
|
-
def aliases(cls) -> StrStrMap:
|
|
218
|
-
"""
|
|
219
|
-
Return a mapping of common aliases for each enum member.
|
|
220
|
-
|
|
221
|
-
Returns
|
|
222
|
-
-------
|
|
223
|
-
StrStrMap
|
|
224
|
-
A mapping of alias names to their corresponding enum member names.
|
|
225
|
-
"""
|
|
226
|
-
return {
|
|
227
|
-
'==': 'eq',
|
|
228
|
-
'=': 'eq',
|
|
229
|
-
'!=': 'ne',
|
|
230
|
-
'<>': 'ne',
|
|
231
|
-
'>=': 'gte',
|
|
232
|
-
'≥': 'gte',
|
|
233
|
-
'<=': 'lte',
|
|
234
|
-
'≤': 'lte',
|
|
235
|
-
'>': 'gt',
|
|
236
|
-
'<': 'lt',
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
class PipelineStep(CoercibleStrEnum):
|
|
241
|
-
"""Pipeline step names as an enum for internal orchestration."""
|
|
242
|
-
|
|
243
|
-
# -- Constants -- #
|
|
244
|
-
|
|
245
|
-
FILTER = 'filter'
|
|
246
|
-
MAP = 'map'
|
|
247
|
-
SELECT = 'select'
|
|
248
|
-
SORT = 'sort'
|
|
249
|
-
AGGREGATE = 'aggregate'
|
|
250
|
-
|
|
251
|
-
# -- Getters -- #
|
|
252
|
-
|
|
253
|
-
@property
|
|
254
|
-
def order(self) -> int:
|
|
255
|
-
"""
|
|
256
|
-
Get the execution order of this pipeline step.
|
|
257
|
-
|
|
258
|
-
Returns
|
|
259
|
-
-------
|
|
260
|
-
int
|
|
261
|
-
The execution order of this pipeline step.
|
|
262
|
-
"""
|
|
263
|
-
return _PIPELINE_ORDER_INDEX[self]
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
# SECTION: INTERNAL CONSTANTS ============================================== #
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
# Precomputed order index for PipelineStep; avoids recomputing on each access.
|
|
270
|
-
_PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
|
|
271
|
-
PipelineStep.FILTER: 0,
|
|
272
|
-
PipelineStep.MAP: 1,
|
|
273
|
-
PipelineStep.SELECT: 2,
|
|
274
|
-
PipelineStep.SORT: 3,
|
|
275
|
-
PipelineStep.AGGREGATE: 4,
|
|
276
|
-
}
|
etlplus/ops/__init__.py
CHANGED
etlplus/ops/enums.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.ops.enums` module.
|
|
3
|
+
|
|
4
|
+
Operation-specific enums and helpers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import operator as _op
|
|
10
|
+
from statistics import fmean
|
|
11
|
+
|
|
12
|
+
from ..enums import CoercibleStrEnum
|
|
13
|
+
from ..types import StrStrMap
|
|
14
|
+
from .types import AggregateFunc
|
|
15
|
+
from .types import OperatorFunc
|
|
16
|
+
|
|
17
|
+
# SECTION: EXPORTS ========================================================= #
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
# Enums
|
|
22
|
+
'AggregateName',
|
|
23
|
+
'OperatorName',
|
|
24
|
+
'PipelineStep',
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# SECTION: ENUMS ============================================================ #
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class AggregateName(CoercibleStrEnum):
|
|
32
|
+
"""Supported aggregations with helpers."""
|
|
33
|
+
|
|
34
|
+
# -- Constants -- #
|
|
35
|
+
|
|
36
|
+
AVG = 'avg'
|
|
37
|
+
COUNT = 'count'
|
|
38
|
+
MAX = 'max'
|
|
39
|
+
MIN = 'min'
|
|
40
|
+
SUM = 'sum'
|
|
41
|
+
|
|
42
|
+
# -- Getters -- #
|
|
43
|
+
|
|
44
|
+
@property
|
|
45
|
+
def func(self) -> AggregateFunc:
|
|
46
|
+
"""
|
|
47
|
+
Get the aggregation function for this aggregation type.
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
AggregateFunc
|
|
52
|
+
The aggregation function corresponding to this aggregation type.
|
|
53
|
+
"""
|
|
54
|
+
if self is AggregateName.COUNT:
|
|
55
|
+
return lambda xs, n: n
|
|
56
|
+
if self is AggregateName.MAX:
|
|
57
|
+
return lambda xs, n: (max(xs) if xs else None)
|
|
58
|
+
if self is AggregateName.MIN:
|
|
59
|
+
return lambda xs, n: (min(xs) if xs else None)
|
|
60
|
+
if self is AggregateName.SUM:
|
|
61
|
+
return lambda xs, n: sum(xs)
|
|
62
|
+
|
|
63
|
+
# AVG
|
|
64
|
+
return lambda xs, n: (fmean(xs) if xs else 0.0)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class OperatorName(CoercibleStrEnum):
|
|
68
|
+
"""Supported comparison operators with helpers."""
|
|
69
|
+
|
|
70
|
+
# -- Constants -- #
|
|
71
|
+
|
|
72
|
+
EQ = 'eq'
|
|
73
|
+
NE = 'ne'
|
|
74
|
+
GT = 'gt'
|
|
75
|
+
GTE = 'gte'
|
|
76
|
+
LT = 'lt'
|
|
77
|
+
LTE = 'lte'
|
|
78
|
+
IN = 'in'
|
|
79
|
+
CONTAINS = 'contains'
|
|
80
|
+
|
|
81
|
+
# -- Getters -- #
|
|
82
|
+
|
|
83
|
+
@property
|
|
84
|
+
def func(self) -> OperatorFunc:
|
|
85
|
+
"""
|
|
86
|
+
Get the comparison function for this operator.
|
|
87
|
+
|
|
88
|
+
Returns
|
|
89
|
+
-------
|
|
90
|
+
OperatorFunc
|
|
91
|
+
The comparison function corresponding to this operator.
|
|
92
|
+
"""
|
|
93
|
+
match self:
|
|
94
|
+
case OperatorName.EQ:
|
|
95
|
+
return _op.eq
|
|
96
|
+
case OperatorName.NE:
|
|
97
|
+
return _op.ne
|
|
98
|
+
case OperatorName.GT:
|
|
99
|
+
return _op.gt
|
|
100
|
+
case OperatorName.GTE:
|
|
101
|
+
return _op.ge
|
|
102
|
+
case OperatorName.LT:
|
|
103
|
+
return _op.lt
|
|
104
|
+
case OperatorName.LTE:
|
|
105
|
+
return _op.le
|
|
106
|
+
case OperatorName.IN:
|
|
107
|
+
return lambda a, b: a in b
|
|
108
|
+
case OperatorName.CONTAINS:
|
|
109
|
+
return lambda a, b: b in a
|
|
110
|
+
|
|
111
|
+
# -- Class Methods -- #
|
|
112
|
+
|
|
113
|
+
@classmethod
|
|
114
|
+
def aliases(cls) -> StrStrMap:
|
|
115
|
+
"""
|
|
116
|
+
Return a mapping of common aliases for each enum member.
|
|
117
|
+
|
|
118
|
+
Returns
|
|
119
|
+
-------
|
|
120
|
+
StrStrMap
|
|
121
|
+
A mapping of alias strings to their corresponding enum member values.
|
|
122
|
+
"""
|
|
123
|
+
return {
|
|
124
|
+
'==': 'eq',
|
|
125
|
+
'=': 'eq',
|
|
126
|
+
'!=': 'ne',
|
|
127
|
+
'<>': 'ne',
|
|
128
|
+
'>=': 'gte',
|
|
129
|
+
'≥': 'gte',
|
|
130
|
+
'<=': 'lte',
|
|
131
|
+
'≤': 'lte',
|
|
132
|
+
'>': 'gt',
|
|
133
|
+
'<': 'lt',
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class PipelineStep(CoercibleStrEnum):
|
|
138
|
+
"""Pipeline step names as an enum for internal orchestration."""
|
|
139
|
+
|
|
140
|
+
# -- Constants -- #
|
|
141
|
+
|
|
142
|
+
FILTER = 'filter'
|
|
143
|
+
MAP = 'map'
|
|
144
|
+
SELECT = 'select'
|
|
145
|
+
SORT = 'sort'
|
|
146
|
+
AGGREGATE = 'aggregate'
|
|
147
|
+
|
|
148
|
+
# -- Getters -- #
|
|
149
|
+
|
|
150
|
+
@property
|
|
151
|
+
def order(self) -> int:
|
|
152
|
+
"""
|
|
153
|
+
Get the execution order of this pipeline step.
|
|
154
|
+
|
|
155
|
+
Returns
|
|
156
|
+
-------
|
|
157
|
+
int
|
|
158
|
+
The execution order of this pipeline step.
|
|
159
|
+
"""
|
|
160
|
+
return _PIPELINE_ORDER_INDEX[self]
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# SECTION: INTERNAL CONSTANTS ============================================== #
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# Precomputed order index for PipelineStep; avoids recomputing on each access.
|
|
167
|
+
_PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
|
|
168
|
+
PipelineStep.FILTER: 0,
|
|
169
|
+
PipelineStep.MAP: 1,
|
|
170
|
+
PipelineStep.SELECT: 2,
|
|
171
|
+
PipelineStep.SORT: 3,
|
|
172
|
+
PipelineStep.AGGREGATE: 4,
|
|
173
|
+
}
|
etlplus/ops/run.py
CHANGED
|
@@ -11,14 +11,14 @@ from typing import Final
|
|
|
11
11
|
from typing import cast
|
|
12
12
|
|
|
13
13
|
from ..api import HttpMethod
|
|
14
|
+
from ..config import Config
|
|
14
15
|
from ..connector import DataConnectorType
|
|
15
16
|
from ..file import FileFormat
|
|
17
|
+
from ..ops.types import PipelineConfig
|
|
16
18
|
from ..types import JSONData
|
|
17
19
|
from ..types import JSONDict
|
|
18
|
-
from ..types import PipelineConfig
|
|
19
20
|
from ..types import StrPath
|
|
20
21
|
from ..utils import print_json
|
|
21
|
-
from ..workflow import load_pipeline_config
|
|
22
22
|
from .extract import extract
|
|
23
23
|
from .extract import extract_from_api_source
|
|
24
24
|
from .load import load
|
|
@@ -176,7 +176,7 @@ def run(
|
|
|
176
176
|
If the job is not found or if there are configuration issues.
|
|
177
177
|
"""
|
|
178
178
|
cfg_path = config_path or DEFAULT_CONFIG_PATH
|
|
179
|
-
cfg =
|
|
179
|
+
cfg = Config.from_yaml(cfg_path, substitute=True)
|
|
180
180
|
|
|
181
181
|
# Lookup job by name
|
|
182
182
|
if not (job_obj := next((j for j in cfg.jobs if j.name == job), None)):
|
etlplus/ops/transform.py
CHANGED
|
@@ -44,28 +44,28 @@ from collections.abc import Sequence
|
|
|
44
44
|
from typing import Any
|
|
45
45
|
from typing import cast
|
|
46
46
|
|
|
47
|
-
from ..
|
|
48
|
-
from ..enums import OperatorName
|
|
49
|
-
from ..enums import PipelineStep
|
|
50
|
-
from ..types import AggregateFunc
|
|
51
|
-
from ..types import AggregateSpec
|
|
52
|
-
from ..types import FieldName
|
|
53
|
-
from ..types import Fields
|
|
54
|
-
from ..types import FilterSpec
|
|
47
|
+
from ..ops.types import PipelineConfig
|
|
55
48
|
from ..types import JSONData
|
|
56
49
|
from ..types import JSONDict
|
|
57
50
|
from ..types import JSONList
|
|
58
|
-
from ..types import MapSpec
|
|
59
|
-
from ..types import OperatorFunc
|
|
60
|
-
from ..types import PipelineConfig
|
|
61
|
-
from ..types import PipelineStepName
|
|
62
|
-
from ..types import SortKey
|
|
63
|
-
from ..types import StepApplier
|
|
64
|
-
from ..types import StepOrSteps
|
|
65
|
-
from ..types import StepSpec
|
|
66
51
|
from ..types import StrPath
|
|
67
52
|
from ..utils import to_number
|
|
53
|
+
from .enums import AggregateName
|
|
54
|
+
from .enums import OperatorName
|
|
55
|
+
from .enums import PipelineStep
|
|
68
56
|
from .load import load_data
|
|
57
|
+
from .types import AggregateFunc
|
|
58
|
+
from .types import AggregateSpec
|
|
59
|
+
from .types import FieldName
|
|
60
|
+
from .types import Fields
|
|
61
|
+
from .types import FilterSpec
|
|
62
|
+
from .types import MapSpec
|
|
63
|
+
from .types import OperatorFunc
|
|
64
|
+
from .types import PipelineStepName
|
|
65
|
+
from .types import SortKey
|
|
66
|
+
from .types import StepApplier
|
|
67
|
+
from .types import StepOrSteps
|
|
68
|
+
from .types import StepSpec
|
|
69
69
|
|
|
70
70
|
# SECTION: EXPORTS ========================================================== #
|
|
71
71
|
|
etlplus/ops/types.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.ops.types` module.
|
|
3
|
+
|
|
4
|
+
Shared type aliases leveraged across :mod:`etlplus.ops` modules.
|
|
5
|
+
|
|
6
|
+
Notes
|
|
7
|
+
-----
|
|
8
|
+
- Centralizes ops-focused aliases (functions, specs, and pipeline helpers).
|
|
9
|
+
- Relies on Python 3.12+ ``type`` statements for readability and IDE support.
|
|
10
|
+
|
|
11
|
+
Examples
|
|
12
|
+
--------
|
|
13
|
+
>>> from etlplus.ops.types import AggregateFunc, OperatorFunc
|
|
14
|
+
>>> def total(xs: list[float], _: int) -> float:
|
|
15
|
+
... return sum(xs)
|
|
16
|
+
>>> agg: AggregateFunc = total
|
|
17
|
+
>>> op: OperatorFunc = lambda a, b: a == b
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from collections.abc import Callable
|
|
23
|
+
from collections.abc import Mapping
|
|
24
|
+
from collections.abc import Sequence
|
|
25
|
+
from typing import Any
|
|
26
|
+
from typing import Literal
|
|
27
|
+
|
|
28
|
+
from ..types import JSONList
|
|
29
|
+
from ..types import StrAnyMap
|
|
30
|
+
from ..types import StrSeqMap
|
|
31
|
+
from ..types import StrStrMap
|
|
32
|
+
|
|
33
|
+
# SECTION: EXPORTS ========================================================== #
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
# Type Aliases (Functions)
|
|
38
|
+
'AggregateFunc',
|
|
39
|
+
'OperatorFunc',
|
|
40
|
+
# Type Aliases (Records & Fields)
|
|
41
|
+
'FieldName',
|
|
42
|
+
'Fields',
|
|
43
|
+
# Type Aliases (Transform Specs)
|
|
44
|
+
'AggregateSpec',
|
|
45
|
+
'FilterSpec',
|
|
46
|
+
'MapSpec',
|
|
47
|
+
'SelectSpec',
|
|
48
|
+
'SortSpec',
|
|
49
|
+
# Type Aliases (Pipelines)
|
|
50
|
+
'StepOrSteps',
|
|
51
|
+
'StepSeq',
|
|
52
|
+
'StepSpec',
|
|
53
|
+
'PipelineConfig',
|
|
54
|
+
'PipelineStepName',
|
|
55
|
+
# Type Aliases (Helpers)
|
|
56
|
+
'StepApplier',
|
|
57
|
+
'SortKey',
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# SECTION: TYPE ALIASES ===================================================== #
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# -- Functions -- #
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# TODO: Consider redefining to use `functools.reduce` signature.
|
|
68
|
+
# TODO: Consider adding `**kwargs` to support richer aggregation functions.
|
|
69
|
+
# TODO: Consider constraining first argument to `Sequence[float]`.
|
|
70
|
+
# TODO: Consider constraining return type to `float | int | None`.
|
|
71
|
+
# Callable reducing numeric collections into a summary value.
|
|
72
|
+
type AggregateFunc = Callable[[list[float], int], Any]
|
|
73
|
+
|
|
74
|
+
# Binary predicate consumed by filter operations.
|
|
75
|
+
type OperatorFunc = Callable[[Any, Any], bool]
|
|
76
|
+
|
|
77
|
+
# -- Records & Fields -- #
|
|
78
|
+
|
|
79
|
+
# Individual field identifier referenced inside specs.
|
|
80
|
+
type FieldName = str
|
|
81
|
+
|
|
82
|
+
# Ordered list of :data:`FieldName` entries preserving projection order.
|
|
83
|
+
type Fields = list[FieldName]
|
|
84
|
+
|
|
85
|
+
# -- Transform Specs -- #
|
|
86
|
+
|
|
87
|
+
# Filtering spec expecting ``field``, ``op``, and ``value`` keys.
|
|
88
|
+
type FilterSpec = StrAnyMap
|
|
89
|
+
|
|
90
|
+
# Field renaming instructions mapping old keys to new ones.
|
|
91
|
+
type MapSpec = StrStrMap
|
|
92
|
+
|
|
93
|
+
# Projection spec as a field list or mapping with metadata.
|
|
94
|
+
#
|
|
95
|
+
# Examples
|
|
96
|
+
# --------
|
|
97
|
+
# >>> from etlplus.ops.types import SelectSpec
|
|
98
|
+
# >>> spec1: SelectSpec = ['a','b']
|
|
99
|
+
# >>> spec2: SelectSpec = {'fields': [...]}
|
|
100
|
+
type SelectSpec = Fields | StrSeqMap
|
|
101
|
+
|
|
102
|
+
# Sort directive expressed as a field string or mapping with flags.
|
|
103
|
+
#
|
|
104
|
+
# Examples
|
|
105
|
+
# --------
|
|
106
|
+
# >>> from etlplus.ops.types import SortSpec
|
|
107
|
+
# >>> spec1: SortSpec = 'field'
|
|
108
|
+
# >>> spec2: SortSpec = {'field': 'x', 'reverse': True}
|
|
109
|
+
type SortSpec = str | StrAnyMap
|
|
110
|
+
|
|
111
|
+
# Aggregate instruction covering ``field``, ``func``, and optional alias.
|
|
112
|
+
#
|
|
113
|
+
# Supported functions: ``avg``, ``count``, ``max``, ``min``, and ``sum``.
|
|
114
|
+
# Examples
|
|
115
|
+
# --------
|
|
116
|
+
# >>> from etlplus.ops.types import AggregateSpec
|
|
117
|
+
# >>> spec: AggregateSpec = \
|
|
118
|
+
# ... {'field': 'x', 'func': 'sum', 'alias': 'x_sum'}  # 'alias' is optional
|
|
119
|
+
type AggregateSpec = StrAnyMap
|
|
120
|
+
|
|
121
|
+
# -- Pipelines -- #
|
|
122
|
+
|
|
123
|
+
# Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
|
|
124
|
+
type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
|
|
125
|
+
|
|
126
|
+
# Collections of steps
|
|
127
|
+
|
|
128
|
+
# Ordered collection of :data:`StepSpec` entries.
|
|
129
|
+
type StepSeq = Sequence[StepSpec]
|
|
130
|
+
|
|
131
|
+
# Accepts either a single :data:`StepSpec` or a sequence of them.
|
|
132
|
+
type StepOrSteps = StepSpec | StepSeq
|
|
133
|
+
|
|
134
|
+
# Canonical literal names for supported transform stages.
|
|
135
|
+
type PipelineStepName = Literal['aggregate', 'filter', 'map', 'select', 'sort']
|
|
136
|
+
|
|
137
|
+
# Mapping from step name to its associated specification payload.
|
|
138
|
+
# TODO: Consider replacing with etlplus.workflow.types.PipelineConfig.
|
|
139
|
+
type PipelineConfig = Mapping[PipelineStepName, StepOrSteps]
|
|
140
|
+
|
|
141
|
+
# -- Helpers -- #
|
|
142
|
+
|
|
143
|
+
# Callable that applies step configuration to a batch of records.
|
|
144
|
+
type StepApplier = Callable[[JSONList, Any], JSONList]
|
|
145
|
+
|
|
146
|
+
# Tuple combining stable sort index and computed sort value.
|
|
147
|
+
type SortKey = tuple[int, Any]
|
etlplus/types.py
CHANGED
|
@@ -16,7 +16,8 @@ See Also
|
|
|
16
16
|
|
|
17
17
|
Examples
|
|
18
18
|
--------
|
|
19
|
-
>>> from etlplus.types import JSONDict
|
|
19
|
+
>>> from etlplus.types import JSONDict
|
|
20
|
+
>>> from etlplus.ops.types import PipelineConfig
|
|
20
21
|
>>> payload: JSONDict = {'id': 1, 'name': 'Ada'}
|
|
21
22
|
>>> isinstance(payload, dict)
|
|
22
23
|
True
|
|
@@ -53,30 +54,10 @@ __all__ = [
|
|
|
53
54
|
'JSONRecords',
|
|
54
55
|
# Type Aliases (File System)
|
|
55
56
|
'StrPath',
|
|
56
|
-
# Type Aliases (Functions)
|
|
57
|
-
'AggregateFunc',
|
|
58
|
-
'OperatorFunc',
|
|
59
|
-
# Type Aliases (Records & Fields)
|
|
60
|
-
'FieldName',
|
|
61
|
-
'Fields',
|
|
62
57
|
# Type Aliases (Transform Specs)
|
|
63
58
|
'StrAnyMap',
|
|
64
59
|
'StrSeqMap',
|
|
65
60
|
'StrStrMap',
|
|
66
|
-
'AggregateSpec',
|
|
67
|
-
'FilterSpec',
|
|
68
|
-
'MapSpec',
|
|
69
|
-
'SelectSpec',
|
|
70
|
-
'SortSpec',
|
|
71
|
-
# Type Aliases (Pipelines)
|
|
72
|
-
'StepOrSteps',
|
|
73
|
-
'StepSeq',
|
|
74
|
-
'StepSpec',
|
|
75
|
-
'PipelineStepName',
|
|
76
|
-
'PipelineConfig',
|
|
77
|
-
# Type Aliases (Helpers)
|
|
78
|
-
'StepApplier',
|
|
79
|
-
'SortKey',
|
|
80
61
|
# Type Aliases (Networking / Runtime)
|
|
81
62
|
'Sleeper',
|
|
82
63
|
'Timeout',
|
|
@@ -126,22 +107,6 @@ type JSONRecords = list[JSONRecord]
|
|
|
126
107
|
# Path-like inputs accepted by file helpers.
|
|
127
108
|
type StrPath = str | Path | PathLike[str]
|
|
128
109
|
|
|
129
|
-
# -- Functions -- #
|
|
130
|
-
|
|
131
|
-
# Callable reducing numeric collections into a summary value.
|
|
132
|
-
type AggregateFunc = Callable[[list[float], int], Any]
|
|
133
|
-
|
|
134
|
-
# Binary predicate consumed by filter operations.
|
|
135
|
-
type OperatorFunc = Callable[[Any, Any], bool]
|
|
136
|
-
|
|
137
|
-
# -- Records & Fields -- #
|
|
138
|
-
|
|
139
|
-
# Individual field identifier referenced inside specs.
|
|
140
|
-
type FieldName = str
|
|
141
|
-
|
|
142
|
-
# Ordered list of :data:`FieldName` entries preserving projection order.
|
|
143
|
-
type Fields = list[FieldName]
|
|
144
|
-
|
|
145
110
|
# -- Transform Specs -- #
|
|
146
111
|
|
|
147
112
|
# Kept intentionally broad for runtime-friendly validation in transform.py.
|
|
@@ -157,69 +122,6 @@ type StrStrMap = Mapping[str, str]
|
|
|
157
122
|
# Mapping whose values are homogeneous sequences.
|
|
158
123
|
type StrSeqMap = Mapping[str, Sequence[Any]]
|
|
159
124
|
|
|
160
|
-
# Transform step specifications
|
|
161
|
-
|
|
162
|
-
# Filtering spec expecting ``field``, ``op``, and ``value`` keys.
|
|
163
|
-
type FilterSpec = StrAnyMap
|
|
164
|
-
|
|
165
|
-
# Field renaming instructions mapping old keys to new ones.
|
|
166
|
-
type MapSpec = StrStrMap
|
|
167
|
-
|
|
168
|
-
# Projection spec as a field list or mapping with metadata.
|
|
169
|
-
#
|
|
170
|
-
# Examples
|
|
171
|
-
# --------
|
|
172
|
-
# >>> from etlplus.types import SelectSpec
|
|
173
|
-
# >>> spec1: SelectSpec = ['a','b']
|
|
174
|
-
# >>> spec2: SelectSpec = {'fields': [...]}
|
|
175
|
-
type SelectSpec = Fields | StrSeqMap
|
|
176
|
-
|
|
177
|
-
# Sort directive expressed as a field string or mapping with flags.
|
|
178
|
-
#
|
|
179
|
-
# Examples
|
|
180
|
-
# --------
|
|
181
|
-
# >>> from etlplus.types import SortSpec
|
|
182
|
-
# >>> spec1: SortSpec = 'field'
|
|
183
|
-
# >>> spec2: SortSpec = {'field': 'x', 'reverse': True}
|
|
184
|
-
type SortSpec = str | StrAnyMap
|
|
185
|
-
|
|
186
|
-
# Aggregate instruction covering ``field``, ``func``, and optional alias.
|
|
187
|
-
#
|
|
188
|
-
# Supported functions: ``avg``, ``count``, ``max``, ``min``, and ``sum``.
|
|
189
|
-
# Examples
|
|
190
|
-
# --------
|
|
191
|
-
# >>> from etlplus.types import AggregateSpec
|
|
192
|
-
# >>> spec: AggregateSpec = \
|
|
193
|
-
# ... {'field': 'x', 'func': 'sum' | 'avg' | ..., 'alias'?: '...'}
|
|
194
|
-
type AggregateSpec = StrAnyMap
|
|
195
|
-
|
|
196
|
-
# -- Pipelines-- #
|
|
197
|
-
|
|
198
|
-
# Unified pipeline step spec consumed by :mod:`etlplus.ops.transform`.
|
|
199
|
-
type StepSpec = AggregateSpec | FilterSpec | MapSpec | SelectSpec | SortSpec
|
|
200
|
-
|
|
201
|
-
# Collections of steps
|
|
202
|
-
|
|
203
|
-
# Ordered collection of :data:`StepSpec` entries.
|
|
204
|
-
type StepSeq = Sequence[StepSpec]
|
|
205
|
-
|
|
206
|
-
# Accepts either a single :data:`StepSpec` or a sequence of them.
|
|
207
|
-
type StepOrSteps = StepSpec | StepSeq
|
|
208
|
-
|
|
209
|
-
# Canonical literal names for supported transform stages.
|
|
210
|
-
type PipelineStepName = Literal['filter', 'map', 'select', 'sort', 'aggregate']
|
|
211
|
-
|
|
212
|
-
# Mapping from step name to its associated specification payload.
|
|
213
|
-
type PipelineConfig = Mapping[PipelineStepName, StepOrSteps]
|
|
214
|
-
|
|
215
|
-
# -- Helpers -- #
|
|
216
|
-
|
|
217
|
-
# Callable that applies step configuration to a batch of records.
|
|
218
|
-
type StepApplier = Callable[[JSONList, Any], JSONList]
|
|
219
|
-
|
|
220
|
-
# Tuple combining stable sort index and computed sort value.
|
|
221
|
-
type SortKey = tuple[int, Any]
|
|
222
|
-
|
|
223
125
|
# -- Networking / Runtime -- #
|
|
224
126
|
|
|
225
127
|
# Sleep function used by retry helpers.
|
etlplus/workflow/README.md
CHANGED
|
@@ -12,8 +12,6 @@ Back to project overview: see the top-level [README](../../README.md).
|
|
|
12
12
|
|
|
13
13
|
- [`etlplus.workflow` Subpackage](#etlplusworkflow-subpackage)
|
|
14
14
|
- [Supported Configuration Types](#supported-configuration-types)
|
|
15
|
-
- [Loading and Validating Configs](#loading-and-validating-configs)
|
|
16
|
-
- [Example: Loading a Pipeline Config](#example-loading-a-pipeline-config)
|
|
17
15
|
- [See Also](#see-also)
|
|
18
16
|
|
|
19
17
|
## Supported Configuration Types
|
|
@@ -23,28 +21,6 @@ Back to project overview: see the top-level [README](../../README.md).
|
|
|
23
21
|
- **Pipeline**: End-to-end pipeline configuration
|
|
24
22
|
- **Profile**: User or environment-specific settings
|
|
25
23
|
|
|
26
|
-
## Loading and Validating Configs
|
|
27
|
-
|
|
28
|
-
Use the provided classes to load and validate configuration files:
|
|
29
|
-
|
|
30
|
-
```python
|
|
31
|
-
from etlplus.workflow import PipelineConfig
|
|
32
|
-
|
|
33
|
-
cfg = PipelineConfig.from_yaml("pipeline.yml")
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
- Supports YAML and JSON formats
|
|
37
|
-
- Validates against expected schema
|
|
38
|
-
|
|
39
|
-
## Example: Loading a Pipeline Config
|
|
40
|
-
|
|
41
|
-
```python
|
|
42
|
-
from etlplus.workflow import PipelineConfig
|
|
43
|
-
|
|
44
|
-
pipeline = PipelineConfig.from_yaml("configs/pipeline.yml")
|
|
45
|
-
print(pipeline)
|
|
46
|
-
```
|
|
47
|
-
|
|
48
24
|
## See Also
|
|
49
25
|
|
|
50
26
|
- Top-level CLI and library usage in the main [README](../../README.md)
|
etlplus/workflow/__init__.py
CHANGED
|
@@ -12,8 +12,6 @@ from .jobs import JobConfig
|
|
|
12
12
|
from .jobs import LoadRef
|
|
13
13
|
from .jobs import TransformRef
|
|
14
14
|
from .jobs import ValidationRef
|
|
15
|
-
from .pipeline import PipelineConfig
|
|
16
|
-
from .pipeline import load_pipeline_config
|
|
17
15
|
from .profile import ProfileConfig
|
|
18
16
|
|
|
19
17
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -24,11 +22,9 @@ __all__ = [
|
|
|
24
22
|
'ExtractRef',
|
|
25
23
|
'JobConfig',
|
|
26
24
|
'LoadRef',
|
|
27
|
-
'PipelineConfig',
|
|
28
25
|
'ProfileConfig',
|
|
29
26
|
'TransformRef',
|
|
30
27
|
'ValidationRef',
|
|
31
28
|
# Functions
|
|
32
|
-
'load_pipeline_config',
|
|
33
29
|
'topological_sort_jobs',
|
|
34
30
|
]
|
etlplus/workflow/jobs.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
etlplus/README.md,sha256=
|
|
2
|
-
etlplus/__init__.py,sha256=
|
|
1
|
+
etlplus/README.md,sha256=L3un9q7Q7Mstfh0dmSjpsZMZzsOz2tvlWw4_-Y_LZEs,1887
|
|
2
|
+
etlplus/__init__.py,sha256=yHZt-sjjfPjB_CrNhcT9bcMlZCfwiul39ZhQ2VfxpRs,318
|
|
3
3
|
etlplus/__main__.py,sha256=btoROneNiigyfBU7BSzPKZ1R9gzBMpxcpsbPwmuHwTM,479
|
|
4
4
|
etlplus/__version__.py,sha256=1E0GMK_yUWCMQFKxXjTvyMwofi0qT2k4CDNiHWiymWE,327
|
|
5
|
-
etlplus/
|
|
5
|
+
etlplus/config.py,sha256=6BCI9hC1yWYAy5WclIHJlg90FYeWn5vAVrT1NWUTwpE,8817
|
|
6
|
+
etlplus/enums.py,sha256=MfQhy3XDpN7oqLrF7_WwZojl7n8cW3RAzsZGRnAbWgc,4073
|
|
6
7
|
etlplus/mixins.py,sha256=ifGpHwWv7U00yqGf-kN93vJax2IiK4jaGtTsPsO3Oak,1350
|
|
7
8
|
etlplus/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
etlplus/types.py,sha256=
|
|
9
|
+
etlplus/types.py,sha256=Op2H1dcmv0Srm9prFnBZjt7f1S4Mqrus7XrdsjoZkIM,3461
|
|
9
10
|
etlplus/utils.py,sha256=X-k_Y8i6oDjlE5aQu9sw3gPw7O2ikiSn4uoheVv_ERc,17091
|
|
10
11
|
etlplus/api/README.md,sha256=amxS_eIcsnNuVvD0x_w8nkyfedOTYbhlY0gGhaFg0DE,8705
|
|
11
12
|
etlplus/api/__init__.py,sha256=PK2lQv1FbsE7ZZS_ejevFZQSuOUHGApBc22YfHAzMqA,4615
|
|
@@ -28,9 +29,9 @@ etlplus/api/rate_limiting/config.py,sha256=Byc_kmnwFmjjfDEFIdc_sHc7Wnjde1NGgsjPH
|
|
|
28
29
|
etlplus/api/rate_limiting/rate_limiter.py,sha256=uYxn-l2qwLUKVclDQ3vJIIP3fozJx2JlHhz7_zyXVbA,7033
|
|
29
30
|
etlplus/cli/README.md,sha256=8H_G2d3HteYIU6ReX9K9DM485QjWDT5vHMQbGD_vv20,1237
|
|
30
31
|
etlplus/cli/__init__.py,sha256=J97-Rv931IL1_b4AXnB7Fbbd7HKnHBpx18NQfC_kE6c,299
|
|
31
|
-
etlplus/cli/commands.py,sha256=
|
|
32
|
+
etlplus/cli/commands.py,sha256=Mbnu_YYUrOumbDjkul9x5VjP8VXW5u08xNi4nLF9Yyo,25048
|
|
32
33
|
etlplus/cli/constants.py,sha256=0F7dXIQKWUhhVu2Us527GJeknJIWpBqz7CK2e5OQgcE,1947
|
|
33
|
-
etlplus/cli/handlers.py,sha256=
|
|
34
|
+
etlplus/cli/handlers.py,sha256=r06JatAUewxtZYGuu7lLWlcy4hKGQ8U-Oo_YtAuIhzI,18454
|
|
34
35
|
etlplus/cli/io.py,sha256=tGGNQ4ecezqj-mD285fgBVrYdphdeqApsyV9VojOj1I,7836
|
|
35
36
|
etlplus/cli/main.py,sha256=68_uJwmWajhOC9o4R_ns8IQloC9BFmAKC_9GlQOxKWg,5239
|
|
36
37
|
etlplus/cli/options.py,sha256=vfXT3YLh7wG1iC-aTdSg6ItMC8l6n0Lozmy53XjqLbA,1199
|
|
@@ -116,26 +117,27 @@ etlplus/file/yaml.py,sha256=b_SxDSEQPVXQv9a9Ih4wAcI940pE5Ksy5pQE6K6ckhw,2062
|
|
|
116
117
|
etlplus/file/zip.py,sha256=8wnmnGW_pGTx65736CzAG67XIi5y98KxucRT8sNDeuQ,4195
|
|
117
118
|
etlplus/file/zsav.py,sha256=5hMuBjYeHw--UL2ZCCDn6TzJkr_YNhdQhvKI6nr3WW0,1674
|
|
118
119
|
etlplus/ops/README.md,sha256=8omi7DYZhelc26JKk8Cm8QR8I3OGwziysPj1ivx41iQ,1380
|
|
119
|
-
etlplus/ops/__init__.py,sha256=
|
|
120
|
+
etlplus/ops/__init__.py,sha256=r5_-pPhSLCD1nq1EbN0rQrLOGpudueeIxCH_JvT2bt0,1718
|
|
121
|
+
etlplus/ops/enums.py,sha256=dC_8CfaTiB2i83Az-oG-2hkjMuAfDADNbcMF2f94UeU,4014
|
|
120
122
|
etlplus/ops/extract.py,sha256=LOYiPrALRMF7JDBabnRF24_HKnnIcfTdfXesWdS3QZM,11020
|
|
121
123
|
etlplus/ops/load.py,sha256=yicciVwomUKkdbhuRqbavKBNpT2Hg813BnQzG6IgF4o,10811
|
|
122
|
-
etlplus/ops/run.py,sha256=
|
|
123
|
-
etlplus/ops/transform.py,sha256
|
|
124
|
+
etlplus/ops/run.py,sha256=4HWelMevW0pW_76lJkoMcbzeQMiThMbxzO09wx6yoHg,11278
|
|
125
|
+
etlplus/ops/transform.py,sha256=-41uw_pwOGsMTUYxtXaeYOmTF_fTkN-L4Q9KT1OFe78,25671
|
|
126
|
+
etlplus/ops/types.py,sha256=Cvp8AJzJhJ1iYjyHd7j9ZLioxE2NdK__3g6fOI0qq6Q,4198
|
|
124
127
|
etlplus/ops/utils.py,sha256=lJmrO1KDob-xZU8Gc2SvZvMgdYLsVoaz-fTV42KkLVo,10835
|
|
125
128
|
etlplus/ops/validate.py,sha256=-OLAwQNNCmmDbmj0SB7zzYXDkJfcyBP_z9nTpqImLP0,13271
|
|
126
129
|
etlplus/templates/README.md,sha256=IfPXlj1TGVA-uFWosHJhE2rabFW-znxOlOMazO9Z5cE,1361
|
|
127
130
|
etlplus/templates/__init__.py,sha256=tsniN7XJYs3NwYxJ6c2HD5upHP3CDkLx-bQCMt97UOM,106
|
|
128
131
|
etlplus/templates/ddl.sql.j2,sha256=s8fMWvcb4eaJVXkifuib1aQPljtZ8buuyB_uA-ZdU3Q,4734
|
|
129
132
|
etlplus/templates/view.sql.j2,sha256=Iy8DHfhq5yyvrUKDxqp_aHIEXY4Tm6j4wT7YDEFWAhk,2180
|
|
130
|
-
etlplus/workflow/README.md,sha256=
|
|
131
|
-
etlplus/workflow/__init__.py,sha256=
|
|
133
|
+
etlplus/workflow/README.md,sha256=QelyVFGX-sZM9mx3v6BXkzX36hv6MI1yK4eCPSOKNwI,1050
|
|
134
|
+
etlplus/workflow/__init__.py,sha256=XgCQr684om0rONrQZ61yQ0r4qqFQL0iLAAB2Mn2BRSE,594
|
|
132
135
|
etlplus/workflow/dag.py,sha256=-f1x8N1eb-PUuiOwEvFLmJwfR7JaMDJihlCHlhrFhgE,2937
|
|
133
|
-
etlplus/workflow/jobs.py,sha256=
|
|
134
|
-
etlplus/workflow/pipeline.py,sha256=PA5zhcfrk--pAg3b3x4oBf29WMj5HqR8zOozz4oEmg8,9387
|
|
136
|
+
etlplus/workflow/jobs.py,sha256=hLE9QJUzQaI0aOEon0P-xxxa6xHp997ANei4F310WRY,8711
|
|
135
137
|
etlplus/workflow/profile.py,sha256=FQU3bzBZ9_yjKC9kCXKN1FQDS9zjNUjtWB1r3UL95_Q,1993
|
|
136
|
-
etlplus-0.16.
|
|
137
|
-
etlplus-0.16.
|
|
138
|
-
etlplus-0.16.
|
|
139
|
-
etlplus-0.16.
|
|
140
|
-
etlplus-0.16.
|
|
141
|
-
etlplus-0.16.
|
|
138
|
+
etlplus-0.16.4.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
|
|
139
|
+
etlplus-0.16.4.dist-info/METADATA,sha256=Pe-3vIQcAfTasRzy2HPq0A4cin2I5PJChAmA23S9dgo,28114
|
|
140
|
+
etlplus-0.16.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
141
|
+
etlplus-0.16.4.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
|
|
142
|
+
etlplus-0.16.4.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
|
|
143
|
+
etlplus-0.16.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|