etlplus 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +43 -0
- etlplus/__main__.py +22 -0
- etlplus/__version__.py +14 -0
- etlplus/api/README.md +237 -0
- etlplus/api/__init__.py +136 -0
- etlplus/api/auth.py +432 -0
- etlplus/api/config.py +633 -0
- etlplus/api/endpoint_client.py +885 -0
- etlplus/api/errors.py +170 -0
- etlplus/api/pagination/__init__.py +47 -0
- etlplus/api/pagination/client.py +188 -0
- etlplus/api/pagination/config.py +440 -0
- etlplus/api/pagination/paginator.py +775 -0
- etlplus/api/rate_limiting/__init__.py +38 -0
- etlplus/api/rate_limiting/config.py +343 -0
- etlplus/api/rate_limiting/rate_limiter.py +266 -0
- etlplus/api/request_manager.py +589 -0
- etlplus/api/retry_manager.py +430 -0
- etlplus/api/transport.py +325 -0
- etlplus/api/types.py +172 -0
- etlplus/cli/__init__.py +15 -0
- etlplus/cli/app.py +1367 -0
- etlplus/cli/handlers.py +775 -0
- etlplus/cli/main.py +616 -0
- etlplus/config/__init__.py +56 -0
- etlplus/config/connector.py +372 -0
- etlplus/config/jobs.py +311 -0
- etlplus/config/pipeline.py +339 -0
- etlplus/config/profile.py +78 -0
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/ddl.py +197 -0
- etlplus/enums.py +414 -0
- etlplus/extract.py +218 -0
- etlplus/file.py +657 -0
- etlplus/load.py +336 -0
- etlplus/mixins.py +62 -0
- etlplus/py.typed +0 -0
- etlplus/run.py +368 -0
- etlplus/run_helpers.py +843 -0
- etlplus/templates/__init__.py +5 -0
- etlplus/templates/ddl.sql.j2 +128 -0
- etlplus/templates/view.sql.j2 +69 -0
- etlplus/transform.py +1049 -0
- etlplus/types.py +227 -0
- etlplus/utils.py +638 -0
- etlplus/validate.py +493 -0
- etlplus/validation/__init__.py +44 -0
- etlplus/validation/utils.py +389 -0
- etlplus-0.5.4.dist-info/METADATA +616 -0
- etlplus-0.5.4.dist-info/RECORD +55 -0
- etlplus-0.5.4.dist-info/WHEEL +5 -0
- etlplus-0.5.4.dist-info/entry_points.txt +2 -0
- etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
- etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/config/utils.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.config.utils` module.
|
|
3
|
+
|
|
4
|
+
A module defining utility helpers for ETL pipeline configuration.
|
|
5
|
+
|
|
6
|
+
Notes
|
|
7
|
+
-----
|
|
8
|
+
- Inputs to parsers favor ``Mapping[str, Any]`` to remain permissive and
|
|
9
|
+
avoid unnecessary copies; normalization returns concrete types.
|
|
10
|
+
- Substitution is shallow for strings and recursive for containers.
|
|
11
|
+
- Numeric coercion helpers are intentionally forgiving: invalid values
|
|
12
|
+
become ``None`` rather than raising.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from collections.abc import Iterable
|
|
18
|
+
from collections.abc import Mapping
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from ..types import StrAnyMap
|
|
22
|
+
|
|
23
|
+
# SECTION: EXPORTS ========================================================== #
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
# Functions
|
|
28
|
+
'deep_substitute',
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def deep_substitute(
    value: Any,
    vars_map: StrAnyMap | None,
    env_map: Mapping[str, str] | None,
) -> Any:
    """
    Walk *value* and expand ``${VAR}`` tokens in every string it contains.

    Strings are rewritten; mappings, lists, tuples, sets, and frozensets are
    rebuilt with their members processed recursively. Anything else is
    returned unchanged.

    Parameters
    ----------
    value : Any
        Scalar or (possibly nested) container to process.
    vars_map : StrAnyMap | None
        Variable names mapped to replacement values (lower precedence).
    env_map : Mapping[str, str] | None
        Environment values; on a name clash these win over ``vars_map``.

    Returns
    -------
    Any
        A new structure with tokens expanded. Mappings come back as plain
        ``dict`` objects and their keys are left untouched; only the values
        are processed.
    """
    # Resolve precedence once up front so the walk only does replacement.
    pairs = _prepare_substitutions(vars_map, env_map)

    def _walk(node: Any) -> Any:
        # ``str`` is tested first so text is never treated as a container.
        if isinstance(node, str):
            return _replace_tokens(node, pairs)
        if isinstance(node, Mapping):
            return {key: _walk(item) for key, item in node.items()}
        if isinstance(node, list):
            return [_walk(item) for item in node]
        if isinstance(node, tuple):
            return tuple([_walk(item) for item in node])
        if isinstance(node, set):
            return {_walk(item) for item in node}
        if isinstance(node, frozenset):
            return frozenset(_walk(item) for item in node)
        return node

    return _walk(value)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# SECTION: INTERNAL FUNCTIONS ============================================== #
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _prepare_substitutions(
    vars_map: StrAnyMap | None,
    env_map: Mapping[str, Any] | None,
) -> tuple[tuple[str, Any], ...]:
    """Combine both maps into one immutable sequence of replacement pairs.

    Parameters
    ----------
    vars_map : StrAnyMap | None
        Variable names mapped to replacement values (lower precedence).
    env_map : Mapping[str, Any] | None
        Environment-backed values; these replace same-named ``vars_map``
        entries.

    Returns
    -------
    tuple[tuple[str, Any], ...]
        ``(name, value)`` pairs in insertion order (``vars_map`` names first,
        then names that only appear in ``env_map``). Empty when both inputs
        are missing or empty.
    """
    if not (vars_map or env_map):
        return ()
    lower = vars_map or {}
    higher = env_map or {}
    # Later unpacking wins, which gives ``env_map`` the higher precedence.
    combined: dict[str, Any] = {**lower, **higher}
    return tuple(combined.items())
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _replace_tokens(
    text: str,
    substitutions: Iterable[tuple[str, Any]],
) -> str:
    """Expand every ``${name}`` token in *text* using *substitutions*.

    Parameters
    ----------
    text : str
        Text that may contain ``${name}`` tokens.
    substitutions : Iterable[tuple[str, Any]]
        ``(name, replacement)`` pairs; each replacement is converted with
        ``str()`` before insertion.

    Returns
    -------
    str
        The text with all known tokens replaced. Tokens whose name has no
        matching pair are left as-is.

    Notes
    -----
    Pairs are applied one after another to the running result, so a
    replacement value that itself contains a token for a *later* pair is
    expanded as well.
    """
    if not substitutions:
        return text
    result = text
    for key, value in substitutions:
        placeholder = f'${{{key}}}'
        if placeholder not in result:
            continue
        result = result.replace(placeholder, str(value))
    return result
|
etlplus/ddl.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.ddl` module.
|
|
3
|
+
|
|
4
|
+
DDL rendering utilities for pipeline table schemas.
|
|
5
|
+
|
|
6
|
+
Exposes helpers to load YAML/JSON table specs and render them into SQL via
|
|
7
|
+
Jinja templates. Mirrors the behavior of ``tools/render_ddl.py`` so the CLI
|
|
8
|
+
can emit DDLs without shelling out to that script.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import importlib.resources
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
from collections.abc import Iterable
|
|
17
|
+
from collections.abc import Mapping
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from jinja2 import DictLoader
|
|
22
|
+
from jinja2 import Environment
|
|
23
|
+
from jinja2 import FileSystemLoader
|
|
24
|
+
from jinja2 import StrictUndefined
|
|
25
|
+
|
|
26
|
+
# SECTION: EXPORTS ========================================================== #
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
'TEMPLATES',
|
|
31
|
+
'load_table_spec',
|
|
32
|
+
'render_table_sql',
|
|
33
|
+
'render_tables',
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# SECTION: CONSTANTS ======================================================== #
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Built-in template keys mapped to the template filenames that
# ``_load_template_text`` reads from the ``etlplus.templates`` package data.
TEMPLATES = dict(
    ddl='ddl.sql.j2',
    view='view.sql.j2',
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _build_env(
    *,
    template_key: str | None,
    template_path: str | None,
) -> Environment:
    """
    Create the Jinja2 environment for a file-based or built-in template.

    Parameters
    ----------
    template_key : str | None
        Key into ``TEMPLATES``; ``None`` or an empty string falls back to
        ``'ddl'``. Ignored when a template file is in effect.
    template_path : str | None
        Path to a template file. When falsy, the ``TEMPLATE_NAME``
        environment variable is consulted as a file path instead.

    Returns
    -------
    Environment
        Environment whose ``TEMPLATE_NAME`` global holds the name to pass to
        ``get_template``: the file name for file templates, or the key for
        built-in ones.

    Raises
    ------
    FileNotFoundError
        If a template file is requested but does not exist.
    ValueError
        If the (stripped) template key is not in ``TEMPLATES``.
    """

    def _new_env(loader: Any, name: str) -> Environment:
        # Both branches share the same rendering options; only the loader
        # and the advertised template name differ.
        jinja_env = Environment(
            loader=loader,
            undefined=StrictUndefined,
            trim_blocks=True,
            lstrip_blocks=True,
        )
        jinja_env.globals['TEMPLATE_NAME'] = name
        return jinja_env

    # An explicit path wins; otherwise the environment variable may supply one.
    override = template_path or os.environ.get('TEMPLATE_NAME')
    if override:
        template_file = Path(override)
        if template_file.exists():
            return _new_env(
                FileSystemLoader(str(template_file.parent)),
                template_file.name,
            )
        raise FileNotFoundError(f'Template file not found: {template_file}')

    builtin_key = (template_key or 'ddl').strip()
    if builtin_key in TEMPLATES:
        # Built-in templates are read from package data and served from memory.
        source = _load_template_text(TEMPLATES[builtin_key])
        return _new_env(DictLoader({builtin_key: source}), builtin_key)

    available = ', '.join(sorted(TEMPLATES))
    raise ValueError(
        f'Unknown template key "{builtin_key}". Choose from: {available}',
    )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _load_template_text(filename: str) -> str:
    """
    Read a bundled template from the ``etlplus.templates`` package data.

    Parameters
    ----------
    filename : str
        File name of the template inside ``etlplus.templates``.

    Returns
    -------
    str
        Template source decoded as UTF-8.

    Raises
    ------
    FileNotFoundError
        If the template file is missing from the package data.
    """
    try:
        package_root = importlib.resources.files('etlplus.templates')
        resource = package_root.joinpath(filename)
        return resource.read_text(encoding='utf-8')
    except FileNotFoundError as exc:  # pragma: no cover - deployment guard
        # Re-raise with a message that names the package, keeping the cause.
        message = (
            f'Could not load template {filename} '
            'from etlplus.templates package data.'
        )
        raise FileNotFoundError(message) from exc
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def load_table_spec(path: Path | str) -> dict[str, Any]:
    """
    Read a table spec file and parse it according to its extension.

    Parameters
    ----------
    path : Path | str
        Location of a ``.json``, ``.yml``, or ``.yaml`` spec file. The
        extension check is case-insensitive.

    Returns
    -------
    dict[str, Any]
        Parsed document, exactly as returned by ``json.loads`` or
        ``yaml.safe_load``.

    Raises
    ------
    ValueError
        If the file extension is not one of the supported ones.
    RuntimeError
        If a YAML spec is requested but ``yaml`` cannot be imported.
    """
    source = Path(path)
    # The file is read before the extension is checked, so a missing file
    # surfaces as the read error rather than as an extension error.
    raw = source.read_text(encoding='utf-8')
    extension = source.suffix.lower()

    if extension == '.json':
        return json.loads(raw)

    if extension not in ('.yml', '.yaml'):
        raise ValueError('Spec must be .json, .yml, or .yaml')

    # Imported lazily so JSON-only callers do not need the YAML dependency.
    try:
        import yaml  # type: ignore
    except Exception as exc:  # pragma: no cover
        raise RuntimeError(
            'Missing dependency: pyyaml is required for YAML specs.',
        ) from exc
    return yaml.safe_load(raw)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def render_table_sql(
    spec: Mapping[str, Any],
    *,
    template: str | None = 'ddl',
    template_path: str | None = None,
) -> str:
    """
    Render one table spec to SQL using the selected template.

    Parameters
    ----------
    spec : Mapping[str, Any]
        Table specification, exposed to the template as ``spec``.
    template : str | None, optional
        Built-in template key (default: 'ddl').
    template_path : str | None, optional
        Path to a custom template file; takes priority over ``template``.

    Returns
    -------
    str
        Rendered SQL with trailing whitespace removed and exactly one
        newline appended.

    Raises
    ------
    TypeError
        If the environment's ``TEMPLATE_NAME`` global is not a string.
    """
    jinja_env = _build_env(template_key=template, template_path=template_path)
    # ``_build_env`` publishes the name to load through this global.
    name = jinja_env.globals.get('TEMPLATE_NAME')
    if isinstance(name, str):
        rendered = jinja_env.get_template(name).render(spec=spec)
        return rendered.rstrip() + '\n'
    raise TypeError('TEMPLATE_NAME must be a string.')
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def render_tables(
    specs: Iterable[Mapping[str, Any]],
    *,
    template: str | None = 'ddl',
    template_path: str | None = None,
) -> list[str]:
    """
    Render each table spec to SQL, preserving input order.

    Parameters
    ----------
    specs : Iterable[Mapping[str, Any]]
        Table specification mappings.
    template : str | None, optional
        Built-in template key (default: 'ddl').
    template_path : str | None, optional
        Path to a custom template file; takes priority over ``template``.

    Returns
    -------
    list[str]
        One rendered SQL string per spec.
    """

    def _render_one(table_spec: Mapping[str, Any]) -> str:
        # Every spec is rendered with the same template selection.
        return render_table_sql(
            table_spec,
            template=template,
            template_path=template_path,
        )

    return [_render_one(table_spec) for table_spec in specs]
|