etlplus 0.5.2__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/api/README.md +24 -26
- etlplus/cli/commands.py +924 -0
- etlplus/cli/constants.py +71 -0
- etlplus/cli/handlers.py +369 -484
- etlplus/cli/io.py +336 -0
- etlplus/cli/main.py +16 -418
- etlplus/cli/options.py +49 -0
- etlplus/cli/state.py +336 -0
- etlplus/cli/types.py +33 -0
- etlplus/database/__init__.py +44 -0
- etlplus/database/ddl.py +319 -0
- etlplus/database/engine.py +151 -0
- etlplus/database/orm.py +354 -0
- etlplus/database/schema.py +274 -0
- etlplus/database/types.py +33 -0
- etlplus/enums.py +51 -1
- etlplus/load.py +1 -1
- etlplus/run.py +2 -4
- etlplus/types.py +5 -0
- etlplus/utils.py +1 -32
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/METADATA +84 -40
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/RECORD +26 -16
- etlplus/cli/app.py +0 -1367
- etlplus/ddl.py +0 -197
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/WHEEL +0 -0
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/entry_points.txt +0 -0
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/top_level.txt +0 -0
etlplus/database/ddl.py
ADDED
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.database.ddl` module.
|
|
3
|
+
|
|
4
|
+
DDL rendering utilities for pipeline table schemas.
|
|
5
|
+
|
|
6
|
+
Exposes helpers to load YAML/JSON table specs and render them into SQL via
|
|
7
|
+
Jinja templates. Mirrors the behavior of ``tools/render_ddl.py`` so the CLI
|
|
8
|
+
can emit DDLs without shelling out to that script.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import importlib.resources
|
|
14
|
+
import os
|
|
15
|
+
from collections.abc import Iterable
|
|
16
|
+
from collections.abc import Mapping
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Final
|
|
19
|
+
|
|
20
|
+
from jinja2 import DictLoader
|
|
21
|
+
from jinja2 import Environment
|
|
22
|
+
from jinja2 import FileSystemLoader
|
|
23
|
+
from jinja2 import StrictUndefined
|
|
24
|
+
|
|
25
|
+
from ..file import File
|
|
26
|
+
from ..types import StrAnyMap
|
|
27
|
+
from ..types import StrPath
|
|
28
|
+
from ..types import TemplateKey
|
|
29
|
+
|
|
30
|
+
# SECTION: EXPORTS ========================================================== #
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
__all__ = [
|
|
34
|
+
'TEMPLATES',
|
|
35
|
+
'load_table_spec',
|
|
36
|
+
'render_table_sql',
|
|
37
|
+
'render_tables',
|
|
38
|
+
'render_tables_to_string',
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Spec file extensions (with leading dot) that ``load_table_spec`` accepts;
# the path suffix is lower-cased before it is compared against this set.
_SUPPORTED_SPEC_SUFFIXES: Final[frozenset[str]] = frozenset(
    ('.json', '.yml', '.yaml'),
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# SECTION: CONSTANTS ======================================================== #
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Registry of bundled templates: template key -> filename looked up in the
# ``etlplus.templates`` package data.
TEMPLATES: Final[dict[TemplateKey, str]] = {
    'ddl': 'ddl.sql.j2',
    'view': 'view.sql.j2',
}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _load_template_text(
    filename: str,
) -> str:
    """
    Read a bundled template from the ``etlplus.templates`` package data.

    Parameters
    ----------
    filename : str
        Name of the template file inside the package data folder.

    Returns
    -------
    str
        Template contents, decoded as UTF-8.

    Raises
    ------
    FileNotFoundError
        If the template file cannot be located in package data.
    """

    try:
        package_root = importlib.resources.files('etlplus.templates')
        return package_root.joinpath(filename).read_text(encoding='utf-8')
    except FileNotFoundError as exc:  # pragma: no cover - deployment guard
        # Re-raise with a message that names the template and the package.
        message = (
            f'Could not load template {filename} '
            f'from etlplus.templates package data.'
        )
        raise FileNotFoundError(message) from exc
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _resolve_template(
    *,
    template_key: TemplateKey | None,
    template_path: StrPath | None,
) -> tuple[Environment, str]:
    """
    Select the Jinja environment and template name to render with.

    A non-empty ``template_path`` wins. When ``template_path`` is ``None``
    the ``TEMPLATE_NAME`` environment variable is read and, if it holds a
    non-empty value, treated as a template file path. Otherwise the bundled
    template registered under ``template_key`` (``'ddl'`` when the key is
    falsy) is loaded from package data.

    Parameters
    ----------
    template_key : TemplateKey | None
        Key of a template bundled with the package.
    template_path : StrPath | None
        Explicit template file that overrides the bundled templates.

    Returns
    -------
    tuple[Environment, str]
        The configured Jinja environment and the name under which the
        template can be fetched from it.

    Raises
    ------
    FileNotFoundError
        If the override template path does not exist.
    ValueError
        If the template key is unknown.
    """

    def build_env(loader: DictLoader | FileSystemLoader) -> Environment:
        # Both lookup strategies share the same rendering options.
        return Environment(
            loader=loader,
            undefined=StrictUndefined,
            trim_blocks=True,
            lstrip_blocks=True,
        )

    if template_path is None:
        override = os.environ.get('TEMPLATE_NAME')
    else:
        override = str(template_path)

    if override:
        template_file = Path(override)
        if not template_file.exists():
            raise FileNotFoundError(
                f'Template file not found: {template_file}',
            )
        # Serve the file from its own directory and address it by basename.
        return (
            build_env(FileSystemLoader(str(template_file.parent))),
            template_file.name,
        )

    key: TemplateKey = template_key or 'ddl'
    if key not in TEMPLATES:
        choices = ', '.join(sorted(TEMPLATES))
        raise ValueError(
            f'Unknown template key "{key}". Choose from: {choices}',
        )

    # Bundled templates are read from package data and served from memory
    # under their key.
    bundled_source = _load_template_text(TEMPLATES[key])
    return build_env(DictLoader({key: bundled_source})), key
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def load_table_spec(
    path: StrPath,
) -> StrAnyMap:
    """
    Read a table specification file and return it as a dictionary.

    Parameters
    ----------
    path : StrPath
        Path to the JSON or YAML specification file.

    Returns
    -------
    StrAnyMap
        Shallow ``dict`` copy of the parsed specification mapping.

    Raises
    ------
    ImportError
        If reading a non-YAML spec fails because of a missing dependency.
    RuntimeError
        If reading a YAML spec fails because of a missing dependency.
    TypeError
        If the loaded spec is not a mapping.
    ValueError
        If the file suffix is not supported.
    """

    source = Path(path)
    extension = source.suffix.lower()
    if extension not in _SUPPORTED_SPEC_SUFFIXES:
        raise ValueError('Spec must be .json, .yml, or .yaml')

    try:
        loaded = File.read_file(source)
    except ImportError as exc:
        # An ImportError for a YAML spec is reported as a missing pyyaml
        # dependency; for any other suffix it propagates unchanged.
        if extension not in {'.yml', '.yaml'}:
            raise
        raise RuntimeError(
            'Missing dependency: pyyaml is required for YAML specs.',
        ) from exc

    if isinstance(loaded, Mapping):
        return dict(loaded)
    raise TypeError('Table spec must be a mapping')
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def render_table_sql(
    spec: StrAnyMap,
    *,
    template: TemplateKey | None = 'ddl',
    template_path: StrPath | None = None,
) -> str:
    """
    Render a single table spec into SQL text.

    Parameters
    ----------
    spec : StrAnyMap
        Table specification mapping, exposed to the template as ``spec``.
    template : TemplateKey | None, optional
        Template key to use (default: 'ddl').
    template_path : StrPath | None, optional
        Path to a custom template file (overrides ``template``). Accepts the
        same string or path-like values as ``render_tables_to_string``.

    Returns
    -------
    str
        Rendered SQL with trailing whitespace stripped and exactly one
        terminating newline appended.
    """
    env, template_name = _resolve_template(
        template_key=template,
        template_path=template_path,
    )
    rendered = env.get_template(template_name).render(spec=spec)
    # Normalize the tail so concatenated payloads are newline-separated.
    return rendered.rstrip() + '\n'
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def render_tables(
    specs: Iterable[StrAnyMap],
    *,
    template: TemplateKey | None = 'ddl',
    template_path: str | None = None,
) -> list[str]:
    """
    Render each table spec into its own SQL payload.

    Parameters
    ----------
    specs : Iterable[StrAnyMap]
        Table specification mappings.
    template : TemplateKey | None, optional
        Template key to use (default: 'ddl').
    template_path : str | None, optional
        Path to a custom template file (overrides ``template``).

    Returns
    -------
    list[str]
        One rendered SQL string per spec, in input order.
    """

    payloads: list[str] = []
    for table_spec in specs:
        sql = render_table_sql(
            table_spec,
            template=template,
            template_path=template_path,
        )
        payloads.append(sql)
    return payloads
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def render_tables_to_string(
    spec_paths: Iterable[StrPath],
    *,
    template: TemplateKey | None = 'ddl',
    template_path: StrPath | None = None,
) -> str:
    """
    Load each spec file, render it, and join the SQL into one string.

    Parameters
    ----------
    spec_paths : Iterable[StrPath]
        Paths to table specification files.
    template : TemplateKey | None, optional
        Template key bundled with ETLPlus. Defaults to ``'ddl'``.
    template_path : StrPath | None, optional
        Custom Jinja template to override the bundled templates.

    Returns
    -------
    str
        Concatenated SQL payload suitable for writing to disk or stdout.
    """

    # ``render_table_sql`` is handed a plain string path (or ``None``).
    custom_template = None if template_path is None else str(template_path)

    # Specs are loaded and rendered one at a time, in input order.
    return ''.join(
        render_table_sql(
            load_table_spec(spec_file),
            template=template,
            template_path=custom_template,
        )
        for spec_file in spec_paths
    )
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.database.engine` module.
|
|
3
|
+
|
|
4
|
+
Lightweight engine/session factory with optional config-driven URL loading.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from collections.abc import Mapping
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
from typing import Final
|
|
14
|
+
|
|
15
|
+
from sqlalchemy import create_engine
|
|
16
|
+
from sqlalchemy.engine import Engine
|
|
17
|
+
from sqlalchemy.orm import sessionmaker
|
|
18
|
+
|
|
19
|
+
from ..file import File
|
|
20
|
+
from ..types import StrAnyMap
|
|
21
|
+
from ..types import StrPath
|
|
22
|
+
|
|
23
|
+
# SECTION: EXPORTS ========================================================== #
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
# Functions
|
|
28
|
+
'load_database_url_from_config',
|
|
29
|
+
'make_engine',
|
|
30
|
+
# Singletons
|
|
31
|
+
'engine',
|
|
32
|
+
'session',
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Default connection URL, resolved once at import time: the ``DATABASE_URL``
# environment variable, then ``DATABASE_DSN``, then an in-memory SQLite
# database. Empty values are skipped.
DATABASE_URL: Final[str] = (
    os.environ.get('DATABASE_URL')
    or os.environ.get('DATABASE_DSN')
    or 'sqlite+pysqlite:///:memory:'
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _resolve_url_from_mapping(
    cfg: StrAnyMap,
) -> str | None:
    """
    Return a URL/DSN from a mapping if present.

    The keys ``connection_string``, ``url`` and ``dsn`` are checked in that
    order and the first value that is a non-blank string is returned with
    surrounding whitespace stripped. Blank or non-string values are skipped
    so they cannot hide a usable value under a later key. If none of the
    keys yields a value, a nested ``default`` mapping is searched the same
    way.

    Parameters
    ----------
    cfg : StrAnyMap
        Configuration mapping potentially containing connection fields.

    Returns
    -------
    str | None
        Resolved URL/DSN string, or ``None`` when nothing usable is found.
    """
    for field in ('connection_string', 'url', 'dsn'):
        candidate = cfg.get(field)
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip()

    # Some configs nest defaults.
    # E.g., databases: { mssql: { default: {...} } }
    default_cfg = cfg.get('default')
    if isinstance(default_cfg, Mapping):
        return _resolve_url_from_mapping(default_cfg)

    return None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def load_database_url_from_config(
    path: StrPath,
    *,
    name: str | None = None,
) -> str:
    """
    Extract a database URL/DSN from a YAML/JSON config file.

    The loader is schema-tolerant: it looks for a top-level "databases" map
    and then for a named entry (``name``). Each entry may contain either a
    ``connection_string``/``url``/``dsn`` or a nested ``default`` block with
    those fields.

    Parameters
    ----------
    path : StrPath
        Location of the configuration file.
    name : str | None, optional
        Named database entry under the ``databases`` map. ``None`` or an
        empty string selects the entry called ``default``.

    Returns
    -------
    str
        Resolved database URL/DSN string.

    Raises
    ------
    KeyError
        If the config has no top-level ``databases`` mapping, or the
        specified database entry is not found.
    TypeError
        If the config or the selected entry is not a mapping.
    ValueError
        If no connection string/URL/DSN is found for the specified entry.
    """
    cfg = File.read_file(Path(path))
    if not isinstance(cfg, Mapping):
        raise TypeError('Database config must be a mapping')

    # ``cfg`` is known to be a mapping here, so it can be queried directly.
    databases = cfg.get('databases')
    if not isinstance(databases, Mapping):
        raise KeyError('Config missing top-level "databases" mapping')

    target = name or 'default'
    entry = databases.get(target)
    if entry is None:
        raise KeyError(f'Database entry "{target}" not found in config')
    if not isinstance(entry, Mapping):
        raise TypeError(f'Database entry "{target}" must be a mapping')

    url = _resolve_url_from_mapping(entry)
    if not url:
        raise ValueError(
            f'Database entry "{target}" lacks connection_string/url/dsn',
        )
    return url
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def make_engine(url: str | None = None, **engine_kwargs: Any) -> Engine:
    """
    Create a SQLAlchemy Engine, defaulting to env config if no URL given.

    Parameters
    ----------
    url : str | None, optional
        Database URL. ``None`` or an empty string falls back to the
        module-level ``DATABASE_URL``.
    **engine_kwargs : Any
        Extra keyword arguments forwarded to ``sqlalchemy.create_engine``.
        ``pool_pre_ping`` defaults to ``True`` but may be overridden here.

    Returns
    -------
    Engine
        The engine returned by ``sqlalchemy.create_engine``.
    """

    resolved_url = url or DATABASE_URL
    # ``setdefault`` lets a caller-supplied ``pool_pre_ping`` win instead of
    # colliding with a hard-coded keyword (duplicate-keyword ``TypeError``).
    engine_kwargs.setdefault('pool_pre_ping', True)
    return create_engine(resolved_url, **engine_kwargs)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# SECTION: SINGLETONS ======================================================= #
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# Module-level defaults for callers that rely on shared singletons. Both are
# created at import time; ``engine`` uses the default URL because no URL is
# passed, and ``session`` is a ``sessionmaker`` factory bound to that engine.
engine = make_engine()
session = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|