etlplus 0.4.7__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/api/README.md +24 -26
- etlplus/cli/commands.py +870 -0
- etlplus/cli/constants.py +65 -0
- etlplus/cli/handlers.py +426 -434
- etlplus/cli/io.py +320 -0
- etlplus/cli/main.py +14 -367
- etlplus/cli/options.py +49 -0
- etlplus/cli/state.py +335 -0
- etlplus/cli/types.py +33 -0
- etlplus/config/pipeline.py +11 -0
- etlplus/database/__init__.py +44 -0
- etlplus/database/ddl.py +319 -0
- etlplus/database/engine.py +151 -0
- etlplus/database/orm.py +354 -0
- etlplus/database/schema.py +274 -0
- etlplus/database/types.py +33 -0
- etlplus/run.py +2 -4
- etlplus/templates/__init__.py +5 -0
- etlplus/templates/ddl.sql.j2 +128 -0
- etlplus/templates/view.sql.j2 +69 -0
- etlplus/types.py +5 -0
- etlplus/utils.py +0 -31
- {etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/METADATA +66 -1
- {etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/RECORD +28 -14
- etlplus/cli/app.py +0 -1239
- {etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/WHEEL +0 -0
- {etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.4.7.dist-info → etlplus-0.8.3.dist-info}/top_level.txt +0 -0
etlplus/cli/handlers.py
CHANGED
|
@@ -6,124 +6,120 @@ Command handler functions for the ``etlplus`` command-line interface (CLI).
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
-
import argparse
|
|
10
|
-
import csv
|
|
11
|
-
import io
|
|
12
|
-
import json
|
|
13
9
|
import os
|
|
14
10
|
import sys
|
|
11
|
+
from collections.abc import Mapping
|
|
15
12
|
from pathlib import Path
|
|
16
13
|
from typing import Any
|
|
14
|
+
from typing import Literal
|
|
17
15
|
from typing import cast
|
|
18
16
|
|
|
19
17
|
from ..config import PipelineConfig
|
|
20
18
|
from ..config import load_pipeline_config
|
|
21
|
-
from ..
|
|
19
|
+
from ..database import load_table_spec
|
|
20
|
+
from ..database import render_tables
|
|
22
21
|
from ..extract import extract
|
|
23
22
|
from ..file import File
|
|
24
23
|
from ..load import load
|
|
25
24
|
from ..run import run
|
|
26
25
|
from ..transform import transform
|
|
27
26
|
from ..types import JSONData
|
|
28
|
-
from ..
|
|
29
|
-
from ..
|
|
27
|
+
from ..types import TemplateKey
|
|
28
|
+
from ..validate import FieldRules
|
|
30
29
|
from ..validate import validate
|
|
30
|
+
from . import io as cli_io
|
|
31
31
|
|
|
32
32
|
# SECTION: EXPORTS ========================================================== #
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
__all__ = [
|
|
36
36
|
# Functions
|
|
37
|
-
'
|
|
38
|
-
'
|
|
39
|
-
'
|
|
40
|
-
'
|
|
41
|
-
'
|
|
42
|
-
'
|
|
43
|
-
'
|
|
37
|
+
'extract_handler',
|
|
38
|
+
'check_handler',
|
|
39
|
+
'load_handler',
|
|
40
|
+
'render_handler',
|
|
41
|
+
'run_handler',
|
|
42
|
+
'transform_handler',
|
|
43
|
+
'validate_handler',
|
|
44
44
|
]
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
48
48
|
|
|
49
49
|
|
|
50
|
-
def
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
) -> None:
|
|
50
|
+
def _collect_table_specs(
|
|
51
|
+
config_path: str | None,
|
|
52
|
+
spec_path: str | None,
|
|
53
|
+
) -> list[dict[str, Any]]:
|
|
55
54
|
"""
|
|
56
|
-
|
|
55
|
+
Load table schemas from a pipeline config and/or standalone spec.
|
|
57
56
|
|
|
58
57
|
Parameters
|
|
59
58
|
----------
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
compact JSON string.
|
|
65
|
-
"""
|
|
66
|
-
if pretty:
|
|
67
|
-
print_json(data)
|
|
68
|
-
return
|
|
69
|
-
|
|
70
|
-
dumped = json.dumps(
|
|
71
|
-
data,
|
|
72
|
-
ensure_ascii=False,
|
|
73
|
-
separators=(',', ':'),
|
|
74
|
-
)
|
|
75
|
-
print(dumped)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
def _infer_payload_format(
|
|
79
|
-
text: str,
|
|
80
|
-
) -> str:
|
|
81
|
-
"""
|
|
82
|
-
Infer JSON vs CSV from payload text.
|
|
83
|
-
|
|
84
|
-
Parameters
|
|
85
|
-
----------
|
|
86
|
-
text : str
|
|
87
|
-
Incoming payload as plain text.
|
|
59
|
+
config_path : str | None
|
|
60
|
+
Path to a pipeline YAML config file.
|
|
61
|
+
spec_path : str | None
|
|
62
|
+
Path to a standalone table spec file.
|
|
88
63
|
|
|
89
64
|
Returns
|
|
90
65
|
-------
|
|
91
|
-
str
|
|
92
|
-
|
|
66
|
+
list[dict[str, Any]]
|
|
67
|
+
Collected table specification mappings.
|
|
93
68
|
"""
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
69
|
+
specs: list[dict[str, Any]] = []
|
|
70
|
+
|
|
71
|
+
if spec_path:
|
|
72
|
+
specs.append(dict(load_table_spec(Path(spec_path))))
|
|
73
|
+
|
|
74
|
+
if config_path:
|
|
75
|
+
cfg = load_pipeline_config(config_path, substitute=True)
|
|
76
|
+
specs.extend(getattr(cfg, 'table_schemas', []))
|
|
77
|
+
|
|
78
|
+
return specs
|
|
98
79
|
|
|
99
80
|
|
|
100
|
-
def
|
|
81
|
+
def _check_sections(
|
|
101
82
|
cfg: PipelineConfig,
|
|
102
|
-
|
|
83
|
+
*,
|
|
84
|
+
jobs: bool,
|
|
85
|
+
pipelines: bool,
|
|
86
|
+
sources: bool,
|
|
87
|
+
targets: bool,
|
|
88
|
+
transforms: bool,
|
|
103
89
|
) -> dict[str, Any]:
|
|
104
90
|
"""
|
|
105
|
-
Build sectioned metadata output for the
|
|
91
|
+
Build sectioned metadata output for the check command.
|
|
106
92
|
|
|
107
93
|
Parameters
|
|
108
94
|
----------
|
|
109
95
|
cfg : PipelineConfig
|
|
110
96
|
The loaded pipeline configuration.
|
|
111
|
-
|
|
112
|
-
|
|
97
|
+
jobs : bool
|
|
98
|
+
Whether to include job metadata.
|
|
99
|
+
pipelines : bool
|
|
100
|
+
Whether to include pipeline metadata.
|
|
101
|
+
sources : bool
|
|
102
|
+
Whether to include source metadata.
|
|
103
|
+
targets : bool
|
|
104
|
+
Whether to include target metadata.
|
|
105
|
+
transforms : bool
|
|
106
|
+
Whether to include transform metadata.
|
|
113
107
|
|
|
114
108
|
Returns
|
|
115
109
|
-------
|
|
116
110
|
dict[str, Any]
|
|
117
|
-
Metadata output for the
|
|
111
|
+
Metadata output for the check command.
|
|
118
112
|
"""
|
|
119
113
|
sections: dict[str, Any] = {}
|
|
120
|
-
if
|
|
114
|
+
if jobs:
|
|
115
|
+
sections['jobs'] = _pipeline_summary(cfg)['jobs']
|
|
116
|
+
if pipelines:
|
|
121
117
|
sections['pipelines'] = [cfg.name]
|
|
122
|
-
if
|
|
118
|
+
if sources:
|
|
123
119
|
sections['sources'] = [src.name for src in cfg.sources]
|
|
124
|
-
if
|
|
120
|
+
if targets:
|
|
125
121
|
sections['targets'] = [tgt.name for tgt in cfg.targets]
|
|
126
|
-
if
|
|
122
|
+
if transforms:
|
|
127
123
|
sections['transforms'] = [
|
|
128
124
|
getattr(trf, 'name', None) for trf in cfg.transforms
|
|
129
125
|
]
|
|
@@ -132,106 +128,6 @@ def _list_sections(
|
|
|
132
128
|
return sections
|
|
133
129
|
|
|
134
130
|
|
|
135
|
-
def _explicit_cli_format(
|
|
136
|
-
args: argparse.Namespace,
|
|
137
|
-
) -> str | None:
|
|
138
|
-
"""Return the explicit CLI format hint when provided."""
|
|
139
|
-
|
|
140
|
-
if not getattr(args, '_format_explicit', False):
|
|
141
|
-
return None
|
|
142
|
-
for attr in ('format', 'target_format', 'source_format'):
|
|
143
|
-
value = getattr(args, attr, None)
|
|
144
|
-
if value is None:
|
|
145
|
-
continue
|
|
146
|
-
normalized = value.strip().lower()
|
|
147
|
-
if normalized:
|
|
148
|
-
return normalized
|
|
149
|
-
return None
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def _materialize_file_payload(
|
|
153
|
-
source: object,
|
|
154
|
-
*,
|
|
155
|
-
format_hint: str | None,
|
|
156
|
-
format_explicit: bool,
|
|
157
|
-
) -> JSONData | object:
|
|
158
|
-
"""
|
|
159
|
-
Return structured payloads when ``source`` references a file.
|
|
160
|
-
|
|
161
|
-
Parameters
|
|
162
|
-
----------
|
|
163
|
-
source : object
|
|
164
|
-
Input source of data, possibly a file path.
|
|
165
|
-
format_hint : str | None
|
|
166
|
-
Explicit format hint: 'json', 'csv', or None to infer.
|
|
167
|
-
format_explicit : bool
|
|
168
|
-
Whether an explicit format hint was provided.
|
|
169
|
-
|
|
170
|
-
Returns
|
|
171
|
-
-------
|
|
172
|
-
JSONData | object
|
|
173
|
-
Parsed JSON data when ``source`` is a file; otherwise the original
|
|
174
|
-
``source`` object.
|
|
175
|
-
"""
|
|
176
|
-
if isinstance(source, (dict, list)):
|
|
177
|
-
return cast(JSONData, source)
|
|
178
|
-
if not isinstance(source, (str, os.PathLike)):
|
|
179
|
-
return source
|
|
180
|
-
|
|
181
|
-
path = Path(source)
|
|
182
|
-
|
|
183
|
-
normalized_hint = (format_hint or '').strip().lower()
|
|
184
|
-
fmt: FileFormat | None = None
|
|
185
|
-
|
|
186
|
-
if format_explicit and normalized_hint:
|
|
187
|
-
try:
|
|
188
|
-
fmt = FileFormat(normalized_hint)
|
|
189
|
-
except ValueError:
|
|
190
|
-
fmt = None
|
|
191
|
-
elif not format_explicit:
|
|
192
|
-
suffix = path.suffix.lower().lstrip('.')
|
|
193
|
-
if suffix:
|
|
194
|
-
try:
|
|
195
|
-
fmt = FileFormat(suffix)
|
|
196
|
-
except ValueError:
|
|
197
|
-
fmt = None
|
|
198
|
-
|
|
199
|
-
if fmt is None:
|
|
200
|
-
return source
|
|
201
|
-
if fmt == FileFormat.CSV:
|
|
202
|
-
return _read_csv_rows(path)
|
|
203
|
-
return File(path, fmt).read()
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def _parse_text_payload(
|
|
207
|
-
text: str,
|
|
208
|
-
fmt: str | None,
|
|
209
|
-
) -> JSONData | str:
|
|
210
|
-
"""
|
|
211
|
-
Parse JSON/CSV text into a Python payload.
|
|
212
|
-
|
|
213
|
-
Parameters
|
|
214
|
-
----------
|
|
215
|
-
text : str
|
|
216
|
-
The input text payload.
|
|
217
|
-
fmt : str | None
|
|
218
|
-
Explicit format hint: 'json', 'csv', or None to infer.
|
|
219
|
-
|
|
220
|
-
Returns
|
|
221
|
-
-------
|
|
222
|
-
JSONData | str
|
|
223
|
-
The parsed payload as JSON data or raw text.
|
|
224
|
-
"""
|
|
225
|
-
|
|
226
|
-
effective = (fmt or '').strip().lower() or _infer_payload_format(text)
|
|
227
|
-
if effective == 'json':
|
|
228
|
-
return cast(JSONData, json_type(text))
|
|
229
|
-
if effective == 'csv':
|
|
230
|
-
reader = csv.DictReader(io.StringIO(text))
|
|
231
|
-
return [dict(row) for row in reader]
|
|
232
|
-
return text
|
|
233
|
-
|
|
234
|
-
|
|
235
131
|
def _pipeline_summary(
|
|
236
132
|
cfg: PipelineConfig,
|
|
237
133
|
) -> dict[str, Any]:
|
|
@@ -260,406 +156,502 @@ def _pipeline_summary(
|
|
|
260
156
|
}
|
|
261
157
|
|
|
262
158
|
|
|
263
|
-
|
|
264
|
-
args: argparse.Namespace,
|
|
265
|
-
) -> tuple[bool, bool]:
|
|
266
|
-
"""Return presentation toggles from the parsed namespace.
|
|
267
|
-
|
|
268
|
-
Parameters
|
|
269
|
-
----------
|
|
270
|
-
args : argparse.Namespace
|
|
271
|
-
Namespace produced by the CLI parser.
|
|
272
|
-
|
|
273
|
-
Returns
|
|
274
|
-
-------
|
|
275
|
-
tuple[bool, bool]
|
|
276
|
-
Pair of ``(pretty, quiet)`` flags with safe defaults.
|
|
277
|
-
"""
|
|
278
|
-
return getattr(args, 'pretty', True), getattr(args, 'quiet', False)
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
def _read_csv_rows(
|
|
282
|
-
path: Path,
|
|
283
|
-
) -> list[dict[str, str]]:
|
|
284
|
-
"""
|
|
285
|
-
Read CSV rows into dictionaries.
|
|
286
|
-
|
|
287
|
-
Parameters
|
|
288
|
-
----------
|
|
289
|
-
path : Path
|
|
290
|
-
Path to a CSV file.
|
|
291
|
-
|
|
292
|
-
Returns
|
|
293
|
-
-------
|
|
294
|
-
list[dict[str, str]]
|
|
295
|
-
List of dictionaries, each representing a row in the CSV file.
|
|
296
|
-
"""
|
|
297
|
-
with path.open(newline='', encoding='utf-8') as handle:
|
|
298
|
-
reader = csv.DictReader(handle)
|
|
299
|
-
return [dict(row) for row in reader]
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
def _read_stdin_text() -> str:
|
|
303
|
-
"""
|
|
304
|
-
Return every character from ``stdin`` as a single string.
|
|
305
|
-
|
|
306
|
-
Returns
|
|
307
|
-
-------
|
|
308
|
-
str
|
|
309
|
-
Entire ``stdin`` contents.
|
|
310
|
-
"""
|
|
311
|
-
return sys.stdin.read()
|
|
159
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
312
160
|
|
|
313
161
|
|
|
314
|
-
def
|
|
315
|
-
source: object,
|
|
162
|
+
def check_handler(
|
|
316
163
|
*,
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
164
|
+
config: str,
|
|
165
|
+
jobs: bool = False,
|
|
166
|
+
pipelines: bool = False,
|
|
167
|
+
sources: bool = False,
|
|
168
|
+
summary: bool = False,
|
|
169
|
+
targets: bool = False,
|
|
170
|
+
transforms: bool = False,
|
|
171
|
+
substitute: bool = True,
|
|
172
|
+
pretty: bool = True,
|
|
173
|
+
) -> int:
|
|
321
174
|
"""
|
|
322
|
-
|
|
175
|
+
Print requested pipeline sections from a YAML configuration.
|
|
323
176
|
|
|
324
177
|
Parameters
|
|
325
178
|
----------
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
179
|
+
config : str
|
|
180
|
+
Path to the pipeline YAML configuration.
|
|
181
|
+
jobs : bool, optional
|
|
182
|
+
Whether to include job metadata. Default is ``False``.
|
|
183
|
+
pipelines : bool, optional
|
|
184
|
+
Whether to include pipeline metadata. Default is ``False``.
|
|
185
|
+
sources : bool, optional
|
|
186
|
+
Whether to include source metadata. Default is ``False``.
|
|
187
|
+
summary : bool, optional
|
|
188
|
+
Whether to print a full summary of the pipeline. Default is ``False``.
|
|
189
|
+
targets : bool, optional
|
|
190
|
+
Whether to include target metadata. Default is ``False``.
|
|
191
|
+
transforms : bool, optional
|
|
192
|
+
Whether to include transform metadata. Default is ``False``.
|
|
193
|
+
substitute : bool, optional
|
|
194
|
+
Whether to perform environment variable substitution. Default is
|
|
195
|
+
``True``.
|
|
196
|
+
pretty : bool, optional
|
|
197
|
+
Whether to pretty-print output. Default is ``True``.
|
|
336
198
|
|
|
337
199
|
Returns
|
|
338
200
|
-------
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
disabled.
|
|
342
|
-
"""
|
|
343
|
-
|
|
344
|
-
if isinstance(source, (os.PathLike, str)) and str(source) == '-':
|
|
345
|
-
text = _read_stdin_text()
|
|
346
|
-
return _parse_text_payload(text, format_hint)
|
|
201
|
+
int
|
|
202
|
+
Zero on success.
|
|
347
203
|
|
|
348
|
-
|
|
349
|
-
|
|
204
|
+
"""
|
|
205
|
+
cfg = load_pipeline_config(config, substitute=substitute)
|
|
206
|
+
if summary:
|
|
207
|
+
cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
|
|
208
|
+
return 0
|
|
350
209
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
210
|
+
cli_io.emit_json(
|
|
211
|
+
_check_sections(
|
|
212
|
+
cfg,
|
|
213
|
+
jobs=jobs,
|
|
214
|
+
pipelines=pipelines,
|
|
215
|
+
sources=sources,
|
|
216
|
+
targets=targets,
|
|
217
|
+
transforms=transforms,
|
|
218
|
+
),
|
|
219
|
+
pretty=pretty,
|
|
355
220
|
)
|
|
221
|
+
return 0
|
|
356
222
|
|
|
357
223
|
|
|
358
|
-
def
|
|
359
|
-
data: Any,
|
|
360
|
-
output_path: str | None,
|
|
224
|
+
def extract_handler(
|
|
361
225
|
*,
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
data : Any
|
|
370
|
-
Data to write.
|
|
371
|
-
output_path : str | None
|
|
372
|
-
Path to write the output to. None to print to stdout.
|
|
373
|
-
success_message : str
|
|
374
|
-
Message to print upon successful write.
|
|
375
|
-
|
|
376
|
-
Returns
|
|
377
|
-
-------
|
|
378
|
-
bool
|
|
379
|
-
True if output was written to a file, False if printed to stdout.
|
|
380
|
-
"""
|
|
381
|
-
if not output_path or output_path == '-':
|
|
382
|
-
return False
|
|
383
|
-
File(Path(output_path), FileFormat.JSON).write_json(data)
|
|
384
|
-
print(f'{success_message} {output_path}')
|
|
385
|
-
return True
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
# SECTION: FUNCTIONS ======================================================== #
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
def cmd_extract(
|
|
392
|
-
args: argparse.Namespace,
|
|
226
|
+
source_type: str,
|
|
227
|
+
source: str,
|
|
228
|
+
format_hint: str | None = None,
|
|
229
|
+
format_explicit: bool = False,
|
|
230
|
+
target: str | None = None,
|
|
231
|
+
output: str | None = None,
|
|
232
|
+
pretty: bool = True,
|
|
393
233
|
) -> int:
|
|
394
234
|
"""
|
|
395
235
|
Extract data from a source.
|
|
396
236
|
|
|
397
237
|
Parameters
|
|
398
238
|
----------
|
|
399
|
-
|
|
400
|
-
|
|
239
|
+
source_type : str
|
|
240
|
+
The type of the source (e.g., 'file', 'api', 'database').
|
|
241
|
+
source : str
|
|
242
|
+
The source identifier (e.g., path, URL, DSN).
|
|
243
|
+
format_hint : str | None, optional
|
|
244
|
+
An optional format hint (e.g., 'json', 'csv'). Default is ``None``.
|
|
245
|
+
format_explicit : bool, optional
|
|
246
|
+
Whether the format hint was explicitly provided. Default is ``False``.
|
|
247
|
+
target : str | None, optional
|
|
248
|
+
The target destination (e.g., path, database). Default is ``None``.
|
|
249
|
+
output : str | None, optional
|
|
250
|
+
Path to write output data. Default is ``None``.
|
|
251
|
+
pretty : bool, optional
|
|
252
|
+
Whether to pretty-print output. Default is ``True``.
|
|
401
253
|
|
|
402
254
|
Returns
|
|
403
255
|
-------
|
|
404
256
|
int
|
|
405
257
|
Zero on success.
|
|
258
|
+
|
|
406
259
|
"""
|
|
407
|
-
|
|
408
|
-
explicit_format = _explicit_cli_format(args)
|
|
260
|
+
explicit_format = format_hint if format_explicit else None
|
|
409
261
|
|
|
410
|
-
if
|
|
411
|
-
text =
|
|
412
|
-
payload =
|
|
413
|
-
|
|
262
|
+
if source == '-':
|
|
263
|
+
text = cli_io.read_stdin_text()
|
|
264
|
+
payload = cli_io.parse_text_payload(
|
|
265
|
+
text,
|
|
266
|
+
format_hint,
|
|
267
|
+
)
|
|
268
|
+
cli_io.emit_json(payload, pretty=pretty)
|
|
414
269
|
|
|
415
270
|
return 0
|
|
416
271
|
|
|
417
272
|
result = extract(
|
|
418
|
-
|
|
419
|
-
|
|
273
|
+
source_type,
|
|
274
|
+
source,
|
|
420
275
|
file_format=explicit_format,
|
|
421
276
|
)
|
|
422
|
-
output_path =
|
|
423
|
-
if output_path is None:
|
|
424
|
-
output_path = getattr(args, 'output', None)
|
|
277
|
+
output_path = target or output
|
|
425
278
|
|
|
426
|
-
|
|
279
|
+
cli_io.emit_or_write(
|
|
427
280
|
result,
|
|
428
281
|
output_path,
|
|
282
|
+
pretty=pretty,
|
|
429
283
|
success_message='Data extracted and saved to',
|
|
430
|
-
)
|
|
431
|
-
_emit_json(result, pretty=pretty)
|
|
284
|
+
)
|
|
432
285
|
|
|
433
286
|
return 0
|
|
434
287
|
|
|
435
288
|
|
|
436
|
-
def
|
|
437
|
-
|
|
289
|
+
def load_handler(
|
|
290
|
+
*,
|
|
291
|
+
source: str,
|
|
292
|
+
target_type: str,
|
|
293
|
+
target: str,
|
|
294
|
+
source_format: str | None = None,
|
|
295
|
+
target_format: str | None = None,
|
|
296
|
+
format_explicit: bool = False,
|
|
297
|
+
output: str | None = None,
|
|
298
|
+
pretty: bool = True,
|
|
438
299
|
) -> int:
|
|
439
300
|
"""
|
|
440
|
-
|
|
301
|
+
Load data into a target.
|
|
441
302
|
|
|
442
303
|
Parameters
|
|
443
304
|
----------
|
|
444
|
-
|
|
445
|
-
|
|
305
|
+
source : str
|
|
306
|
+
The source payload (e.g., path, inline data).
|
|
307
|
+
target_type : str
|
|
308
|
+
The type of the target (e.g., 'file', 'database').
|
|
309
|
+
target : str
|
|
310
|
+
The target destination (e.g., path, DSN).
|
|
311
|
+
source_format : str | None, optional
|
|
312
|
+
An optional source format hint (e.g., 'json', 'csv'). Default is
|
|
313
|
+
``None``.
|
|
314
|
+
target_format : str | None, optional
|
|
315
|
+
An optional target format hint (e.g., 'json', 'csv'). Default is
|
|
316
|
+
``None``.
|
|
317
|
+
format_explicit : bool, optional
|
|
318
|
+
Whether the format hint was explicitly provided. Default is ``False``.
|
|
319
|
+
output : str | None, optional
|
|
320
|
+
Path to write output data. Default is ``None``.
|
|
321
|
+
pretty : bool, optional
|
|
322
|
+
Whether to pretty-print output. Default is ``True``.
|
|
446
323
|
|
|
447
324
|
Returns
|
|
448
325
|
-------
|
|
449
326
|
int
|
|
450
327
|
Zero on success.
|
|
451
328
|
"""
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
format_hint=
|
|
460
|
-
format_explicit=
|
|
329
|
+
explicit_format = target_format if format_explicit else None
|
|
330
|
+
|
|
331
|
+
# Allow piping into load.
|
|
332
|
+
source_value = cast(
|
|
333
|
+
str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
|
|
334
|
+
cli_io.resolve_cli_payload(
|
|
335
|
+
source,
|
|
336
|
+
format_hint=source_format,
|
|
337
|
+
format_explicit=source_format is not None,
|
|
338
|
+
hydrate_files=False,
|
|
461
339
|
),
|
|
462
340
|
)
|
|
463
|
-
result = validate(payload, args.rules)
|
|
464
341
|
|
|
465
|
-
|
|
466
|
-
if
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
342
|
+
# Allow piping out of load for file targets.
|
|
343
|
+
if target_type == 'file' and target == '-':
|
|
344
|
+
payload = cli_io.materialize_file_payload(
|
|
345
|
+
source_value,
|
|
346
|
+
format_hint=source_format,
|
|
347
|
+
format_explicit=source_format is not None,
|
|
348
|
+
)
|
|
349
|
+
cli_io.emit_json(payload, pretty=pretty)
|
|
350
|
+
return 0
|
|
351
|
+
|
|
352
|
+
result = load(
|
|
353
|
+
source_value,
|
|
354
|
+
target_type,
|
|
355
|
+
target,
|
|
356
|
+
file_format=explicit_format,
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
output_path = output
|
|
360
|
+
cli_io.emit_or_write(
|
|
361
|
+
result,
|
|
362
|
+
output_path,
|
|
363
|
+
pretty=pretty,
|
|
364
|
+
success_message='Load result saved to',
|
|
365
|
+
)
|
|
481
366
|
|
|
482
367
|
return 0
|
|
483
368
|
|
|
484
369
|
|
|
485
|
-
def
|
|
486
|
-
|
|
370
|
+
def render_handler(
|
|
371
|
+
*,
|
|
372
|
+
config: str | None = None,
|
|
373
|
+
spec: str | None = None,
|
|
374
|
+
table: str | None = None,
|
|
375
|
+
template: TemplateKey | None = None,
|
|
376
|
+
template_path: str | None = None,
|
|
377
|
+
output: str | None = None,
|
|
378
|
+
pretty: bool = True,
|
|
379
|
+
quiet: bool = False,
|
|
487
380
|
) -> int:
|
|
488
381
|
"""
|
|
489
|
-
|
|
382
|
+
Render SQL DDL statements from table schema specs.
|
|
490
383
|
|
|
491
384
|
Parameters
|
|
492
385
|
----------
|
|
493
|
-
|
|
494
|
-
|
|
386
|
+
config : str | None, optional
|
|
387
|
+
Path to a pipeline YAML configuration. Default is ``None``.
|
|
388
|
+
spec : str | None, optional
|
|
389
|
+
Path to a standalone table spec file. Default is ``None``.
|
|
390
|
+
table : str | None, optional
|
|
391
|
+
Table name filter. Default is ``None``.
|
|
392
|
+
template : TemplateKey | None, optional
|
|
393
|
+
The template key to use for rendering. Default is ``None``.
|
|
394
|
+
template_path : str | None, optional
|
|
395
|
+
Path to a custom template file. Default is ``None``.
|
|
396
|
+
output : str | None, optional
|
|
397
|
+
Path to write output SQL. Default is ``None``.
|
|
398
|
+
pretty : bool, optional
|
|
399
|
+
Whether to pretty-print output. Default is ``True``.
|
|
400
|
+
quiet : bool, optional
|
|
401
|
+
Whether to suppress non-error output. Default is ``False``.
|
|
495
402
|
|
|
496
403
|
Returns
|
|
497
404
|
-------
|
|
498
405
|
int
|
|
499
406
|
Zero on success.
|
|
500
407
|
"""
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
408
|
+
template_value: TemplateKey = template or 'ddl'
|
|
409
|
+
template_path_override = template_path
|
|
410
|
+
table_filter = table
|
|
411
|
+
spec_path = spec
|
|
412
|
+
config_path = config
|
|
413
|
+
|
|
414
|
+
# If the provided template points to a file, treat it as a path override.
|
|
415
|
+
file_override = template_path_override
|
|
416
|
+
template_key: TemplateKey | None = template_value
|
|
417
|
+
if template_path_override is None:
|
|
418
|
+
candidate_path = Path(template_value)
|
|
419
|
+
if candidate_path.exists():
|
|
420
|
+
file_override = str(candidate_path)
|
|
421
|
+
template_key = None
|
|
422
|
+
|
|
423
|
+
specs = _collect_table_specs(config_path, spec_path)
|
|
424
|
+
if table_filter:
|
|
425
|
+
specs = [
|
|
426
|
+
spec
|
|
427
|
+
for spec in specs
|
|
428
|
+
if str(spec.get('table')) == table_filter
|
|
429
|
+
or str(spec.get('name', '')) == table_filter
|
|
430
|
+
]
|
|
504
431
|
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
432
|
+
if not specs:
|
|
433
|
+
target_desc = table_filter or 'table_schemas'
|
|
434
|
+
print(
|
|
435
|
+
'No table schemas found for '
|
|
436
|
+
f'{target_desc}. Provide --spec or a pipeline --config with '
|
|
437
|
+
'table_schemas.',
|
|
438
|
+
file=sys.stderr,
|
|
439
|
+
)
|
|
440
|
+
return 1
|
|
513
441
|
|
|
514
|
-
|
|
442
|
+
rendered_chunks = render_tables(
|
|
443
|
+
specs,
|
|
444
|
+
template=template_key,
|
|
445
|
+
template_path=file_override,
|
|
446
|
+
)
|
|
447
|
+
sql_text = (
|
|
448
|
+
'\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
|
|
449
|
+
)
|
|
450
|
+
rendered_output = sql_text if pretty else sql_text.rstrip('\n')
|
|
515
451
|
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
452
|
+
output_path = output
|
|
453
|
+
if output_path and output_path != '-':
|
|
454
|
+
Path(output_path).write_text(rendered_output, encoding='utf-8')
|
|
455
|
+
if not quiet:
|
|
456
|
+
print(f'Rendered {len(specs)} schema(s) to {output_path}')
|
|
457
|
+
return 0
|
|
522
458
|
|
|
459
|
+
print(rendered_output)
|
|
523
460
|
return 0
|
|
524
461
|
|
|
525
462
|
|
|
526
|
-
def
|
|
527
|
-
|
|
463
|
+
def run_handler(
|
|
464
|
+
*,
|
|
465
|
+
config: str,
|
|
466
|
+
job: str | None = None,
|
|
467
|
+
pipeline: str | None = None,
|
|
468
|
+
pretty: bool = True,
|
|
528
469
|
) -> int:
|
|
529
470
|
"""
|
|
530
|
-
|
|
471
|
+
Execute an ETL job end-to-end from a pipeline YAML configuration.
|
|
531
472
|
|
|
532
473
|
Parameters
|
|
533
474
|
----------
|
|
534
|
-
|
|
535
|
-
|
|
475
|
+
config : str
|
|
476
|
+
Path to the pipeline YAML configuration.
|
|
477
|
+
job : str | None, optional
|
|
478
|
+
Name of the job to run. If not provided, runs the entire pipeline.
|
|
479
|
+
Default is ``None``.
|
|
480
|
+
pipeline : str | None, optional
|
|
481
|
+
Alias for ``job``. Default is ``None``.
|
|
482
|
+
pretty : bool, optional
|
|
483
|
+
Whether to pretty-print output. Default is ``True``.
|
|
536
484
|
|
|
537
485
|
Returns
|
|
538
486
|
-------
|
|
539
487
|
int
|
|
540
488
|
Zero on success.
|
|
541
489
|
"""
|
|
542
|
-
|
|
543
|
-
explicit_format = _explicit_cli_format(args)
|
|
544
|
-
|
|
545
|
-
# Allow piping into load.
|
|
546
|
-
source_format = getattr(args, 'source_format', None)
|
|
547
|
-
source_value = cast(
|
|
548
|
-
str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
|
|
549
|
-
_resolve_cli_payload(
|
|
550
|
-
args.source,
|
|
551
|
-
format_hint=source_format,
|
|
552
|
-
format_explicit=source_format is not None,
|
|
553
|
-
hydrate_files=False,
|
|
554
|
-
),
|
|
555
|
-
)
|
|
490
|
+
cfg = load_pipeline_config(config, substitute=True)
|
|
556
491
|
|
|
557
|
-
|
|
558
|
-
if
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
format_hint=source_format,
|
|
562
|
-
format_explicit=source_format is not None,
|
|
563
|
-
)
|
|
564
|
-
_emit_json(payload, pretty=pretty)
|
|
492
|
+
job_name = job or pipeline
|
|
493
|
+
if job_name:
|
|
494
|
+
result = run(job=job_name, config_path=config)
|
|
495
|
+
cli_io.emit_json({'status': 'ok', 'result': result}, pretty=pretty)
|
|
565
496
|
return 0
|
|
566
497
|
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
args.target_type,
|
|
570
|
-
args.target,
|
|
571
|
-
file_format=explicit_format,
|
|
572
|
-
)
|
|
498
|
+
cli_io.emit_json(_pipeline_summary(cfg), pretty=pretty)
|
|
499
|
+
return 0
|
|
573
500
|
|
|
574
|
-
output_path = getattr(args, 'output', None)
|
|
575
|
-
if not _write_json_output(
|
|
576
|
-
result,
|
|
577
|
-
output_path,
|
|
578
|
-
success_message='Load result saved to',
|
|
579
|
-
):
|
|
580
|
-
_emit_json(result, pretty=pretty)
|
|
581
501
|
|
|
582
|
-
|
|
502
|
+
TransformOperations = Mapping[
|
|
503
|
+
Literal['filter', 'map', 'select', 'sort', 'aggregate'],
|
|
504
|
+
Any,
|
|
505
|
+
]
|
|
583
506
|
|
|
584
507
|
|
|
585
|
-
def
|
|
586
|
-
|
|
508
|
+
def transform_handler(
|
|
509
|
+
*,
|
|
510
|
+
source: str,
|
|
511
|
+
operations: JSONData | str,
|
|
512
|
+
target: str | None = None,
|
|
513
|
+
source_format: str | None = None,
|
|
514
|
+
target_format: str | None = None,
|
|
515
|
+
pretty: bool = True,
|
|
516
|
+
format_explicit: bool = False,
|
|
587
517
|
) -> int:
|
|
588
518
|
"""
|
|
589
|
-
|
|
519
|
+
Transform data from a source.
|
|
590
520
|
|
|
591
521
|
Parameters
|
|
592
522
|
----------
|
|
593
|
-
|
|
594
|
-
|
|
523
|
+
source : str
|
|
524
|
+
The source payload (e.g., path, inline data).
|
|
525
|
+
operations : JSONData | str
|
|
526
|
+
The transformation operations (inline JSON or path).
|
|
527
|
+
target : str | None, optional
|
|
528
|
+
The target destination (e.g., path). Default is ``None``.
|
|
529
|
+
source_format : str | None, optional
|
|
530
|
+
An optional source format hint (e.g., 'json', 'csv'). Default is
|
|
531
|
+
``None``.
|
|
532
|
+
target_format : str | None, optional
|
|
533
|
+
An optional target format hint (e.g., 'json', 'csv'). Default is
|
|
534
|
+
``None``.
|
|
535
|
+
pretty : bool, optional
|
|
536
|
+
Whether to pretty-print output. Default is ``True``.
|
|
537
|
+
format_explicit : bool, optional
|
|
538
|
+
Whether the format hint was explicitly provided. Default is ``False``.
|
|
595
539
|
|
|
596
540
|
Returns
|
|
597
541
|
-------
|
|
598
542
|
int
|
|
599
543
|
Zero on success.
|
|
544
|
+
|
|
545
|
+
Raises
|
|
546
|
+
------
|
|
547
|
+
ValueError
|
|
548
|
+
If the operations payload is not a mapping.
|
|
600
549
|
"""
|
|
601
|
-
|
|
550
|
+
format_hint: str | None = source_format
|
|
551
|
+
format_explicit = format_hint is not None or format_explicit
|
|
552
|
+
|
|
553
|
+
payload = cast(
|
|
554
|
+
JSONData | str,
|
|
555
|
+
cli_io.resolve_cli_payload(
|
|
556
|
+
source,
|
|
557
|
+
format_hint=format_hint,
|
|
558
|
+
format_explicit=format_explicit,
|
|
559
|
+
),
|
|
560
|
+
)
|
|
602
561
|
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
or getattr(args, 'pipeline', None)
|
|
562
|
+
operations_payload = cli_io.resolve_cli_payload(
|
|
563
|
+
operations,
|
|
564
|
+
format_hint=None,
|
|
565
|
+
format_explicit=format_explicit,
|
|
608
566
|
)
|
|
567
|
+
if not isinstance(operations_payload, dict):
|
|
568
|
+
raise ValueError('operations must resolve to a mapping of transforms')
|
|
609
569
|
|
|
610
|
-
|
|
611
|
-
print_json({'jobs': _pipeline_summary(cfg)['jobs']})
|
|
612
|
-
return 0
|
|
570
|
+
data = transform(payload, cast(TransformOperations, operations_payload))
|
|
613
571
|
|
|
614
|
-
if
|
|
615
|
-
|
|
616
|
-
|
|
572
|
+
if target and target != '-':
|
|
573
|
+
File.write_file(target, data, file_format=target_format)
|
|
574
|
+
print(f'Data transformed and saved to {target}')
|
|
617
575
|
return 0
|
|
618
576
|
|
|
619
|
-
|
|
577
|
+
cli_io.emit_json(data, pretty=pretty)
|
|
620
578
|
return 0
|
|
621
579
|
|
|
622
580
|
|
|
623
|
-
def
|
|
581
|
+
def validate_handler(
|
|
582
|
+
*,
|
|
583
|
+
source: str,
|
|
584
|
+
rules: JSONData | str,
|
|
585
|
+
source_format: str | None = None,
|
|
586
|
+
target: str | None = None,
|
|
587
|
+
format_explicit: bool = False,
|
|
588
|
+
pretty: bool = True,
|
|
589
|
+
) -> int:
|
|
624
590
|
"""
|
|
625
|
-
|
|
591
|
+
Validate data from a source.
|
|
626
592
|
|
|
627
593
|
Parameters
|
|
628
594
|
----------
|
|
629
|
-
|
|
630
|
-
|
|
595
|
+
source : str
|
|
596
|
+
The source payload (e.g., path, inline data).
|
|
597
|
+
rules : JSONData | str
|
|
598
|
+
The validation rules (inline JSON or path).
|
|
599
|
+
source_format : str | None, optional
|
|
600
|
+
An optional source format hint (e.g., 'json', 'csv'). Default is
|
|
601
|
+
``None``.
|
|
602
|
+
target : str | None, optional
|
|
603
|
+
The target destination (e.g., path). Default is ``None``.
|
|
604
|
+
format_explicit : bool, optional
|
|
605
|
+
Whether the format hint was explicitly provided. Default is ``False``.
|
|
606
|
+
pretty : bool, optional
|
|
607
|
+
Whether to pretty-print output. Default is ``True``.
|
|
631
608
|
|
|
632
609
|
Returns
|
|
633
610
|
-------
|
|
634
611
|
int
|
|
635
612
|
Zero on success.
|
|
636
|
-
"""
|
|
637
|
-
cfg = load_pipeline_config(args.config, substitute=True)
|
|
638
|
-
print_json(_list_sections(cfg, args))
|
|
639
|
-
return 0
|
|
640
|
-
|
|
641
613
|
|
|
642
|
-
|
|
614
|
+
Raises
|
|
615
|
+
------
|
|
616
|
+
ValueError
|
|
617
|
+
If the rules payload is not a mapping.
|
|
643
618
|
"""
|
|
644
|
-
|
|
619
|
+
format_hint: str | None = source_format
|
|
620
|
+
payload = cast(
|
|
621
|
+
JSONData | str,
|
|
622
|
+
cli_io.resolve_cli_payload(
|
|
623
|
+
source,
|
|
624
|
+
format_hint=format_hint,
|
|
625
|
+
format_explicit=format_explicit,
|
|
626
|
+
),
|
|
627
|
+
)
|
|
645
628
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
629
|
+
rules_payload = cli_io.resolve_cli_payload(
|
|
630
|
+
rules,
|
|
631
|
+
format_hint=None,
|
|
632
|
+
format_explicit=format_explicit,
|
|
633
|
+
)
|
|
634
|
+
if not isinstance(rules_payload, dict):
|
|
635
|
+
raise ValueError('rules must resolve to a mapping of field rules')
|
|
650
636
|
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
int
|
|
654
|
-
Zero on success.
|
|
655
|
-
"""
|
|
656
|
-
cfg = load_pipeline_config(args.config, substitute=True)
|
|
637
|
+
field_rules = cast(Mapping[str, FieldRules], rules_payload)
|
|
638
|
+
result = validate(payload, field_rules)
|
|
657
639
|
|
|
658
|
-
|
|
659
|
-
if
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
640
|
+
target_path = target
|
|
641
|
+
if target_path:
|
|
642
|
+
validated_data = result.get('data')
|
|
643
|
+
if validated_data is not None:
|
|
644
|
+
cli_io.write_json_output(
|
|
645
|
+
validated_data,
|
|
646
|
+
target_path,
|
|
647
|
+
success_message='Validation result saved to',
|
|
648
|
+
)
|
|
649
|
+
else:
|
|
650
|
+
print(
|
|
651
|
+
f'Validation failed, no data to save for {target_path}',
|
|
652
|
+
file=sys.stderr,
|
|
653
|
+
)
|
|
654
|
+
else:
|
|
655
|
+
cli_io.emit_json(result, pretty=pretty)
|
|
663
656
|
|
|
664
|
-
print_json(_pipeline_summary(cfg))
|
|
665
657
|
return 0
|