etlplus 0.5.2__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- etlplus/api/README.md +24 -26
- etlplus/cli/commands.py +924 -0
- etlplus/cli/constants.py +71 -0
- etlplus/cli/handlers.py +369 -484
- etlplus/cli/io.py +336 -0
- etlplus/cli/main.py +16 -418
- etlplus/cli/options.py +49 -0
- etlplus/cli/state.py +336 -0
- etlplus/cli/types.py +33 -0
- etlplus/database/__init__.py +44 -0
- etlplus/database/ddl.py +319 -0
- etlplus/database/engine.py +151 -0
- etlplus/database/orm.py +354 -0
- etlplus/database/schema.py +274 -0
- etlplus/database/types.py +33 -0
- etlplus/enums.py +51 -1
- etlplus/load.py +1 -1
- etlplus/run.py +2 -4
- etlplus/types.py +5 -0
- etlplus/utils.py +1 -32
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/METADATA +84 -40
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/RECORD +26 -16
- etlplus/cli/app.py +0 -1367
- etlplus/ddl.py +0 -197
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/WHEEL +0 -0
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/entry_points.txt +0 -0
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/top_level.txt +0 -0
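The headline change in this release is structural: the monolithic `etlplus/cli/app.py` (1367 lines removed) is split into `commands.py`, `handlers.py`, `io.py`, `options.py`, `state.py`, and `types.py`, and the old top-level `etlplus/ddl.py` is superseded by a new `etlplus/database` subpackage (`ddl.py`, `engine.py`, `orm.py`, `schema.py`, `types.py`). The handlers diff below calls `dict(load_table_spec(Path(spec_path)))`, so table specs appear to load as mapping-like objects; the sketch below is a hypothetical illustration of calling that loader directly, and the spec path and printed keys are assumptions rather than documented API.

```python
from pathlib import Path

from etlplus.database import load_table_spec  # handlers.py imports this from ..database

# Hypothetical spec file; load_table_spec() appears to return a mapping-like
# object, since handlers.py wraps the result in dict() before collecting specs.
spec = dict(load_table_spec(Path('specs/users_table.yml')))
print(sorted(spec))  # inspect the top-level keys of the loaded table spec
```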
etlplus/cli/handlers.py
CHANGED
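In `handlers.py` itself, the `argparse.Namespace`-based `cmd_*` entry points are replaced by keyword-only `*_handler` functions, and payload parsing and JSON emission are delegated to the new `etlplus.cli.io` module (imported as `cli_io`). Because the new handlers take plain keyword arguments, they can be driven from Python as well as from the CLI; the sketch below is a minimal usage example based only on the signatures visible in the diff that follows, where the config path, data file, and job name are placeholders and none of this is documented as stable public API.

```python
from etlplus.cli.handlers import check_handler, extract_handler, run_handler

# Print the full summary of a pipeline configuration (placeholder path).
check_handler(config='pipeline.yml', summary=True)

# List only sources and targets, as compact JSON.
check_handler(config='pipeline.yml', sources=True, targets=True, pretty=False)

# Extract from a file source and pretty-print the result to stdout.
extract_handler(source_type='file', source='data/input.csv')

# Run a single named job end-to-end ('daily_load' is a hypothetical job name).
run_handler(config='pipeline.yml', job='daily_load')
```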
@@ -6,44 +6,41 @@ Command handler functions for the ``etlplus`` command-line interface (CLI).

 from __future__ import annotations

-import argparse
-import csv
-import io
-import json
 import os
 import sys
+from collections.abc import Mapping
 from pathlib import Path
 from typing import Any
+from typing import Literal
 from typing import cast

 from ..config import PipelineConfig
 from ..config import load_pipeline_config
-from ..
-from ..
-from ..enums import FileFormat
+from ..database import load_table_spec
+from ..database import render_tables
 from ..extract import extract
 from ..file import File
 from ..load import load
 from ..run import run
 from ..transform import transform
 from ..types import JSONData
-from ..
-from ..
+from ..types import TemplateKey
+from ..validate import FieldRules
 from ..validate import validate
+from . import io as cli_io

 # SECTION: EXPORTS ========================================================== #


 __all__ = [
     # Functions
-    '
-    '
-    '
-    '
-    '
-    '
-    '
-    'cmd_validate',
+    'extract_handler',
+    'check_handler',
+    'load_handler',
+    'render_handler',
+    'run_handler',
+    'transform_handler',
+    'validate_handler',
 ]


@@ -72,7 +69,7 @@ def _collect_table_specs(
     specs: list[dict[str, Any]] = []

     if spec_path:
-        specs.append(load_table_spec(Path(spec_path)))
+        specs.append(dict(load_table_spec(Path(spec_path))))

     if config_path:
         cfg = load_pipeline_config(config_path, substitute=True)
@@ -81,102 +78,48 @@ def _collect_table_specs(
     return specs


-def
-    data: Any,
-    *,
-    pretty: bool,
-) -> None:
-    """
-    Emit JSON to stdout honoring the pretty/compact preference.
-
-    Parameters
-    ----------
-    data : Any
-        Arbitrary JSON-serializable payload.
-    pretty : bool
-        When ``True`` pretty-print via :func:`print_json`; otherwise emit a
-        compact JSON string.
-    """
-    if pretty:
-        print_json(data)
-        return
-
-    dumped = json.dumps(
-        data,
-        ensure_ascii=False,
-        separators=(',', ':'),
-    )
-    print(dumped)
-
-
-def _explicit_cli_format(
-    args: argparse.Namespace,
-) -> str | None:
-    """Return the explicit CLI format hint when provided."""
-
-    if not getattr(args, '_format_explicit', False):
-        return None
-    for attr in ('format', 'target_format', 'source_format'):
-        value = getattr(args, attr, None)
-        if value is None:
-            continue
-        normalized = value.strip().lower()
-        if normalized:
-            return normalized
-    return None
-
-
-def _infer_payload_format(
-    text: str,
-) -> str:
-    """
-    Infer JSON vs CSV from payload text.
-
-    Parameters
-    ----------
-    text : str
-        Incoming payload as plain text.
-
-    Returns
-    -------
-    str
-        ``'json'`` when the text starts with ``{``/``[``, else ``'csv'``.
-    """
-    stripped = text.lstrip()
-    if stripped.startswith('{') or stripped.startswith('['):
-        return 'json'
-    return 'csv'
-
-
-def _list_sections(
+def _check_sections(
     cfg: PipelineConfig,
-
+    *,
+    jobs: bool,
+    pipelines: bool,
+    sources: bool,
+    targets: bool,
+    transforms: bool,
 ) -> dict[str, Any]:
     """
-    Build sectioned metadata output for the
+    Build sectioned metadata output for the check command.

     Parameters
     ----------
     cfg : PipelineConfig
         The loaded pipeline configuration.
-
-
+    jobs : bool
+        Whether to include job metadata.
+    pipelines : bool
+        Whether to include pipeline metadata.
+    sources : bool
+        Whether to include source metadata.
+    targets : bool
+        Whether to include target metadata.
+    transforms : bool
+        Whether to include transform metadata.

     Returns
     -------
     dict[str, Any]
-        Metadata output for the
+        Metadata output for the check command.
     """
     sections: dict[str, Any] = {}
-    if
+    if jobs:
         sections['jobs'] = _pipeline_summary(cfg)['jobs']
-    if
+    if pipelines:
         sections['pipelines'] = [cfg.name]
-    if
+    if sources:
         sections['sources'] = [src.name for src in cfg.sources]
-    if
+    if targets:
         sections['targets'] = [tgt.name for tgt in cfg.targets]
-    if
+    if transforms:
         sections['transforms'] = [
             getattr(trf, 'name', None) for trf in cfg.transforms
         ]
@@ -185,88 +128,6 @@ def _list_sections(
     return sections


-def _materialize_file_payload(
-    source: object,
-    *,
-    format_hint: str | None,
-    format_explicit: bool,
-) -> JSONData | object:
-    """
-    Return structured payloads when ``source`` references a file.
-
-    Parameters
-    ----------
-    source : object
-        Input source of data, possibly a file path.
-    format_hint : str | None
-        Explicit format hint: 'json', 'csv', or None to infer.
-    format_explicit : bool
-        Whether an explicit format hint was provided.
-
-    Returns
-    -------
-    JSONData | object
-        Parsed JSON data when ``source`` is a file; otherwise the original
-        ``source`` object.
-    """
-    if isinstance(source, (dict, list)):
-        return cast(JSONData, source)
-    if not isinstance(source, (str, os.PathLike)):
-        return source
-
-    path = Path(source)
-
-    normalized_hint = (format_hint or '').strip().lower()
-    fmt: FileFormat | None = None
-
-    if format_explicit and normalized_hint:
-        try:
-            fmt = FileFormat(normalized_hint)
-        except ValueError:
-            fmt = None
-    elif not format_explicit:
-        suffix = path.suffix.lower().lstrip('.')
-        if suffix:
-            try:
-                fmt = FileFormat(suffix)
-            except ValueError:
-                fmt = None
-
-    if fmt is None:
-        return source
-    if fmt == FileFormat.CSV:
-        return _read_csv_rows(path)
-    return File(path, fmt).read()
-
-
-def _parse_text_payload(
-    text: str,
-    fmt: str | None,
-) -> JSONData | str:
-    """
-    Parse JSON/CSV text into a Python payload.
-
-    Parameters
-    ----------
-    text : str
-        The input text payload.
-    fmt : str | None
-        Explicit format hint: 'json', 'csv', or None to infer.
-
-    Returns
-    -------
-    JSONData | str
-        The parsed payload as JSON data or raw text.
-    """
-    effective = (fmt or '').strip().lower() or _infer_payload_format(text)
-    if effective == 'json':
-        return cast(JSONData, json_type(text))
-    if effective == 'csv':
-        reader = csv.DictReader(io.StringIO(text))
-        return [dict(row) for row in reader]
-    return text
-
-
 def _pipeline_summary(
     cfg: PipelineConfig,
 ) -> dict[str, Any]:
@@ -295,294 +156,183 @@ def _pipeline_summary(
     }


-
-    args: argparse.Namespace,
-) -> tuple[bool, bool]:
-    """
-    Return presentation toggles from the parsed namespace.
-
-    Parameters
-    ----------
-    args : argparse.Namespace
-        Namespace produced by the CLI parser.
-
-    Returns
-    -------
-    tuple[bool, bool]
-        Pair of ``(pretty, quiet)`` flags with safe defaults.
-    """
-    return getattr(args, 'pretty', True), getattr(args, 'quiet', False)
-
-
-def _read_csv_rows(
-    path: Path,
-) -> list[dict[str, str]]:
-    """
-    Read CSV rows into dictionaries.
-
-    Parameters
-    ----------
-    path : Path
-        Path to a CSV file.
-
-    Returns
-    -------
-    list[dict[str, str]]
-        List of dictionaries, each representing a row in the CSV file.
-    """
-    with path.open(newline='', encoding='utf-8') as handle:
-        reader = csv.DictReader(handle)
-        return [dict(row) for row in reader]
-
-
-def _read_stdin_text() -> str:
-    """
-    Return every character from ``stdin`` as a single string.
-
-    Returns
-    -------
-    str
-        Entire ``stdin`` contents.
-    """
-    return sys.stdin.read()
+# SECTION: FUNCTIONS ======================================================== #


-def
-    source: object,
+def check_handler(
     *,
-
-
-
-
+    config: str,
+    jobs: bool = False,
+    pipelines: bool = False,
+    sources: bool = False,
+    summary: bool = False,
+    targets: bool = False,
+    transforms: bool = False,
+    substitute: bool = True,
+    pretty: bool = True,
+) -> int:
     """
-
+    Print requested pipeline sections from a YAML configuration.

     Parameters
     ----------
-
-
-
-
-
-
-
-
-
-
+    config : str
+        Path to the pipeline YAML configuration.
+    jobs : bool, optional
+        Whether to include job metadata. Default is ``False``.
+    pipelines : bool, optional
+        Whether to include pipeline metadata. Default is ``False``.
+    sources : bool, optional
+        Whether to include source metadata. Default is ``False``.
+    summary : bool, optional
+        Whether to print a full summary of the pipeline. Default is ``False``.
+    targets : bool, optional
+        Whether to include target metadata. Default is ``False``.
+    transforms : bool, optional
+        Whether to include transform metadata. Default is ``False``.
+    substitute : bool, optional
+        Whether to perform environment variable substitution. Default is
+        ``True``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.

     Returns
     -------
-
-
-        disabled.
-    """
-    if isinstance(source, (os.PathLike, str)) and str(source) == '-':
-        text = _read_stdin_text()
-        return _parse_text_payload(text, format_hint)
+    int
+        Zero on success.

-
-
+    """
+    cfg = load_pipeline_config(config, substitute=substitute)
+    if summary:
+        cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
+        return 0

-
-
-
-
+    cli_io.emit_json(
+        _check_sections(
+            cfg,
+            jobs=jobs,
+            pipelines=pipelines,
+            sources=sources,
+            targets=targets,
+            transforms=transforms,
+        ),
+        pretty=pretty,
     )
+    return 0


-def
-    data: Any,
-    output_path: str | None,
+def extract_handler(
     *,
-
-
-
-
-
-
-
-    data : Any
-        Data to write.
-    output_path : str | None
-        Path to write the output to. None to print to stdout.
-    success_message : str
-        Message to print upon successful write.
-
-    Returns
-    -------
-    bool
-        True if output was written to a file, False if printed to stdout.
-    """
-    if not output_path or output_path == '-':
-        return False
-    File(Path(output_path), FileFormat.JSON).write_json(data)
-    print(f'{success_message} {output_path}')
-    return True
-
-
-# SECTION: FUNCTIONS ======================================================== #
-
-
-def cmd_extract(
-    args: argparse.Namespace,
+    source_type: str,
+    source: str,
+    format_hint: str | None = None,
+    format_explicit: bool = False,
+    target: str | None = None,
+    output: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
     Extract data from a source.

     Parameters
     ----------
-
-
+    source_type : str
+        The type of the source (e.g., 'file', 'api', 'database').
+    source : str
+        The source identifier (e.g., path, URL, DSN).
+    format_hint : str | None, optional
+        An optional format hint (e.g., 'json', 'csv'). Default is ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    target : str | None, optional
+        The target destination (e.g., path, database). Default is ``None``.
+    output : str | None, optional
+        Path to write output data. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.

     Returns
     -------
     int
         Zero on success.
+
     """
-
-    explicit_format = _explicit_cli_format(args)
+    explicit_format = format_hint if format_explicit else None

-    if
-        text =
-        payload =
-
+    if source == '-':
+        text = cli_io.read_stdin_text()
+        payload = cli_io.parse_text_payload(
+            text,
+            format_hint,
+        )
+        cli_io.emit_json(payload, pretty=pretty)

         return 0

     result = extract(
-
-
+        source_type,
+        source,
         file_format=explicit_format,
     )
-    output_path =
-    if output_path is None:
-        output_path = getattr(args, 'output', None)
+    output_path = target or output

-
+    cli_io.emit_or_write(
         result,
         output_path,
+        pretty=pretty,
         success_message='Data extracted and saved to',
-    ):
-        _emit_json(result, pretty=pretty)
-
-    return 0
-
-
-def cmd_validate(
-    args: argparse.Namespace,
-) -> int:
-    """
-    Validate data from a source.
-
-    Parameters
-    ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
-
-    Returns
-    -------
-    int
-        Zero on success.
-    """
-    pretty, _quiet = _presentation_flags(args)
-    format_explicit: bool = getattr(args, '_format_explicit', False)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    payload = cast(
-        JSONData | str,
-        _resolve_cli_payload(
-            args.source,
-            format_hint=format_hint,
-            format_explicit=format_explicit,
-        ),
     )
-    result = validate(payload, args.rules)
-
-    target_path = getattr(args, 'target', None)
-    if target_path:
-        validated_data = result.get('data')
-        if validated_data is not None:
-            _write_json_output(
-                validated_data,
-                target_path,
-                success_message='Validation result saved to',
-            )
-        else:
-            print(
-                f'Validation failed, no data to save for {target_path}',
-                file=sys.stderr,
-            )
-    else:
-        _emit_json(result, pretty=pretty)

     return 0


-def
-
-
-
-
-
-
-
-
-
-
-    Returns
-    -------
-    int
-        Zero on success.
-    """
-    pretty, _quiet = _presentation_flags(args)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    format_explicit: bool = format_hint is not None
-
-    payload = cast(
-        JSONData | str,
-        _resolve_cli_payload(
-            args.source,
-            format_hint=format_hint,
-            format_explicit=format_explicit,
-        ),
-    )
-
-    data = transform(payload, args.operations)
-
-    if not _write_json_output(
-        data,
-        getattr(args, 'target', None),
-        success_message='Data transformed and saved to',
-    ):
-        _emit_json(data, pretty=pretty)
-
-    return 0
-
-
-def cmd_load(
-    args: argparse.Namespace,
+def load_handler(
+    *,
+    source: str,
+    target_type: str,
+    target: str,
+    source_format: str | None = None,
+    target_format: str | None = None,
+    format_explicit: bool = False,
+    output: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
     Load data into a target.

     Parameters
     ----------
-
-
+    source : str
+        The source payload (e.g., path, inline data).
+    target_type : str
+        The type of the target (e.g., 'file', 'database').
+    target : str
+        The target destination (e.g., path, DSN).
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target_format : str | None, optional
+        An optional target format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    output : str | None, optional
+        Path to write output data. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.

     Returns
     -------
     int
         Zero on success.
     """
-
-    explicit_format = _explicit_cli_format(args)
+    explicit_format = target_format if format_explicit else None

     # Allow piping into load.
-    source_format = getattr(args, 'source_format', None)
     source_value = cast(
         str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
-
-
+        cli_io.resolve_cli_payload(
+            source,
             format_hint=source_format,
             format_explicit=source_format is not None,
             hydrate_files=False,
@@ -590,94 +340,81 @@ def cmd_load(
     )

     # Allow piping out of load for file targets.
-    if
-        payload =
+    if target_type == 'file' and target == '-':
+        payload = cli_io.materialize_file_payload(
            source_value,
            format_hint=source_format,
            format_explicit=source_format is not None,
        )
-
+        cli_io.emit_json(payload, pretty=pretty)
         return 0

     result = load(
         source_value,
-
-
+        target_type,
+        target,
         file_format=explicit_format,
     )

-    output_path =
-
+    output_path = output
+    cli_io.emit_or_write(
         result,
         output_path,
+        pretty=pretty,
         success_message='Load result saved to',
-    )
-    _emit_json(result, pretty=pretty)
+    )

     return 0


-def
-
+def render_handler(
+    *,
+    config: str | None = None,
+    spec: str | None = None,
+    table: str | None = None,
+    template: TemplateKey | None = None,
+    template_path: str | None = None,
+    output: str | None = None,
+    pretty: bool = True,
+    quiet: bool = False,
 ) -> int:
     """
-
+    Render SQL DDL statements from table schema specs.

     Parameters
     ----------
-
-
+    config : str | None, optional
+        Path to a pipeline YAML configuration. Default is ``None``.
+    spec : str | None, optional
+        Path to a standalone table spec file. Default is ``None``.
+    table : str | None, optional
+        Table name filter. Default is ``None``.
+    template : TemplateKey | None, optional
+        The template key to use for rendering. Default is ``None``.
+    template_path : str | None, optional
+        Path to a custom template file. Default is ``None``.
+    output : str | None, optional
+        Path to write output SQL. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
+    quiet : bool, optional
+        Whether to suppress non-error output. Default is ``False``.

     Returns
     -------
     int
         Zero on success.
     """
-
-
-
-
-
-
-    cfg = load_pipeline_config(args.config, substitute=True)
-
-    list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
-    run_target = (
-        getattr(args, 'run', None)
-        or getattr(args, 'job', None)
-        or getattr(args, 'pipeline', None)
-    )
-
-    if list_flag and not run_target:
-        print_json({'jobs': _pipeline_summary(cfg)['jobs']})
-        return 0
-
-    if run_target:
-        result = run(job=run_target, config_path=args.config)
-        print_json({'status': 'ok', 'result': result})
-        return 0
-
-    print_json(_pipeline_summary(cfg))
-    return 0
-
-
-def cmd_render(
-    args: argparse.Namespace,
-) -> int:
-    """Render SQL DDL statements from table schema specs."""
-
-    _pretty, quiet = _presentation_flags(args)
-
-    template_value = getattr(args, 'template', 'ddl') or 'ddl'
-    template_path = getattr(args, 'template_path', None)
-    table_filter = getattr(args, 'table', None)
-    spec_path = getattr(args, 'spec', None)
-    config_path = getattr(args, 'config', None)
+    template_value: TemplateKey = template or 'ddl'
+    template_path_override = template_path
+    table_filter = table
+    spec_path = spec
+    config_path = config

     # If the provided template points to a file, treat it as a path override.
-    file_override =
-    template_key = template_value
-    if
+    file_override = template_path_override
+    template_key: TemplateKey | None = template_value
+    if template_path_override is None:
         candidate_path = Path(template_value)
         if candidate_path.exists():
             file_override = str(candidate_path)
@@ -710,62 +447,210 @@ def cmd_render(
     sql_text = (
         '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
     )
+    rendered_output = sql_text if pretty else sql_text.rstrip('\n')

-    output_path =
+    output_path = output
     if output_path and output_path != '-':
-        Path(output_path).write_text(
+        Path(output_path).write_text(rendered_output, encoding='utf-8')
         if not quiet:
             print(f'Rendered {len(specs)} schema(s) to {output_path}')
         return 0

-    print(
+    print(rendered_output)
     return 0


-def
+def run_handler(
+    *,
+    config: str,
+    job: str | None = None,
+    pipeline: str | None = None,
+    pretty: bool = True,
+) -> int:
     """
-
+    Execute an ETL job end-to-end from a pipeline YAML configuration.

     Parameters
     ----------
-
-
+    config : str
+        Path to the pipeline YAML configuration.
+    job : str | None, optional
+        Name of the job to run. If not provided, runs the entire pipeline.
+        Default is ``None``.
+    pipeline : str | None, optional
+        Alias for ``job``. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.

     Returns
     -------
     int
         Zero on success.
     """
-    cfg = load_pipeline_config(
-
-
+    cfg = load_pipeline_config(config, substitute=True)
+
+    job_name = job or pipeline
+    if job_name:
+        result = run(job=job_name, config_path=config)
+        cli_io.emit_json({'status': 'ok', 'result': result}, pretty=pretty)
         return 0

-
+    cli_io.emit_json(_pipeline_summary(cfg), pretty=pretty)
     return 0


-
+TransformOperations = Mapping[
+    Literal['filter', 'map', 'select', 'sort', 'aggregate'],
+    Any,
+]
+
+
+def transform_handler(
+    *,
+    source: str,
+    operations: JSONData | str,
+    target: str | None = None,
+    source_format: str | None = None,
+    target_format: str | None = None,
+    pretty: bool = True,
+    format_explicit: bool = False,
+) -> int:
     """
-
+    Transform data from a source.

     Parameters
     ----------
-
-
+    source : str
+        The source payload (e.g., path, inline data).
+    operations : JSONData | str
+        The transformation operations (inline JSON or path).
+    target : str | None, optional
+        The target destination (e.g., path). Default is ``None``.
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target_format : str | None, optional
+        An optional target format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.

     Returns
     -------
     int
         Zero on success.
+
+    Raises
+    ------
+    ValueError
+        If the operations payload is not a mapping.
     """
-
+    format_hint: str | None = source_format
+    format_explicit = format_hint is not None or format_explicit

-
-
-
-
+    payload = cast(
+        JSONData | str,
+        cli_io.resolve_cli_payload(
+            source,
+            format_hint=format_hint,
+            format_explicit=format_explicit,
+        ),
+    )
+
+    operations_payload = cli_io.resolve_cli_payload(
+        operations,
+        format_hint=None,
+        format_explicit=format_explicit,
+    )
+    if not isinstance(operations_payload, dict):
+        raise ValueError('operations must resolve to a mapping of transforms')
+
+    data = transform(payload, cast(TransformOperations, operations_payload))
+
+    if target and target != '-':
+        File.write_file(target, data, file_format=target_format)
+        print(f'Data transformed and saved to {target}')
         return 0

-
+    cli_io.emit_json(data, pretty=pretty)
+    return 0
+
+
+def validate_handler(
+    *,
+    source: str,
+    rules: JSONData | str,
+    source_format: str | None = None,
+    target: str | None = None,
+    format_explicit: bool = False,
+    pretty: bool = True,
+) -> int:
+    """
+    Validate data from a source.
+
+    Parameters
+    ----------
+    source : str
+        The source payload (e.g., path, inline data).
+    rules : JSONData | str
+        The validation rules (inline JSON or path).
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target : str | None, optional
+        The target destination (e.g., path). Default is ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
+
+    Returns
+    -------
+    int
+        Zero on success.
+
+    Raises
+    ------
+    ValueError
+        If the rules payload is not a mapping.
+    """
+    format_hint: str | None = source_format
+    payload = cast(
+        JSONData | str,
+        cli_io.resolve_cli_payload(
+            source,
+            format_hint=format_hint,
+            format_explicit=format_explicit,
+        ),
+    )
+
+    rules_payload = cli_io.resolve_cli_payload(
+        rules,
+        format_hint=None,
+        format_explicit=format_explicit,
+    )
+    if not isinstance(rules_payload, dict):
+        raise ValueError('rules must resolve to a mapping of field rules')
+
+    field_rules = cast(Mapping[str, FieldRules], rules_payload)
+    result = validate(payload, field_rules)
+
+    if target and target != '-':
+        validated_data = result.get('data')
+        if validated_data is not None:
+            cli_io.write_json_output(
+                validated_data,
+                target,
+                success_message='Validation result saved to',
+            )
+        else:
+            print(
+                f'Validation failed, no data to save for {target}',
+                file=sys.stderr,
+            )
+    else:
+        cli_io.emit_json(result, pretty=pretty)
+
     return 0