etlplus 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- etlplus/api/README.md +24 -26
- etlplus/cli/commands.py +924 -0
- etlplus/cli/constants.py +71 -0
- etlplus/cli/handlers.py +302 -420
- etlplus/cli/io.py +336 -0
- etlplus/cli/main.py +16 -418
- etlplus/cli/options.py +49 -0
- etlplus/cli/state.py +336 -0
- etlplus/cli/types.py +33 -0
- etlplus/database/__init__.py +2 -0
- etlplus/database/ddl.py +37 -29
- etlplus/database/engine.py +10 -5
- etlplus/database/orm.py +18 -11
- etlplus/database/schema.py +3 -2
- etlplus/database/types.py +33 -0
- etlplus/load.py +1 -1
- etlplus/types.py +5 -0
- etlplus/utils.py +1 -32
- {etlplus-0.7.0.dist-info → etlplus-0.9.0.dist-info}/METADATA +65 -32
- {etlplus-0.7.0.dist-info → etlplus-0.9.0.dist-info}/RECORD +24 -18
- etlplus/cli/app.py +0 -1367
- {etlplus-0.7.0.dist-info → etlplus-0.9.0.dist-info}/WHEEL +0 -0
- {etlplus-0.7.0.dist-info → etlplus-0.9.0.dist-info}/entry_points.txt +0 -0
- {etlplus-0.7.0.dist-info → etlplus-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.7.0.dist-info → etlplus-0.9.0.dist-info}/top_level.txt +0 -0
etlplus/cli/handlers.py
CHANGED
@@ -6,30 +6,28 @@ Command handler functions for the ``etlplus`` command-line interface (CLI).
 
 from __future__ import annotations
 
-import argparse
-import csv
-import io
-import json
 import os
 import sys
+from collections.abc import Mapping
 from pathlib import Path
 from typing import Any
+from typing import Literal
 from typing import cast
 
 from ..config import PipelineConfig
 from ..config import load_pipeline_config
 from ..database import load_table_spec
 from ..database import render_tables
-from ..enums import FileFormat
 from ..extract import extract
 from ..file import File
 from ..load import load
 from ..run import run
 from ..transform import transform
 from ..types import JSONData
-from ..
-from ..
+from ..types import TemplateKey
+from ..validate import FieldRules
 from ..validate import validate
+from . import io as cli_io
 
 # SECTION: EXPORTS ========================================================== #
 
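The private helpers dropped from handlers.py in this release (_emit_json, _read_stdin_text, _parse_text_payload, _resolve_cli_payload, _write_json_output, and related functions) appear to have moved into the new etlplus/cli/io.py module listed above, imported here as cli_io. A minimal sketch of the call-site translation, assuming only the cli_io.emit_json signature visible in the hunks below; the payload value is a placeholder:

from etlplus.cli import io as cli_io

payload = {'rows': [{'id': 1}, {'id': 2}]}

# 0.7.0 handlers called a module-private _emit_json(payload, pretty=...);
# 0.9.0 routes the same call through the shared cli_io module.
cli_io.emit_json(payload, pretty=True)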
@@ -39,7 +37,6 @@ __all__ = [
     'extract_handler',
     'check_handler',
     'load_handler',
-    'pipeline_handler',
     'render_handler',
     'run_handler',
     'transform_handler',
@@ -72,7 +69,7 @@ def _collect_table_specs(
     specs: list[dict[str, Any]] = []
 
     if spec_path:
-        specs.append(load_table_spec(Path(spec_path)))
+        specs.append(dict(load_table_spec(Path(spec_path))))
 
     if config_path:
         cfg = load_pipeline_config(config_path, substitute=True)
@@ -81,76 +78,14 @@ def _collect_table_specs(
     return specs
 
 
-def _emit_json(
-    data: Any,
-    *,
-    pretty: bool,
-) -> None:
-    """
-    Emit JSON to stdout honoring the pretty/compact preference.
-
-    Parameters
-    ----------
-    data : Any
-        Arbitrary JSON-serializable payload.
-    pretty : bool
-        When ``True`` pretty-print via :func:`print_json`; otherwise emit a
-        compact JSON string.
-    """
-    if pretty:
-        print_json(data)
-        return
-
-    dumped = json.dumps(
-        data,
-        ensure_ascii=False,
-        separators=(',', ':'),
-    )
-    print(dumped)
-
-
-def _explicit_cli_format(
-    args: argparse.Namespace,
-) -> str | None:
-    """Return the explicit CLI format hint when provided."""
-
-    if not getattr(args, '_format_explicit', False):
-        return None
-    for attr in ('format', 'target_format', 'source_format'):
-        value = getattr(args, attr, None)
-        if value is None:
-            continue
-        normalized = value.strip().lower()
-        if normalized:
-            return normalized
-    return None
-
-
-def _infer_payload_format(
-    text: str,
-) -> str:
-    """
-    Infer JSON vs CSV from payload text.
-
-    Parameters
-    ----------
-    text : str
-        Incoming payload as plain text.
-
-    Returns
-    -------
-    str
-        ``'json'`` when the text starts with ``{``/``[``, else ``'csv'``.
-    """
-    stripped = text.lstrip()
-    if stripped.startswith('{') or stripped.startswith('['):
-        return 'json'
-    return 'csv'
-
-
 def _check_sections(
     cfg: PipelineConfig,
-
+    *,
+    jobs: bool,
+    pipelines: bool,
+    sources: bool,
+    targets: bool,
+    transforms: bool,
 ) -> dict[str, Any]:
     """
     Build sectioned metadata output for the check command.
@@ -159,8 +94,16 @@ def _check_sections(
     ----------
     cfg : PipelineConfig
         The loaded pipeline configuration.
-
-
+    jobs : bool
+        Whether to include job metadata.
+    pipelines : bool
+        Whether to include pipeline metadata.
+    sources : bool
+        Whether to include source metadata.
+    targets : bool
+        Whether to include target metadata.
+    transforms : bool
+        Whether to include transform metadata.
 
     Returns
     -------
@@ -168,15 +111,15 @@ def _check_sections(
         Metadata output for the check command.
     """
     sections: dict[str, Any] = {}
-    if
+    if jobs:
         sections['jobs'] = _pipeline_summary(cfg)['jobs']
-    if
+    if pipelines:
         sections['pipelines'] = [cfg.name]
-    if
+    if sources:
         sections['sources'] = [src.name for src in cfg.sources]
-    if
+    if targets:
         sections['targets'] = [tgt.name for tgt in cfg.targets]
-    if
+    if transforms:
         sections['transforms'] = [
             getattr(trf, 'name', None) for trf in cfg.transforms
         ]
@@ -185,88 +128,6 @@ def _check_sections(
     return sections
 
 
-def _materialize_file_payload(
-    source: object,
-    *,
-    format_hint: str | None,
-    format_explicit: bool,
-) -> JSONData | object:
-    """
-    Return structured payloads when ``source`` references a file.
-
-    Parameters
-    ----------
-    source : object
-        Input source of data, possibly a file path.
-    format_hint : str | None
-        Explicit format hint: 'json', 'csv', or None to infer.
-    format_explicit : bool
-        Whether an explicit format hint was provided.
-
-    Returns
-    -------
-    JSONData | object
-        Parsed JSON data when ``source`` is a file; otherwise the original
-        ``source`` object.
-    """
-    if isinstance(source, (dict, list)):
-        return cast(JSONData, source)
-    if not isinstance(source, (str, os.PathLike)):
-        return source
-
-    path = Path(source)
-
-    normalized_hint = (format_hint or '').strip().lower()
-    fmt: FileFormat | None = None
-
-    if format_explicit and normalized_hint:
-        try:
-            fmt = FileFormat(normalized_hint)
-        except ValueError:
-            fmt = None
-    elif not format_explicit:
-        suffix = path.suffix.lower().lstrip('.')
-        if suffix:
-            try:
-                fmt = FileFormat(suffix)
-            except ValueError:
-                fmt = None
-
-    if fmt is None:
-        return source
-    if fmt == FileFormat.CSV:
-        return _read_csv_rows(path)
-    return File(path, fmt).read()
-
-
-def _parse_text_payload(
-    text: str,
-    fmt: str | None,
-) -> JSONData | str:
-    """
-    Parse JSON/CSV text into a Python payload.
-
-    Parameters
-    ----------
-    text : str
-        The input text payload.
-    fmt : str | None
-        Explicit format hint: 'json', 'csv', or None to infer.
-
-    Returns
-    -------
-    JSONData | str
-        The parsed payload as JSON data or raw text.
-    """
-    effective = (fmt or '').strip().lower() or _infer_payload_format(text)
-    if effective == 'json':
-        return cast(JSONData, json_type(text))
-    if effective == 'csv':
-        reader = csv.DictReader(io.StringIO(text))
-        return [dict(row) for row in reader]
-    return text
-
-
 def _pipeline_summary(
     cfg: PipelineConfig,
 ) -> dict[str, Any]:
@@ -295,229 +156,183 @@ def _pipeline_summary(
     }
 
 
-def _presentation_flags(
-    args: argparse.Namespace,
-) -> tuple[bool, bool]:
-    """
-    Return presentation toggles from the parsed namespace.
-
-    Parameters
-    ----------
-    args : argparse.Namespace
-        Namespace produced by the CLI parser.
-
-    Returns
-    -------
-    tuple[bool, bool]
-        Pair of ``(pretty, quiet)`` flags with safe defaults.
-    """
-    return getattr(args, 'pretty', True), getattr(args, 'quiet', False)
-
-
-def _read_csv_rows(
-    path: Path,
-) -> list[dict[str, str]]:
-    """
-    Read CSV rows into dictionaries.
-
-    Parameters
-    ----------
-    path : Path
-        Path to a CSV file.
-
-    Returns
-    -------
-    list[dict[str, str]]
-        List of dictionaries, each representing a row in the CSV file.
-    """
-    with path.open(newline='', encoding='utf-8') as handle:
-        reader = csv.DictReader(handle)
-        return [dict(row) for row in reader]
-
-
-def _read_stdin_text() -> str:
-    """
-    Return every character from ``stdin`` as a single string.
-
-    Returns
-    -------
-    str
-        Entire ``stdin`` contents.
-    """
-    return sys.stdin.read()
-
-
-def _resolve_cli_payload(
-    source: object,
-    *,
-    format_hint: str | None,
-    format_explicit: bool,
-    hydrate_files: bool = True,
-) -> JSONData | object:
-    """
-    Normalize CLI-provided payloads, honoring stdin and inline data.
-
-    Parameters
-    ----------
-    source : object
-        Raw CLI value (path, inline payload, or ``'-'`` for stdin).
-    format_hint : str | None
-        Explicit format hint supplied by the CLI option.
-    format_explicit : bool
-        Flag indicating whether the format hint was explicitly provided.
-    hydrate_files : bool, optional
-        When ``True`` (default) materialize file paths into structured data.
-        When ``False``, keep the original path so downstream code can stream
-        from disk directly.
-
-    Returns
-    -------
-    JSONData | object
-        Parsed payload or the original source value when hydration is
-        disabled.
-    """
-    if isinstance(source, (os.PathLike, str)) and str(source) == '-':
-        text = _read_stdin_text()
-        return _parse_text_payload(text, format_hint)
-
-    if not hydrate_files:
-        return source
-
-    return _materialize_file_payload(
-        source,
-        format_hint=format_hint,
-        format_explicit=format_explicit,
-    )
-
-
-def _write_json_output(
-    data: Any,
-    output_path: str | None,
-    *,
-    success_message: str,
-) -> bool:
-    """
-    Optionally persist JSON data to disk.
-
-    Parameters
-    ----------
-    data : Any
-        Data to write.
-    output_path : str | None
-        Path to write the output to. None to print to stdout.
-    success_message : str
-        Message to print upon successful write.
-
-    Returns
-    -------
-    bool
-        True if output was written to a file, False if printed to stdout.
-    """
-    if not output_path or output_path == '-':
-        return False
-    File(Path(output_path), FileFormat.JSON).write_json(data)
-    print(f'{success_message} {output_path}')
-    return True
-
-
 # SECTION: FUNCTIONS ======================================================== #
 
 
 def check_handler(
-
+    *,
+    config: str,
+    jobs: bool = False,
+    pipelines: bool = False,
+    sources: bool = False,
+    summary: bool = False,
+    targets: bool = False,
+    transforms: bool = False,
+    substitute: bool = True,
+    pretty: bool = True,
 ) -> int:
     """
     Print requested pipeline sections from a YAML configuration.
 
     Parameters
     ----------
-
-
+    config : str
+        Path to the pipeline YAML configuration.
+    jobs : bool, optional
+        Whether to include job metadata. Default is ``False``.
+    pipelines : bool, optional
+        Whether to include pipeline metadata. Default is ``False``.
+    sources : bool, optional
+        Whether to include source metadata. Default is ``False``.
+    summary : bool, optional
+        Whether to print a full summary of the pipeline. Default is ``False``.
+    targets : bool, optional
+        Whether to include target metadata. Default is ``False``.
+    transforms : bool, optional
+        Whether to include transform metadata. Default is ``False``.
+    substitute : bool, optional
+        Whether to perform environment variable substitution. Default is
+        ``True``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
 
     Returns
     -------
     int
         Zero on success.
+
     """
-    cfg = load_pipeline_config(
-    if
-
+    cfg = load_pipeline_config(config, substitute=substitute)
+    if summary:
+        cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
         return 0
 
-
+    cli_io.emit_json(
+        _check_sections(
+            cfg,
+            jobs=jobs,
+            pipelines=pipelines,
+            sources=sources,
+            targets=targets,
+            transforms=transforms,
+        ),
+        pretty=pretty,
+    )
     return 0
 
 
 def extract_handler(
-
+    *,
+    source_type: str,
+    source: str,
+    format_hint: str | None = None,
+    format_explicit: bool = False,
+    target: str | None = None,
+    output: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
     Extract data from a source.
 
     Parameters
     ----------
-
-
+    source_type : str
+        The type of the source (e.g., 'file', 'api', 'database').
+    source : str
+        The source identifier (e.g., path, URL, DSN).
+    format_hint : str | None, optional
+        An optional format hint (e.g., 'json', 'csv'). Default is ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    target : str | None, optional
+        The target destination (e.g., path, database). Default is ``None``.
+    output : str | None, optional
+        Path to write output data. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
 
     Returns
     -------
     int
         Zero on success.
+
     """
-
-    explicit_format = _explicit_cli_format(args)
+    explicit_format = format_hint if format_explicit else None
 
-    if
-        text =
-        payload =
-
+    if source == '-':
+        text = cli_io.read_stdin_text()
+        payload = cli_io.parse_text_payload(
+            text,
+            format_hint,
+        )
+        cli_io.emit_json(payload, pretty=pretty)
 
         return 0
 
     result = extract(
-
-
+        source_type,
+        source,
         file_format=explicit_format,
     )
-    output_path =
-    if output_path is None:
-        output_path = getattr(args, 'output', None)
+    output_path = target or output
 
-
+    cli_io.emit_or_write(
         result,
         output_path,
+        pretty=pretty,
         success_message='Data extracted and saved to',
-    )
-    _emit_json(result, pretty=pretty)
+    )
 
     return 0
 
 
 def load_handler(
-
+    *,
+    source: str,
+    target_type: str,
+    target: str,
+    source_format: str | None = None,
+    target_format: str | None = None,
+    format_explicit: bool = False,
+    output: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
     Load data into a target.
 
     Parameters
     ----------
-
-
+    source : str
+        The source payload (e.g., path, inline data).
+    target_type : str
+        The type of the target (e.g., 'file', 'database').
+    target : str
+        The target destination (e.g., path, DSN).
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target_format : str | None, optional
+        An optional target format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    output : str | None, optional
+        Path to write output data. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
 
     Returns
     -------
     int
         Zero on success.
     """
-
-    explicit_format = _explicit_cli_format(args)
+    explicit_format = target_format if format_explicit else None
 
     # Allow piping into load.
-    source_format = getattr(args, 'source_format', None)
     source_value = cast(
         str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
-
-
+        cli_io.resolve_cli_payload(
+            source,
             format_hint=source_format,
            format_explicit=source_format is not None,
            hydrate_files=False,
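The hunks above replace the argparse.Namespace-based handlers with keyword-only signatures. A minimal sketch of calling the refactored handlers directly, based only on the signatures shown in this diff; the file names and option values below are placeholders:

from etlplus.cli.handlers import check_handler, extract_handler

# List the sources and targets declared in a pipeline configuration.
check_handler(config='pipeline.yml', sources=True, targets=True)

# Extract a JSON file and pretty-print the result to stdout.
extract_handler(
    source_type='file',
    source='data/input.json',
    format_hint='json',
    format_explicit=True,
)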
@@ -525,93 +340,81 @@ def load_handler(
     )
 
     # Allow piping out of load for file targets.
-    if
-        payload =
+    if target_type == 'file' and target == '-':
+        payload = cli_io.materialize_file_payload(
             source_value,
             format_hint=source_format,
             format_explicit=source_format is not None,
         )
-
+        cli_io.emit_json(payload, pretty=pretty)
         return 0
 
     result = load(
         source_value,
-
-
+        target_type,
+        target,
         file_format=explicit_format,
     )
 
-    output_path =
-
+    output_path = output
+    cli_io.emit_or_write(
         result,
         output_path,
+        pretty=pretty,
         success_message='Load result saved to',
-    )
-    _emit_json(result, pretty=pretty)
+    )
 
     return 0
 
 
-def
-
+def render_handler(
+    *,
+    config: str | None = None,
+    spec: str | None = None,
+    table: str | None = None,
+    template: TemplateKey | None = None,
+    template_path: str | None = None,
+    output: str | None = None,
+    pretty: bool = True,
+    quiet: bool = False,
 ) -> int:
     """
-
+    Render SQL DDL statements from table schema specs.
 
     Parameters
     ----------
-
-
+    config : str | None, optional
+        Path to a pipeline YAML configuration. Default is ``None``.
+    spec : str | None, optional
+        Path to a standalone table spec file. Default is ``None``.
+    table : str | None, optional
+        Table name filter. Default is ``None``.
+    template : TemplateKey | None, optional
+        The template key to use for rendering. Default is ``None``.
+    template_path : str | None, optional
+        Path to a custom template file. Default is ``None``.
+    output : str | None, optional
+        Path to write output SQL. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
+    quiet : bool, optional
+        Whether to suppress non-error output. Default is ``False``.
 
     Returns
     -------
     int
         Zero on success.
     """
-
-
-
-
-
-
-    cfg = load_pipeline_config(args.config, substitute=True)
-
-    list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
-    run_target = (
-        getattr(args, 'run', None)
-        or getattr(args, 'job', None)
-        or getattr(args, 'pipeline', None)
-    )
-
-    if list_flag and not run_target:
-        print_json({'jobs': _pipeline_summary(cfg)['jobs']})
-        return 0
-
-    if run_target:
-        result = run(job=run_target, config_path=args.config)
-        print_json({'status': 'ok', 'result': result})
-        return 0
-
-    print_json(_pipeline_summary(cfg))
-    return 0
-
-
-def render_handler(
-    args: argparse.Namespace,
-) -> int:
-    """Render SQL DDL statements from table schema specs."""
-    _, quiet = _presentation_flags(args)
-
-    template_value = getattr(args, 'template', 'ddl') or 'ddl'
-    template_path = getattr(args, 'template_path', None)
-    table_filter = getattr(args, 'table', None)
-    spec_path = getattr(args, 'spec', None)
-    config_path = getattr(args, 'config', None)
+    template_value: TemplateKey = template or 'ddl'
+    template_path_override = template_path
+    table_filter = table
+    spec_path = spec
+    config_path = config
 
     # If the provided template points to a file, treat it as a path override.
-    file_override =
-    template_key = template_value
-    if
+    file_override = template_path_override
+    template_key: TemplateKey | None = template_value
+    if template_path_override is None:
         candidate_path = Path(template_value)
         if candidate_path.exists():
             file_override = str(candidate_path)
@@ -644,131 +447,210 @@ def render_handler(
     sql_text = (
         '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
     )
+    rendered_output = sql_text if pretty else sql_text.rstrip('\n')
 
-    output_path =
+    output_path = output
     if output_path and output_path != '-':
-        Path(output_path).write_text(
+        Path(output_path).write_text(rendered_output, encoding='utf-8')
         if not quiet:
             print(f'Rendered {len(specs)} schema(s) to {output_path}')
         return 0
 
-    print(
+    print(rendered_output)
     return 0
 
 
 def run_handler(
-
+    *,
+    config: str,
+    job: str | None = None,
+    pipeline: str | None = None,
+    pretty: bool = True,
 ) -> int:
     """
     Execute an ETL job end-to-end from a pipeline YAML configuration.
 
     Parameters
     ----------
-
-
+    config : str
+        Path to the pipeline YAML configuration.
+    job : str | None, optional
+        Name of the job to run. If not provided, runs the entire pipeline.
+        Default is ``None``.
+    pipeline : str | None, optional
+        Alias for ``job``. Default is ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
 
     Returns
     -------
     int
         Zero on success.
     """
-    cfg = load_pipeline_config(
+    cfg = load_pipeline_config(config, substitute=True)
 
-    job_name =
+    job_name = job or pipeline
     if job_name:
-        result = run(job=job_name, config_path=
-
+        result = run(job=job_name, config_path=config)
+        cli_io.emit_json({'status': 'ok', 'result': result}, pretty=pretty)
         return 0
 
-
+    cli_io.emit_json(_pipeline_summary(cfg), pretty=pretty)
     return 0
 
 
+TransformOperations = Mapping[
+    Literal['filter', 'map', 'select', 'sort', 'aggregate'],
+    Any,
+]
+
+
 def transform_handler(
-
+    *,
+    source: str,
+    operations: JSONData | str,
+    target: str | None = None,
+    source_format: str | None = None,
+    target_format: str | None = None,
+    pretty: bool = True,
+    format_explicit: bool = False,
 ) -> int:
     """
     Transform data from a source.
 
     Parameters
     ----------
-
-
+    source : str
+        The source payload (e.g., path, inline data).
+    operations : JSONData | str
+        The transformation operations (inline JSON or path).
+    target : str | None, optional
+        The target destination (e.g., path). Default is ``None``.
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target_format : str | None, optional
+        An optional target format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
 
     Returns
     -------
     int
         Zero on success.
+
+    Raises
+    ------
+    ValueError
+        If the operations payload is not a mapping.
     """
-
-    format_hint
-    format_explicit: bool = format_hint is not None
+    format_hint: str | None = source_format
+    format_explicit = format_hint is not None or format_explicit
 
     payload = cast(
         JSONData | str,
-
-
+        cli_io.resolve_cli_payload(
+            source,
             format_hint=format_hint,
             format_explicit=format_explicit,
         ),
     )
 
-
+    operations_payload = cli_io.resolve_cli_payload(
+        operations,
+        format_hint=None,
+        format_explicit=format_explicit,
+    )
+    if not isinstance(operations_payload, dict):
+        raise ValueError('operations must resolve to a mapping of transforms')
 
-
-        data,
-        getattr(args, 'target', None),
-        success_message='Data transformed and saved to',
-    ):
-        _emit_json(data, pretty=pretty)
+    data = transform(payload, cast(TransformOperations, operations_payload))
 
+    if target and target != '-':
+        File.write_file(target, data, file_format=target_format)
+        print(f'Data transformed and saved to {target}')
+        return 0
+
+    cli_io.emit_json(data, pretty=pretty)
     return 0
 
 
 def validate_handler(
-
+    *,
+    source: str,
+    rules: JSONData | str,
+    source_format: str | None = None,
+    target: str | None = None,
+    format_explicit: bool = False,
+    pretty: bool = True,
 ) -> int:
     """
     Validate data from a source.
 
     Parameters
     ----------
-
-
+    source : str
+        The source payload (e.g., path, inline data).
+    rules : JSONData | str
+        The validation rules (inline JSON or path).
+    source_format : str | None, optional
+        An optional source format hint (e.g., 'json', 'csv'). Default is
+        ``None``.
+    target : str | None, optional
+        The target destination (e.g., path). Default is ``None``.
+    format_explicit : bool, optional
+        Whether the format hint was explicitly provided. Default is ``False``.
+    pretty : bool, optional
+        Whether to pretty-print output. Default is ``True``.
 
     Returns
     -------
     int
         Zero on success.
+
+    Raises
+    ------
+    ValueError
+        If the rules payload is not a mapping.
     """
-
-    format_explicit: bool = getattr(args, '_format_explicit', False)
-    format_hint: str | None = getattr(args, 'source_format', None)
+    format_hint: str | None = source_format
     payload = cast(
         JSONData | str,
-
-
+        cli_io.resolve_cli_payload(
+            source,
             format_hint=format_hint,
             format_explicit=format_explicit,
         ),
     )
-    result = validate(payload, args.rules)
 
-
-
+    rules_payload = cli_io.resolve_cli_payload(
+        rules,
+        format_hint=None,
+        format_explicit=format_explicit,
+    )
+    if not isinstance(rules_payload, dict):
+        raise ValueError('rules must resolve to a mapping of field rules')
+
+    field_rules = cast(Mapping[str, FieldRules], rules_payload)
+    result = validate(payload, field_rules)
+
+    if target and target != '-':
         validated_data = result.get('data')
         if validated_data is not None:
-
+            cli_io.write_json_output(
                 validated_data,
-
+                target,
                 success_message='Validation result saved to',
             )
         else:
             print(
-                f'Validation failed, no data to save for {
+                f'Validation failed, no data to save for {target}',
                 file=sys.stderr,
             )
     else:
-
+        cli_io.emit_json(result, pretty=pretty)
 
     return 0
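Similarly, a hedged sketch of the new transform and validate entry points, which now raise ValueError when the operations or rules payload does not resolve to a mapping; the paths below are placeholders:

from etlplus.cli.handlers import transform_handler, validate_handler

# Operations and rules may be given inline or as file paths; either way they
# must resolve to a mapping, otherwise the handler raises ValueError.
transform_handler(
    source='data/input.csv',
    operations='ops.json',
    source_format='csv',
)

validate_handler(
    source='data/input.csv',
    rules='rules.json',
    source_format='csv',
)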