etlplus 0.4.1__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/cli/app.py +669 -430
- etlplus/cli/handlers.py +175 -196
- etlplus/cli/main.py +131 -74
- {etlplus-0.4.1.dist-info → etlplus-0.4.5.dist-info}/METADATA +44 -36
- {etlplus-0.4.1.dist-info → etlplus-0.4.5.dist-info}/RECORD +9 -9
- {etlplus-0.4.1.dist-info → etlplus-0.4.5.dist-info}/WHEEL +0 -0
- {etlplus-0.4.1.dist-info → etlplus-0.4.5.dist-info}/entry_points.txt +0 -0
- {etlplus-0.4.1.dist-info → etlplus-0.4.5.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.4.1.dist-info → etlplus-0.4.5.dist-info}/top_level.txt +0 -0
etlplus/cli/handlers.py
CHANGED
|
@@ -14,7 +14,6 @@ import os
|
|
|
14
14
|
import sys
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
from typing import Any
|
|
17
|
-
from typing import Literal
|
|
18
17
|
from typing import cast
|
|
19
18
|
|
|
20
19
|
from ..config import PipelineConfig
|
|
@@ -45,53 +44,9 @@ __all__ = [
|
|
|
45
44
|
]
|
|
46
45
|
|
|
47
46
|
|
|
48
|
-
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# Standard output/error format behavior states
|
|
52
|
-
_FORMAT_ERROR_STATES = {'error', 'fail', 'strict'}
|
|
53
|
-
_FORMAT_SILENT_STATES = {'ignore', 'silent'}
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
# SECTION: CONSTANTS ======================================================== #
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
FORMAT_ENV_KEY = 'ETLPLUS_FORMAT_BEHAVIOR'
|
|
60
|
-
|
|
61
|
-
|
|
62
47
|
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
63
48
|
|
|
64
49
|
|
|
65
|
-
def _emit_behavioral_notice(
|
|
66
|
-
message: str,
|
|
67
|
-
behavior: str,
|
|
68
|
-
*,
|
|
69
|
-
quiet: bool,
|
|
70
|
-
) -> None:
|
|
71
|
-
"""
|
|
72
|
-
Emit or raise format-behavior notices.
|
|
73
|
-
|
|
74
|
-
Parameters
|
|
75
|
-
----------
|
|
76
|
-
message : str
|
|
77
|
-
Warning message describing the ignored ``--format`` flag.
|
|
78
|
-
behavior : str
|
|
79
|
-
Effective format-behavior mode derived from CLI options and env.
|
|
80
|
-
quiet : bool
|
|
81
|
-
Whether non-essential warnings should be suppressed.
|
|
82
|
-
|
|
83
|
-
Raises
|
|
84
|
-
------
|
|
85
|
-
ValueError
|
|
86
|
-
If ``behavior`` maps to an error state.
|
|
87
|
-
"""
|
|
88
|
-
if behavior in _FORMAT_ERROR_STATES:
|
|
89
|
-
raise ValueError(message)
|
|
90
|
-
if behavior in _FORMAT_SILENT_STATES or quiet:
|
|
91
|
-
return
|
|
92
|
-
print(f'Warning: {message}', file=sys.stderr)
|
|
93
|
-
|
|
94
|
-
|
|
95
50
|
def _emit_json(
|
|
96
51
|
data: Any,
|
|
97
52
|
*,
|
|
@@ -120,62 +75,6 @@ def _emit_json(
|
|
|
120
75
|
print(dumped)
|
|
121
76
|
|
|
122
77
|
|
|
123
|
-
def _format_behavior(
|
|
124
|
-
strict: bool,
|
|
125
|
-
) -> str:
|
|
126
|
-
"""
|
|
127
|
-
Return the effective format-behavior mode.
|
|
128
|
-
|
|
129
|
-
Parameters
|
|
130
|
-
----------
|
|
131
|
-
strict : bool
|
|
132
|
-
Whether to enforce strict format behavior.
|
|
133
|
-
|
|
134
|
-
Returns
|
|
135
|
-
-------
|
|
136
|
-
str
|
|
137
|
-
The effective format-behavior mode.
|
|
138
|
-
"""
|
|
139
|
-
if strict:
|
|
140
|
-
return 'error'
|
|
141
|
-
env_value = os.getenv(FORMAT_ENV_KEY, 'warn')
|
|
142
|
-
return (env_value or 'warn').strip().lower()
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
def _handle_format_guard(
|
|
146
|
-
*,
|
|
147
|
-
io_context: Literal['source', 'target'],
|
|
148
|
-
resource_type: str,
|
|
149
|
-
format_explicit: bool,
|
|
150
|
-
strict: bool,
|
|
151
|
-
quiet: bool,
|
|
152
|
-
) -> None:
|
|
153
|
-
"""
|
|
154
|
-
Warn or raise when --format is used alongside file resources.
|
|
155
|
-
|
|
156
|
-
Parameters
|
|
157
|
-
----------
|
|
158
|
-
io_context : Literal['source', 'target']
|
|
159
|
-
Whether this is a source or target resource.
|
|
160
|
-
resource_type : str
|
|
161
|
-
The type of resource being processed.
|
|
162
|
-
format_explicit : bool
|
|
163
|
-
Whether the --format option was explicitly provided.
|
|
164
|
-
strict : bool
|
|
165
|
-
Whether to enforce strict format behavior.
|
|
166
|
-
quiet : bool
|
|
167
|
-
Whether to suppress warnings.
|
|
168
|
-
"""
|
|
169
|
-
if resource_type != 'file' or not format_explicit:
|
|
170
|
-
return
|
|
171
|
-
message = (
|
|
172
|
-
f'--format is ignored for file {io_context}s; '
|
|
173
|
-
'inferred from filename extension.'
|
|
174
|
-
)
|
|
175
|
-
behavior = _format_behavior(strict)
|
|
176
|
-
_emit_behavioral_notice(message, behavior, quiet=quiet)
|
|
177
|
-
|
|
178
|
-
|
|
179
78
|
def _infer_payload_format(
|
|
180
79
|
text: str,
|
|
181
80
|
) -> str:
|
|
@@ -233,28 +132,75 @@ def _list_sections(
|
|
|
233
132
|
return sections
|
|
234
133
|
|
|
235
134
|
|
|
236
|
-
def
|
|
135
|
+
def _explicit_cli_format(
|
|
136
|
+
args: argparse.Namespace,
|
|
137
|
+
) -> str | None:
|
|
138
|
+
"""Return the explicit CLI format hint when provided."""
|
|
139
|
+
|
|
140
|
+
if not getattr(args, '_format_explicit', False):
|
|
141
|
+
return None
|
|
142
|
+
for attr in ('format', 'target_format', 'source_format'):
|
|
143
|
+
value = getattr(args, attr, None)
|
|
144
|
+
if value is None:
|
|
145
|
+
continue
|
|
146
|
+
normalized = value.strip().lower()
|
|
147
|
+
if normalized:
|
|
148
|
+
return normalized
|
|
149
|
+
return None
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _materialize_file_payload(
|
|
237
153
|
source: object,
|
|
238
|
-
|
|
154
|
+
*,
|
|
155
|
+
format_hint: str | None,
|
|
156
|
+
format_explicit: bool,
|
|
157
|
+
) -> JSONData | object:
|
|
239
158
|
"""
|
|
240
|
-
Return
|
|
159
|
+
Return structured payloads when ``source`` references a file.
|
|
241
160
|
|
|
242
161
|
Parameters
|
|
243
162
|
----------
|
|
244
163
|
source : object
|
|
245
|
-
|
|
164
|
+
Input source of data, possibly a file path.
|
|
165
|
+
format_hint : str | None
|
|
166
|
+
Explicit format hint: 'json', 'csv', or None to infer.
|
|
167
|
+
format_explicit : bool
|
|
168
|
+
Whether an explicit format hint was provided.
|
|
246
169
|
|
|
247
170
|
Returns
|
|
248
171
|
-------
|
|
249
|
-
JSONData |
|
|
250
|
-
Parsed
|
|
172
|
+
JSONData | object
|
|
173
|
+
Parsed JSON data when ``source`` is a file; otherwise the original
|
|
174
|
+
``source`` object.
|
|
251
175
|
"""
|
|
252
|
-
if
|
|
176
|
+
if isinstance(source, (dict, list)):
|
|
253
177
|
return cast(JSONData, source)
|
|
178
|
+
if not isinstance(source, (str, os.PathLike)):
|
|
179
|
+
return source
|
|
180
|
+
|
|
254
181
|
path = Path(source)
|
|
255
|
-
|
|
182
|
+
|
|
183
|
+
normalized_hint = (format_hint or '').strip().lower()
|
|
184
|
+
fmt: FileFormat | None = None
|
|
185
|
+
|
|
186
|
+
if format_explicit and normalized_hint:
|
|
187
|
+
try:
|
|
188
|
+
fmt = FileFormat(normalized_hint)
|
|
189
|
+
except ValueError:
|
|
190
|
+
fmt = None
|
|
191
|
+
elif not format_explicit:
|
|
192
|
+
suffix = path.suffix.lower().lstrip('.')
|
|
193
|
+
if suffix:
|
|
194
|
+
try:
|
|
195
|
+
fmt = FileFormat(suffix)
|
|
196
|
+
except ValueError:
|
|
197
|
+
fmt = None
|
|
198
|
+
|
|
199
|
+
if fmt is None:
|
|
256
200
|
return source
|
|
257
|
-
|
|
201
|
+
if fmt == FileFormat.CSV:
|
|
202
|
+
return _read_csv_rows(path)
|
|
203
|
+
return File(path, fmt).read()
|
|
258
204
|
|
|
259
205
|
|
|
260
206
|
def _parse_text_payload(
|
|
@@ -365,6 +311,50 @@ def _read_stdin_text() -> str:
|
|
|
365
311
|
return sys.stdin.read()
|
|
366
312
|
|
|
367
313
|
|
|
314
|
+
def _resolve_cli_payload(
|
|
315
|
+
source: object,
|
|
316
|
+
*,
|
|
317
|
+
format_hint: str | None,
|
|
318
|
+
format_explicit: bool,
|
|
319
|
+
hydrate_files: bool = True,
|
|
320
|
+
) -> JSONData | object:
|
|
321
|
+
"""
|
|
322
|
+
Normalize CLI-provided payloads, honoring stdin and inline data.
|
|
323
|
+
|
|
324
|
+
Parameters
|
|
325
|
+
----------
|
|
326
|
+
source : object
|
|
327
|
+
Raw CLI value (path, inline payload, or ``'-'`` for stdin).
|
|
328
|
+
format_hint : str | None
|
|
329
|
+
Explicit format hint supplied by the CLI option.
|
|
330
|
+
format_explicit : bool
|
|
331
|
+
Flag indicating whether the format hint was explicitly provided.
|
|
332
|
+
hydrate_files : bool, optional
|
|
333
|
+
When ``True`` (default) materialize file paths into structured data.
|
|
334
|
+
When ``False``, keep the original path so downstream code can stream
|
|
335
|
+
from disk directly.
|
|
336
|
+
|
|
337
|
+
Returns
|
|
338
|
+
-------
|
|
339
|
+
JSONData | object
|
|
340
|
+
Parsed payload or the original source value when hydration is
|
|
341
|
+
disabled.
|
|
342
|
+
"""
|
|
343
|
+
|
|
344
|
+
if isinstance(source, (os.PathLike, str)) and str(source) == '-':
|
|
345
|
+
text = _read_stdin_text()
|
|
346
|
+
return _parse_text_payload(text, format_hint)
|
|
347
|
+
|
|
348
|
+
if not hydrate_files:
|
|
349
|
+
return source
|
|
350
|
+
|
|
351
|
+
return _materialize_file_payload(
|
|
352
|
+
source,
|
|
353
|
+
format_hint=format_hint,
|
|
354
|
+
format_explicit=format_explicit,
|
|
355
|
+
)
|
|
356
|
+
|
|
357
|
+
|
|
368
358
|
def _write_json_output(
|
|
369
359
|
data: Any,
|
|
370
360
|
output_path: str | None,
|
|
@@ -414,39 +404,28 @@ def cmd_extract(
|
|
|
414
404
|
int
|
|
415
405
|
Zero on success.
|
|
416
406
|
"""
|
|
417
|
-
pretty,
|
|
418
|
-
|
|
419
|
-
_handle_format_guard(
|
|
420
|
-
io_context='source',
|
|
421
|
-
resource_type=args.source_type,
|
|
422
|
-
format_explicit=getattr(args, '_format_explicit', False),
|
|
423
|
-
strict=getattr(args, 'strict_format', False),
|
|
424
|
-
quiet=quiet,
|
|
425
|
-
)
|
|
407
|
+
pretty, _ = _presentation_flags(args)
|
|
408
|
+
explicit_format = _explicit_cli_format(args)
|
|
426
409
|
|
|
427
410
|
if args.source == '-':
|
|
428
411
|
text = _read_stdin_text()
|
|
429
412
|
payload = _parse_text_payload(text, getattr(args, 'format', None))
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
getattr(args, 'output', None),
|
|
433
|
-
success_message='Data extracted and saved to',
|
|
434
|
-
):
|
|
435
|
-
_emit_json(payload, pretty=pretty)
|
|
413
|
+
_emit_json(payload, pretty=pretty)
|
|
414
|
+
|
|
436
415
|
return 0
|
|
437
416
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
)
|
|
417
|
+
result = extract(
|
|
418
|
+
args.source_type,
|
|
419
|
+
args.source,
|
|
420
|
+
file_format=explicit_format,
|
|
421
|
+
)
|
|
422
|
+
output_path = getattr(args, 'target', None)
|
|
423
|
+
if output_path is None:
|
|
424
|
+
output_path = getattr(args, 'output', None)
|
|
446
425
|
|
|
447
426
|
if not _write_json_output(
|
|
448
427
|
result,
|
|
449
|
-
|
|
428
|
+
output_path,
|
|
450
429
|
success_message='Data extracted and saved to',
|
|
451
430
|
):
|
|
452
431
|
_emit_json(result, pretty=pretty)
|
|
@@ -470,30 +449,31 @@ def cmd_validate(
|
|
|
470
449
|
int
|
|
471
450
|
Zero on success.
|
|
472
451
|
"""
|
|
473
|
-
pretty,
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
452
|
+
pretty, _quiet = _presentation_flags(args)
|
|
453
|
+
format_explicit: bool = getattr(args, '_format_explicit', False)
|
|
454
|
+
format_hint: str | None = getattr(args, 'source_format', None)
|
|
455
|
+
payload = cast(
|
|
456
|
+
JSONData | str,
|
|
457
|
+
_resolve_cli_payload(
|
|
458
|
+
args.source,
|
|
459
|
+
format_hint=format_hint,
|
|
460
|
+
format_explicit=format_explicit,
|
|
461
|
+
),
|
|
462
|
+
)
|
|
483
463
|
result = validate(payload, args.rules)
|
|
484
464
|
|
|
485
|
-
|
|
486
|
-
if
|
|
465
|
+
target_path = getattr(args, 'target', None)
|
|
466
|
+
if target_path:
|
|
487
467
|
validated_data = result.get('data')
|
|
488
468
|
if validated_data is not None:
|
|
489
469
|
_write_json_output(
|
|
490
470
|
validated_data,
|
|
491
|
-
|
|
471
|
+
target_path,
|
|
492
472
|
success_message='Validation result saved to',
|
|
493
473
|
)
|
|
494
474
|
else:
|
|
495
475
|
print(
|
|
496
|
-
f'Validation failed, no data to save for {
|
|
476
|
+
f'Validation failed, no data to save for {target_path}',
|
|
497
477
|
file=sys.stderr,
|
|
498
478
|
)
|
|
499
479
|
else:
|
|
@@ -518,22 +498,24 @@ def cmd_transform(
|
|
|
518
498
|
int
|
|
519
499
|
Zero on success.
|
|
520
500
|
"""
|
|
521
|
-
pretty,
|
|
501
|
+
pretty, _quiet = _presentation_flags(args)
|
|
502
|
+
format_hint: str | None = getattr(args, 'source_format', None)
|
|
503
|
+
format_explicit: bool = format_hint is not None
|
|
522
504
|
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
505
|
+
payload = cast(
|
|
506
|
+
JSONData | str,
|
|
507
|
+
_resolve_cli_payload(
|
|
508
|
+
args.source,
|
|
509
|
+
format_hint=format_hint,
|
|
510
|
+
format_explicit=format_explicit,
|
|
511
|
+
),
|
|
512
|
+
)
|
|
531
513
|
|
|
532
514
|
data = transform(payload, args.operations)
|
|
533
515
|
|
|
534
516
|
if not _write_json_output(
|
|
535
517
|
data,
|
|
536
|
-
getattr(args, '
|
|
518
|
+
getattr(args, 'target', None),
|
|
537
519
|
success_message='Data transformed and saved to',
|
|
538
520
|
):
|
|
539
521
|
_emit_json(data, pretty=pretty)
|
|
@@ -557,52 +539,43 @@ def cmd_load(
|
|
|
557
539
|
int
|
|
558
540
|
Zero on success.
|
|
559
541
|
"""
|
|
560
|
-
pretty,
|
|
561
|
-
|
|
562
|
-
_handle_format_guard(
|
|
563
|
-
io_context='target',
|
|
564
|
-
resource_type=args.target_type,
|
|
565
|
-
format_explicit=getattr(args, '_format_explicit', False),
|
|
566
|
-
strict=getattr(args, 'strict_format', False),
|
|
567
|
-
quiet=quiet,
|
|
568
|
-
)
|
|
542
|
+
pretty, _ = _presentation_flags(args)
|
|
543
|
+
explicit_format = _explicit_cli_format(args)
|
|
569
544
|
|
|
570
545
|
# Allow piping into load.
|
|
571
|
-
|
|
572
|
-
|
|
546
|
+
source_format = getattr(args, 'source_format', None)
|
|
547
|
+
source_value = cast(
|
|
548
|
+
str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
|
|
549
|
+
_resolve_cli_payload(
|
|
550
|
+
args.source,
|
|
551
|
+
format_hint=source_format,
|
|
552
|
+
format_explicit=source_format is not None,
|
|
553
|
+
hydrate_files=False,
|
|
554
|
+
),
|
|
573
555
|
)
|
|
574
|
-
if args.source == '-':
|
|
575
|
-
text = _read_stdin_text()
|
|
576
|
-
source_value = cast(
|
|
577
|
-
str | dict[str, Any] | list[dict[str, Any]],
|
|
578
|
-
_parse_text_payload(
|
|
579
|
-
text,
|
|
580
|
-
getattr(args, 'input_format', None),
|
|
581
|
-
),
|
|
582
|
-
)
|
|
583
|
-
else:
|
|
584
|
-
source_value = args.source
|
|
585
556
|
|
|
586
557
|
# Allow piping out of load for file targets.
|
|
587
558
|
if args.target_type == 'file' and args.target == '-':
|
|
588
|
-
payload =
|
|
559
|
+
payload = _materialize_file_payload(
|
|
560
|
+
source_value,
|
|
561
|
+
format_hint=source_format,
|
|
562
|
+
format_explicit=source_format is not None,
|
|
563
|
+
)
|
|
589
564
|
_emit_json(payload, pretty=pretty)
|
|
590
565
|
return 0
|
|
591
566
|
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
args.target,
|
|
599
|
-
file_format=getattr(args, 'format', None),
|
|
600
|
-
)
|
|
567
|
+
result = load(
|
|
568
|
+
source_value,
|
|
569
|
+
args.target_type,
|
|
570
|
+
args.target,
|
|
571
|
+
file_format=explicit_format,
|
|
572
|
+
)
|
|
601
573
|
|
|
574
|
+
output_path = getattr(args, 'output', None)
|
|
602
575
|
if not _write_json_output(
|
|
603
576
|
result,
|
|
604
|
-
|
|
605
|
-
success_message='
|
|
577
|
+
output_path,
|
|
578
|
+
success_message='Load result saved to',
|
|
606
579
|
):
|
|
607
580
|
_emit_json(result, pretty=pretty)
|
|
608
581
|
|
|
@@ -627,13 +600,19 @@ def cmd_pipeline(
|
|
|
627
600
|
"""
|
|
628
601
|
cfg = load_pipeline_config(args.config, substitute=True)
|
|
629
602
|
|
|
630
|
-
|
|
603
|
+
list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
|
|
604
|
+
run_target = (
|
|
605
|
+
getattr(args, 'run', None)
|
|
606
|
+
or getattr(args, 'job', None)
|
|
607
|
+
or getattr(args, 'pipeline', None)
|
|
608
|
+
)
|
|
609
|
+
|
|
610
|
+
if list_flag and not run_target:
|
|
631
611
|
print_json({'jobs': _pipeline_summary(cfg)['jobs']})
|
|
632
612
|
return 0
|
|
633
613
|
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
result = run(job=run_job, config_path=args.config)
|
|
614
|
+
if run_target:
|
|
615
|
+
result = run(job=run_target, config_path=args.config)
|
|
637
616
|
print_json({'status': 'ok', 'result': result})
|
|
638
617
|
return 0
|
|
639
618
|
|