etlplus 0.4.0__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,665 @@
1
+ """
2
+ :mod:`etlplus.cli.handlers` module.
3
+
4
+ Command handler functions for the ``etlplus`` command-line interface (CLI).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import argparse
10
+ import csv
11
+ import io
12
+ import json
13
+ import os
14
+ import sys
15
+ from pathlib import Path
16
+ from typing import Any
17
+ from typing import cast
18
+
19
+ from ..config import PipelineConfig
20
+ from ..config import load_pipeline_config
21
+ from ..enums import FileFormat
22
+ from ..extract import extract
23
+ from ..file import File
24
+ from ..load import load
25
+ from ..run import run
26
+ from ..transform import transform
27
+ from ..types import JSONData
28
+ from ..utils import json_type
29
+ from ..utils import print_json
30
+ from ..validate import validate
31
+
32
+ # SECTION: EXPORTS ========================================================== #
33
+
34
+
35
+ __all__ = [
36
+ # Functions
37
+ 'cmd_extract',
38
+ 'cmd_list',
39
+ 'cmd_load',
40
+ 'cmd_pipeline',
41
+ 'cmd_run',
42
+ 'cmd_transform',
43
+ 'cmd_validate',
44
+ ]
45
+
46
+
47
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
48
+
49
+
50
def _emit_json(
    data: Any,
    *,
    pretty: bool,
) -> None:
    """
    Write ``data`` to stdout as JSON, pretty-printed or compact.

    Parameters
    ----------
    data : Any
        Payload to serialize. On the compact path it is handed directly to
        :func:`json.dumps`.
    pretty : bool
        ``True`` delegates rendering to :func:`print_json`; ``False`` prints
        one compact JSON string.
    """
    if not pretty:
        # Compact form: no padding after separators, non-ASCII kept verbatim.
        print(json.dumps(data, ensure_ascii=False, separators=(',', ':')))
        return

    print_json(data)
76
+
77
+
78
def _infer_payload_format(
    text: str,
) -> str:
    """
    Guess whether a text payload is JSON or CSV.

    Parameters
    ----------
    text : str
        Payload text to inspect.

    Returns
    -------
    str
        ``'json'`` if the first non-whitespace character is ``{`` or ``[``;
        ``'csv'`` for everything else, including empty text.
    """
    leading = text.lstrip()
    return 'json' if leading.startswith(('{', '[')) else 'csv'
98
+
99
+
100
def _list_sections(
    cfg: PipelineConfig,
    args: argparse.Namespace,
) -> dict[str, Any]:
    """
    Collect the configuration sections requested for the list command.

    Parameters
    ----------
    cfg : PipelineConfig
        Loaded pipeline configuration.
    args : argparse.Namespace
        Parsed command-line arguments. The ``pipelines``, ``sources``,
        ``targets`` and ``transforms`` attributes are read as optional
        flags; a missing attribute counts as not requested.

    Returns
    -------
    dict[str, Any]
        One entry per requested section. When no section is requested, a
        single ``'jobs'`` entry taken from :func:`_pipeline_summary`.
    """

    def requested(flag: str) -> Any:
        return getattr(args, flag, False)

    out: dict[str, Any] = {}
    if requested('pipelines'):
        out['pipelines'] = [cfg.name]
    if requested('sources'):
        out['sources'] = [entry.name for entry in cfg.sources]
    if requested('targets'):
        out['targets'] = [entry.name for entry in cfg.targets]
    if requested('transforms'):
        # Entries without a ``name`` attribute are reported as ``None``.
        out['transforms'] = [
            getattr(entry, 'name', None) for entry in cfg.transforms
        ]

    # Nothing requested: fall back to the job listing.
    return out or {'jobs': _pipeline_summary(cfg)['jobs']}
133
+
134
+
135
def _explicit_cli_format(
    args: argparse.Namespace,
) -> str | None:
    """
    Return the format the user explicitly asked for, if any.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    str | None
        ``None`` unless ``args._format_explicit`` is truthy. Otherwise the
        first non-blank value among ``format``, ``target_format`` and
        ``source_format`` (checked in that order), stripped and lower-cased,
        or ``None`` when all of them are missing or blank.
    """
    if not getattr(args, '_format_explicit', False):
        return None

    for name in ('format', 'target_format', 'source_format'):
        raw = getattr(args, name, None)
        cleaned = raw.strip().lower() if raw is not None else ''
        if cleaned:
            return cleaned
    return None
150
+
151
+
152
def _materialize_file_payload(
    source: object,
    *,
    format_hint: str | None,
    format_explicit: bool,
) -> JSONData | object:
    """
    Load a file-backed ``source`` into structured data when possible.

    Parameters
    ----------
    source : object
        Candidate input. Dicts and lists are returned as-is; strings and
        path-like objects are treated as file paths; any other object is
        returned untouched.
    format_hint : str | None
        Format name used when ``format_explicit`` is ``True``.
    format_explicit : bool
        ``True`` to resolve the format from ``format_hint`` only; ``False``
        to resolve it from the path suffix only.

    Returns
    -------
    JSONData | object
        The file contents when a :class:`FileFormat` could be resolved;
        otherwise the original ``source``.
    """
    if isinstance(source, (dict, list)):
        return cast(JSONData, source)
    if not isinstance(source, (str, os.PathLike)):
        return source

    path = Path(source)
    hint = (format_hint or '').strip().lower()

    # Exactly one origin is consulted: the hint when the caller was explicit,
    # the file suffix otherwise.
    if format_explicit:
        candidate = hint
    else:
        candidate = path.suffix.lower().lstrip('.')

    if not candidate:
        return source
    try:
        fmt = FileFormat(candidate)
    except ValueError:
        # Unknown format name: hand the value back unchanged.
        return source

    if fmt == FileFormat.CSV:
        return _read_csv_rows(path)
    return File(path, fmt).read()
204
+
205
+
206
def _parse_text_payload(
    text: str,
    fmt: str | None,
) -> JSONData | str:
    """
    Turn JSON or CSV text into a Python payload.

    Parameters
    ----------
    text : str
        Payload text.
    fmt : str | None
        Format name. When ``None`` or blank, the format is inferred from the
        text via :func:`_infer_payload_format`.

    Returns
    -------
    JSONData | str
        The result of :func:`json_type` for ``'json'``, a list of row
        dictionaries for ``'csv'``, or ``text`` unchanged for any other
        format name.
    """
    chosen = (fmt or '').strip().lower()
    if not chosen:
        chosen = _infer_payload_format(text)

    if chosen == 'json':
        return cast(JSONData, json_type(text))
    if chosen != 'csv':
        return text

    rows = csv.DictReader(io.StringIO(text))
    return list(map(dict, rows))
233
+
234
+
235
def _pipeline_summary(
    cfg: PipelineConfig,
) -> dict[str, Any]:
    """
    Summarize a pipeline configuration as a plain dictionary.

    Parameters
    ----------
    cfg : PipelineConfig
        Loaded pipeline configuration.

    Returns
    -------
    dict[str, Any]
        The pipeline ``name`` and ``version`` plus the names of its
        ``sources``, ``targets`` and ``jobs``.
    """

    def names_of(items: Any) -> list[Any]:
        return [item.name for item in items]

    source_names = names_of(cfg.sources)
    target_names = names_of(cfg.targets)
    job_names = names_of(cfg.jobs)

    summary: dict[str, Any] = {'name': cfg.name, 'version': cfg.version}
    summary['sources'] = source_names
    summary['targets'] = target_names
    summary['jobs'] = job_names
    return summary
261
+
262
+
263
def _presentation_flags(
    args: argparse.Namespace,
) -> tuple[bool, bool]:
    """
    Read the output presentation toggles from the parsed namespace.

    Parameters
    ----------
    args : argparse.Namespace
        Namespace produced by the CLI parser.

    Returns
    -------
    tuple[bool, bool]
        ``(pretty, quiet)``. ``pretty`` defaults to ``True`` and ``quiet``
        to ``False`` when the attribute is absent.
    """
    pretty = getattr(args, 'pretty', True)
    quiet = getattr(args, 'quiet', False)
    return pretty, quiet
279
+
280
+
281
def _read_csv_rows(
    path: Path,
) -> list[dict[str, str]]:
    """
    Read CSV rows into dictionaries.

    Parameters
    ----------
    path : Path
        Path to a UTF-8 encoded CSV file whose first row holds the column
        names. A leading byte-order mark is tolerated.

    Returns
    -------
    list[dict[str, str]]
        One dictionary per data row, keyed by column name.
    """
    # ``utf-8-sig`` decodes plain UTF-8 identically but drops a leading BOM,
    # which would otherwise be glued to the first column name
    # (``'\ufeffid'`` instead of ``'id'``).
    with path.open(newline='', encoding='utf-8-sig') as handle:
        return [dict(row) for row in csv.DictReader(handle)]
300
+
301
+
302
def _read_stdin_text() -> str:
    """
    Read ``stdin`` to exhaustion.

    Returns
    -------
    str
        Everything available on ``sys.stdin``, as one string.
    """
    stream = sys.stdin
    return stream.read()
312
+
313
+
314
def _resolve_cli_payload(
    source: object,
    *,
    format_hint: str | None,
    format_explicit: bool,
    hydrate_files: bool = True,
) -> JSONData | object:
    """
    Resolve a CLI ``source`` argument into a payload.

    Parameters
    ----------
    source : object
        Raw CLI value. A string or path-like equal to ``'-'`` selects stdin.
    format_hint : str | None
        Format name forwarded to the text parser or the file loader.
    format_explicit : bool
        Whether ``format_hint`` was explicitly provided; forwarded to the
        file loader.
    hydrate_files : bool, optional
        ``True`` (default) passes non-stdin sources through
        :func:`_materialize_file_payload`. ``False`` returns them unchanged.

    Returns
    -------
    JSONData | object
        Parsed stdin text, the materialized payload, or ``source`` itself
        when hydration is disabled.
    """
    reads_stdin = isinstance(source, (os.PathLike, str)) and str(source) == '-'
    if reads_stdin:
        # Stdin takes precedence over ``hydrate_files``.
        return _parse_text_payload(_read_stdin_text(), format_hint)

    if hydrate_files:
        return _materialize_file_payload(
            source,
            format_hint=format_hint,
            format_explicit=format_explicit,
        )
    return source
356
+
357
+
358
def _write_json_output(
    data: Any,
    output_path: str | None,
    *,
    success_message: str,
) -> bool:
    """
    Write ``data`` to a JSON file when a real output path is given.

    Parameters
    ----------
    data : Any
        Data to write.
    output_path : str | None
        Destination file path. ``None``, an empty string, or ``'-'`` means
        no file is written.
    success_message : str
        Text printed, followed by the path, after the file is written.

    Returns
    -------
    bool
        ``True`` if a file was written. ``False`` if nothing was written;
        this function prints nothing in that case, so stdout output is left
        to the caller.
    """
    writes_to_file = bool(output_path) and output_path != '-'
    if not writes_to_file:
        return False

    destination = Path(output_path)
    File(destination, FileFormat.JSON).write_json(data)
    print(f'{success_message} {output_path}')
    return True
386
+
387
+
388
+ # SECTION: FUNCTIONS ======================================================== #
389
+
390
+
391
def cmd_extract(
    args: argparse.Namespace,
) -> int:
    """
    Extract data from a source and emit or save it as JSON.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. ``source`` and ``source_type`` are
        required; ``format``, ``target`` and ``output`` are optional.

    Returns
    -------
    int
        Zero on success.
    """
    pretty = _presentation_flags(args)[0]
    file_format = _explicit_cli_format(args)

    # Piped input: parse the text and echo it; no extractor, no file output.
    if args.source == '-':
        parsed = _parse_text_payload(
            _read_stdin_text(),
            getattr(args, 'format', None),
        )
        _emit_json(parsed, pretty=pretty)
        return 0

    extracted = extract(
        args.source_type,
        args.source,
        file_format=file_format,
    )

    # ``target`` wins; ``output`` is consulted only when ``target`` is None.
    destination = getattr(args, 'target', None)
    if destination is None:
        destination = getattr(args, 'output', None)

    saved = _write_json_output(
        extracted,
        destination,
        success_message='Data extracted and saved to',
    )
    if not saved:
        _emit_json(extracted, pretty=pretty)

    return 0
434
+
435
+
436
def cmd_validate(
    args: argparse.Namespace,
) -> int:
    """
    Validate data from a source against the supplied rules.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. ``source`` and ``rules`` are
        required; ``source_format``, ``target`` and ``_format_explicit`` are
        optional.

    Returns
    -------
    int
        Zero once validation has run, including when the validation result
        carries no data to save.
    """
    pretty, _quiet = _presentation_flags(args)
    format_explicit: bool = getattr(args, '_format_explicit', False)
    format_hint: str | None = getattr(args, 'source_format', None)
    payload = cast(
        JSONData | str,
        _resolve_cli_payload(
            args.source,
            format_hint=format_hint,
            format_explicit=format_explicit,
        ),
    )
    result = validate(payload, args.rules)

    target_path = getattr(args, 'target', None)
    if not target_path:
        # No target: report the full validation result on stdout.
        _emit_json(result, pretty=pretty)
        return 0

    validated_data = result.get('data')
    if validated_data is None:
        print(
            f'Validation failed, no data to save for {target_path}',
            file=sys.stderr,
        )
        return 0

    # ``_write_json_output`` declines to write when the target is ``'-'``.
    # Fall back to stdout, as the other commands do, so the validated data
    # is not silently dropped.
    if not _write_json_output(
        validated_data,
        target_path,
        success_message='Validation result saved to',
    ):
        _emit_json(validated_data, pretty=pretty)

    return 0
483
+
484
+
485
def cmd_transform(
    args: argparse.Namespace,
) -> int:
    """
    Apply transform operations to a source payload.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. ``source`` and ``operations`` are
        required; ``source_format`` and ``target`` are optional.

    Returns
    -------
    int
        Zero on success.
    """
    pretty, _quiet = _presentation_flags(args)

    # Here a source format counts as explicit whenever one is present.
    source_format: str | None = getattr(args, 'source_format', None)
    resolved = _resolve_cli_payload(
        args.source,
        format_hint=source_format,
        format_explicit=source_format is not None,
    )
    payload = cast(JSONData | str, resolved)

    transformed = transform(payload, args.operations)

    saved = _write_json_output(
        transformed,
        getattr(args, 'target', None),
        success_message='Data transformed and saved to',
    )
    if not saved:
        _emit_json(transformed, pretty=pretty)

    return 0
524
+
525
+
526
def cmd_load(
    args: argparse.Namespace,
) -> int:
    """
    Load data from a source into a target.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. ``source``, ``target_type`` and
        ``target`` are required; ``source_format`` and ``output`` are
        optional.

    Returns
    -------
    int
        Zero on success.
    """
    pretty, _ = _presentation_flags(args)
    target_format = _explicit_cli_format(args)

    # Stdin (``-``) is parsed here; file paths are passed along unhydrated.
    source_format = getattr(args, 'source_format', None)
    source_is_explicit = source_format is not None
    source_value = cast(
        str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
        _resolve_cli_payload(
            args.source,
            format_hint=source_format,
            format_explicit=source_is_explicit,
            hydrate_files=False,
        ),
    )

    # A file target of ``-`` means stdout: emit the payload, skip ``load``.
    to_stdout = args.target_type == 'file' and args.target == '-'
    if to_stdout:
        hydrated = _materialize_file_payload(
            source_value,
            format_hint=source_format,
            format_explicit=source_is_explicit,
        )
        _emit_json(hydrated, pretty=pretty)
        return 0

    outcome = load(
        source_value,
        args.target_type,
        args.target,
        file_format=target_format,
    )

    saved = _write_json_output(
        outcome,
        getattr(args, 'output', None),
        success_message='Load result saved to',
    )
    if not saved:
        _emit_json(outcome, pretty=pretty)

    return 0
583
+
584
+
585
def cmd_pipeline(
    args: argparse.Namespace,
) -> int:
    """
    Run a job from, list the jobs of, or summarize a pipeline configuration.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. ``config`` is required; ``list``,
        ``jobs``, ``run``, ``job`` and ``pipeline`` are optional.

    Returns
    -------
    int
        Zero on success.
    """
    cfg = load_pipeline_config(args.config, substitute=True)

    wants_listing = getattr(args, 'list', False) or getattr(args, 'jobs', False)
    job_to_run = (
        getattr(args, 'run', None)
        or getattr(args, 'job', None)
        or getattr(args, 'pipeline', None)
    )

    # A job to run takes precedence over a listing request.
    if job_to_run:
        outcome = run(job=job_to_run, config_path=args.config)
        print_json({'status': 'ok', 'result': outcome})
        return 0

    if wants_listing:
        print_json({'jobs': _pipeline_summary(cfg)['jobs']})
        return 0

    print_json(_pipeline_summary(cfg))
    return 0
621
+
622
+
623
def cmd_list(args: argparse.Namespace) -> int:
    """
    Print the requested sections of a pipeline configuration as JSON.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. ``config`` is required; the section
        flags are interpreted by :func:`_list_sections`.

    Returns
    -------
    int
        Zero on success.
    """
    config = load_pipeline_config(args.config, substitute=True)
    sections = _list_sections(config, args)
    print_json(sections)
    return 0
640
+
641
+
642
def cmd_run(args: argparse.Namespace) -> int:
    """
    Run a named job from a pipeline configuration, or print its summary.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. ``config`` is required; ``job`` and
        ``pipeline`` are optional and checked in that order.

    Returns
    -------
    int
        Zero on success.
    """
    # The configuration is loaded up front on both paths.
    cfg = load_pipeline_config(args.config, substitute=True)

    job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
    if not job_name:
        # No job named: show the pipeline summary.
        print_json(_pipeline_summary(cfg))
        return 0

    outcome = run(job=job_name, config_path=args.config)
    print_json({'status': 'ok', 'result': outcome})
    return 0