etlplus 0.4.6__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
etlplus/cli/handlers.py CHANGED
@@ -18,6 +18,8 @@ from typing import cast
 
 from ..config import PipelineConfig
 from ..config import load_pipeline_config
+from ..database import load_table_spec
+from ..database import render_tables
 from ..enums import FileFormat
 from ..extract import extract
 from ..file import File
@@ -34,19 +36,51 @@ from ..validate import validate
 
 __all__ = [
     # Functions
-    'cmd_extract',
-    'cmd_list',
-    'cmd_load',
-    'cmd_pipeline',
-    'cmd_run',
-    'cmd_transform',
-    'cmd_validate',
+    'extract_handler',
+    'check_handler',
+    'load_handler',
+    'pipeline_handler',
+    'render_handler',
+    'run_handler',
+    'transform_handler',
+    'validate_handler',
 ]
 
 
 # SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
+def _collect_table_specs(
+    config_path: str | None,
+    spec_path: str | None,
+) -> list[dict[str, Any]]:
+    """
+    Load table schemas from a pipeline config and/or standalone spec.
+
+    Parameters
+    ----------
+    config_path : str | None
+        Path to a pipeline YAML config file.
+    spec_path : str | None
+        Path to a standalone table spec file.
+
+    Returns
+    -------
+    list[dict[str, Any]]
+        Collected table specification mappings.
+    """
+    specs: list[dict[str, Any]] = []
+
+    if spec_path:
+        specs.append(load_table_spec(Path(spec_path)))
+
+    if config_path:
+        cfg = load_pipeline_config(config_path, substitute=True)
+        specs.extend(getattr(cfg, 'table_schemas', []))
+
+    return specs
+
+
 def _emit_json(
     data: Any,
     *,
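
The new _collect_table_specs helper merges schemas from both inputs, standalone spec first, then any table_schemas carried by the loaded config. A minimal sketch of calling it directly, assuming etlplus 0.7.0 is installed; both file names are placeholders:

from etlplus.cli.handlers import _collect_table_specs

# Both paths below are hypothetical; the standalone spec is appended first,
# then any table_schemas found on the loaded pipeline config.
specs = _collect_table_specs(
    config_path='pipeline.yml',
    spec_path='orders_table.yml',
)
print(len(specs))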
@@ -75,6 +109,23 @@ def _emit_json(
     print(dumped)
 
 
+def _explicit_cli_format(
+    args: argparse.Namespace,
+) -> str | None:
+    """Return the explicit CLI format hint when provided."""
+
+    if not getattr(args, '_format_explicit', False):
+        return None
+    for attr in ('format', 'target_format', 'source_format'):
+        value = getattr(args, attr, None)
+        if value is None:
+            continue
+        normalized = value.strip().lower()
+        if normalized:
+            return normalized
+    return None
+
+
 def _infer_payload_format(
     text: str,
 ) -> str:
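
The move is purely positional; the helper still scans format, target_format, then source_format and returns the first non-empty hint, normalized. A quick sketch, with Namespace fields mirroring the attributes probed above (assumes etlplus 0.7.0 is installed):

import argparse

from etlplus.cli.handlers import _explicit_cli_format

hinted = argparse.Namespace(
    _format_explicit=True,
    format=None,
    target_format=' CSV ',
)
print(_explicit_cli_format(hinted))    # 'csv': first non-empty hint, normalized

implicit = argparse.Namespace(_format_explicit=False, format='json')
print(_explicit_cli_format(implicit))  # None: the explicit flag is not set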
@@ -97,12 +148,12 @@ def _infer_payload_format(
     return 'csv'
 
 
-def _list_sections(
+def _check_sections(
     cfg: PipelineConfig,
     args: argparse.Namespace,
 ) -> dict[str, Any]:
     """
-    Build sectioned metadata output for the list command.
+    Build sectioned metadata output for the check command.
 
     Parameters
     ----------
@@ -114,9 +165,11 @@ def _list_sections(
     Returns
     -------
     dict[str, Any]
-        Metadata output for the list command.
+        Metadata output for the check command.
     """
     sections: dict[str, Any] = {}
+    if getattr(args, 'jobs', False):
+        sections['jobs'] = _pipeline_summary(cfg)['jobs']
     if getattr(args, 'pipelines', False):
         sections['pipelines'] = [cfg.name]
     if getattr(args, 'sources', False):
@@ -132,23 +185,6 @@ def _list_sections(
     return sections
 
 
-def _explicit_cli_format(
-    args: argparse.Namespace,
-) -> str | None:
-    """Return the explicit CLI format hint when provided."""
-
-    if not getattr(args, '_format_explicit', False):
-        return None
-    for attr in ('format', 'target_format', 'source_format'):
-        value = getattr(args, attr, None)
-        if value is None:
-            continue
-        normalized = value.strip().lower()
-        if normalized:
-            return normalized
-    return None
-
-
 def _materialize_file_payload(
     source: object,
     *,
@@ -222,7 +258,6 @@ def _parse_text_payload(
     JSONData | str
         The parsed payload as JSON data or raw text.
     """
-
     effective = (fmt or '').strip().lower() or _infer_payload_format(text)
     if effective == 'json':
         return cast(JSONData, json_type(text))
@@ -263,7 +298,8 @@ def _pipeline_summary(
 def _presentation_flags(
     args: argparse.Namespace,
 ) -> tuple[bool, bool]:
-    """Return presentation toggles from the parsed namespace.
+    """
+    Return presentation toggles from the parsed namespace.
 
     Parameters
     ----------
@@ -340,7 +376,6 @@ def _resolve_cli_payload(
         Parsed payload or the original source value when hydration is
         disabled.
     """
-
     if isinstance(source, (os.PathLike, str)) and str(source) == '-':
         text = _read_stdin_text()
         return _parse_text_payload(text, format_hint)
@@ -388,11 +423,11 @@ def _write_json_output(
 # SECTION: FUNCTIONS ======================================================== #
 
 
-def cmd_extract(
+def check_handler(
     args: argparse.Namespace,
 ) -> int:
     """
-    Extract data from a source.
+    Print requested pipeline sections from a YAML configuration.
 
     Parameters
     ----------
@@ -404,40 +439,20 @@ def cmd_extract(
     int
         Zero on success.
     """
-    pretty, _ = _presentation_flags(args)
-    explicit_format = _explicit_cli_format(args)
-
-    if args.source == '-':
-        text = _read_stdin_text()
-        payload = _parse_text_payload(text, getattr(args, 'format', None))
-        _emit_json(payload, pretty=pretty)
-
+    cfg = load_pipeline_config(args.config, substitute=True)
+    if getattr(args, 'summary', False):
+        print_json(_pipeline_summary(cfg))
         return 0
 
-    result = extract(
-        args.source_type,
-        args.source,
-        file_format=explicit_format,
-    )
-    output_path = getattr(args, 'target', None)
-    if output_path is None:
-        output_path = getattr(args, 'output', None)
-
-    if not _write_json_output(
-        result,
-        output_path,
-        success_message='Data extracted and saved to',
-    ):
-        _emit_json(result, pretty=pretty)
-
+    print_json(_check_sections(cfg, args))
     return 0
 
 
-def cmd_validate(
+def extract_handler(
     args: argparse.Namespace,
 ) -> int:
     """
-    Validate data from a source.
+    Extract data from a source.
 
     Parameters
     ----------
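
check_handler absorbs the old cmd_list behavior and adds a --summary shortcut. A sketch of invoking it programmatically; 'pipeline.yml' is a placeholder and the boolean fields mirror the getattr probes in the body above:

import argparse

from etlplus.cli.handlers import check_handler

# Summary view: prints the full pipeline summary as JSON, returns 0.
check_handler(argparse.Namespace(config='pipeline.yml', summary=True))

# Sectioned view: prints only the requested sections ('jobs' here).
check_handler(argparse.Namespace(config='pipeline.yml', summary=False, jobs=True))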
@@ -449,81 +464,36 @@ def cmd_validate(
     int
         Zero on success.
     """
-    pretty, _quiet = _presentation_flags(args)
-    format_explicit: bool = getattr(args, '_format_explicit', False)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    payload = cast(
-        JSONData | str,
-        _resolve_cli_payload(
-            args.source,
-            format_hint=format_hint,
-            format_explicit=format_explicit,
-        ),
-    )
-    result = validate(payload, args.rules)
-
-    target_path = getattr(args, 'target', None)
-    if target_path:
-        validated_data = result.get('data')
-        if validated_data is not None:
-            _write_json_output(
-                validated_data,
-                target_path,
-                success_message='Validation result saved to',
-            )
-        else:
-            print(
-                f'Validation failed, no data to save for {target_path}',
-                file=sys.stderr,
-            )
-    else:
-        _emit_json(result, pretty=pretty)
-
-    return 0
-
-
-def cmd_transform(
-    args: argparse.Namespace,
-) -> int:
-    """
-    Transform data from a source.
+    pretty, _ = _presentation_flags(args)
+    explicit_format = _explicit_cli_format(args)
 
-    Parameters
-    ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    if args.source == '-':
+        text = _read_stdin_text()
+        payload = _parse_text_payload(text, getattr(args, 'format', None))
+        _emit_json(payload, pretty=pretty)
 
-    Returns
-    -------
-    int
-        Zero on success.
-    """
-    pretty, _quiet = _presentation_flags(args)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    format_explicit: bool = format_hint is not None
+        return 0
 
-    payload = cast(
-        JSONData | str,
-        _resolve_cli_payload(
-            args.source,
-            format_hint=format_hint,
-            format_explicit=format_explicit,
-        ),
+    result = extract(
+        args.source_type,
+        args.source,
+        file_format=explicit_format,
     )
-
-    data = transform(payload, args.operations)
+    output_path = getattr(args, 'target', None)
+    if output_path is None:
+        output_path = getattr(args, 'output', None)
 
     if not _write_json_output(
-        data,
-        getattr(args, 'target', None),
-        success_message='Data transformed and saved to',
+        result,
+        output_path,
+        success_message='Data extracted and saved to',
     ):
-        _emit_json(data, pretty=pretty)
+        _emit_json(result, pretty=pretty)
 
     return 0
 
 
-def cmd_load(
+def load_handler(
     args: argparse.Namespace,
 ) -> int:
     """
@@ -582,7 +552,7 @@ def cmd_load(
     return 0
 
 
-def cmd_pipeline(
+def pipeline_handler(
     args: argparse.Namespace,
 ) -> int:
     """
@@ -598,6 +568,12 @@ def cmd_pipeline(
     int
         Zero on success.
     """
+    print(
+        'DEPRECATED: use "etlplus check --summary|--jobs" or '
+        '"etlplus run --job/--pipeline" instead of "etlplus pipeline".',
+        file=sys.stderr,
+    )
+
     cfg = load_pipeline_config(args.config, substitute=True)
 
     list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
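
The shim keeps the old subcommand working: pipeline_handler now prints the deprecation notice to stderr, then proceeds with the legacy flow. A sketch of the old call next to its suggested replacement; 'pipeline.yml' is a placeholder:

import argparse

from etlplus.cli.handlers import check_handler, pipeline_handler

legacy = argparse.Namespace(config='pipeline.yml')
pipeline_handler(legacy)  # warns on stderr, then behaves as before

modern = argparse.Namespace(config='pipeline.yml', summary=True)
check_handler(modern)     # preferred replacement for the summary view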
@@ -620,9 +596,71 @@ def cmd_pipeline(
     return 0
 
 
-def cmd_list(args: argparse.Namespace) -> int:
+def render_handler(
+    args: argparse.Namespace,
+) -> int:
+    """Render SQL DDL statements from table schema specs."""
+    _, quiet = _presentation_flags(args)
+
+    template_value = getattr(args, 'template', 'ddl') or 'ddl'
+    template_path = getattr(args, 'template_path', None)
+    table_filter = getattr(args, 'table', None)
+    spec_path = getattr(args, 'spec', None)
+    config_path = getattr(args, 'config', None)
+
+    # If the provided template points to a file, treat it as a path override.
+    file_override = template_path
+    template_key = template_value
+    if template_path is None:
+        candidate_path = Path(template_value)
+        if candidate_path.exists():
+            file_override = str(candidate_path)
+            template_key = None
+
+    specs = _collect_table_specs(config_path, spec_path)
+    if table_filter:
+        specs = [
+            spec
+            for spec in specs
+            if str(spec.get('table')) == table_filter
+            or str(spec.get('name', '')) == table_filter
+        ]
+
+    if not specs:
+        target_desc = table_filter or 'table_schemas'
+        print(
+            'No table schemas found for '
+            f'{target_desc}. Provide --spec or a pipeline --config with '
+            'table_schemas.',
+            file=sys.stderr,
+        )
+        return 1
+
+    rendered_chunks = render_tables(
+        specs,
+        template=template_key,
+        template_path=file_override,
+    )
+    sql_text = (
+        '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
+    )
+
+    output_path = getattr(args, 'output', None)
+    if output_path and output_path != '-':
+        Path(output_path).write_text(sql_text, encoding='utf-8')
+        if not quiet:
+            print(f'Rendered {len(specs)} schema(s) to {output_path}')
+        return 0
+
+    print(sql_text)
+    return 0
+
+
+def run_handler(
+    args: argparse.Namespace,
+) -> int:
     """
-    Print requested pipeline sections from a YAML configuration.
+    Execute an ETL job end-to-end from a pipeline YAML configuration.
 
     Parameters
     ----------
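
A sketch of driving the new render_handler directly; every field mirrors a getattr probe in the body above, the spec file name is a placeholder, 'ddl' is the default template key named in the code, and pretty/quiet are assumed to be the flags behind _presentation_flags:

import argparse

from etlplus.cli.handlers import render_handler

args = argparse.Namespace(
    spec='orders_table.yml',  # hypothetical standalone table spec
    config=None,
    table=None,
    template='ddl',
    template_path=None,
    output='-',               # '-' (or None) keeps the SQL on stdout
    pretty=False,
    quiet=False,
)
exit_code = render_handler(args)  # 0 on success, 1 when no schemas are found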
@@ -635,13 +673,22 @@ def cmd_list(args: argparse.Namespace) -> int:
         Zero on success.
     """
     cfg = load_pipeline_config(args.config, substitute=True)
-    print_json(_list_sections(cfg, args))
+
+    job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
+    if job_name:
+        result = run(job=job_name, config_path=args.config)
+        print_json({'status': 'ok', 'result': result})
+        return 0
+
+    print_json(_pipeline_summary(cfg))
     return 0
 
 
-def cmd_run(args: argparse.Namespace) -> int:
+def transform_handler(
+    args: argparse.Namespace,
+) -> int:
     """
-    Execute an ETL job end-to-end from a pipeline YAML configuration.
+    Transform data from a source.
 
     Parameters
     ----------
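
run_handler keeps cmd_run's contract: given --job (or the --pipeline alias) it executes that job and prints a status envelope, otherwise it falls back to the summary. A sketch with placeholder names:

import argparse

from etlplus.cli.handlers import run_handler

# No job given: prints the pipeline summary JSON and returns 0.
run_handler(argparse.Namespace(config='pipeline.yml'))

# Named job: runs it end-to-end, then prints {'status': 'ok', 'result': ...}.
run_handler(argparse.Namespace(config='pipeline.yml', job='nightly'))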
@@ -653,13 +700,75 @@ def cmd_run(args: argparse.Namespace) -> int:
     int
         Zero on success.
     """
-    cfg = load_pipeline_config(args.config, substitute=True)
+    pretty, _ = _presentation_flags(args)
+    format_hint: str | None = getattr(args, 'source_format', None)
+    format_explicit: bool = format_hint is not None
 
-    job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
-    if job_name:
-        result = run(job=job_name, config_path=args.config)
-        print_json({'status': 'ok', 'result': result})
-        return 0
+    payload = cast(
+        JSONData | str,
+        _resolve_cli_payload(
+            args.source,
+            format_hint=format_hint,
+            format_explicit=format_explicit,
+        ),
+    )
+
+    data = transform(payload, args.operations)
+
+    if not _write_json_output(
+        data,
+        getattr(args, 'target', None),
+        success_message='Data transformed and saved to',
+    ):
+        _emit_json(data, pretty=pretty)
+
+    return 0
+
+
+def validate_handler(
+    args: argparse.Namespace,
+) -> int:
+    """
+    Validate data from a source.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Parsed command-line arguments.
+
+    Returns
+    -------
+    int
+        Zero on success.
+    """
+    pretty, _ = _presentation_flags(args)
+    format_explicit: bool = getattr(args, '_format_explicit', False)
+    format_hint: str | None = getattr(args, 'source_format', None)
+    payload = cast(
+        JSONData | str,
+        _resolve_cli_payload(
+            args.source,
+            format_hint=format_hint,
+            format_explicit=format_explicit,
+        ),
+    )
+    result = validate(payload, args.rules)
+
+    target_path = getattr(args, 'target', None)
+    if target_path:
+        validated_data = result.get('data')
+        if validated_data is not None:
+            _write_json_output(
+                validated_data,
+                target_path,
+                success_message='Validation result saved to',
+            )
+        else:
+            print(
+                f'Validation failed, no data to save for {target_path}',
+                file=sys.stderr,
+            )
+    else:
+        _emit_json(result, pretty=pretty)
 
-    print_json(_pipeline_summary(cfg))
     return 0
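
Finally, a sketch of the relocated validate_handler; the paths are placeholders, the rules argument is passed straight through to validate(), and pretty/quiet are assumed to be the flags behind _presentation_flags:

import argparse

from etlplus.cli.handlers import validate_handler

args = argparse.Namespace(
    source='data.json',     # hypothetical input payload
    rules='rules.yml',      # hypothetical validation rules
    source_format=None,
    _format_explicit=False,
    target=None,            # no target: the result is emitted as JSON
    pretty=True,
    quiet=False,
)
validate_handler(args)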