etlplus 0.4.7__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
etlplus/cli/handlers.py CHANGED
@@ -18,6 +18,8 @@ from typing import cast
 
 from ..config import PipelineConfig
 from ..config import load_pipeline_config
+from ..database import load_table_spec
+from ..database import render_tables
 from ..enums import FileFormat
 from ..extract import extract
 from ..file import File
@@ -34,19 +36,51 @@ from ..validate import validate
 
 __all__ = [
     # Functions
-    'cmd_extract',
-    'cmd_list',
-    'cmd_load',
-    'cmd_pipeline',
-    'cmd_run',
-    'cmd_transform',
-    'cmd_validate',
+    'extract_handler',
+    'check_handler',
+    'load_handler',
+    'pipeline_handler',
+    'render_handler',
+    'run_handler',
+    'transform_handler',
+    'validate_handler',
 ]
 
 
 # SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
+def _collect_table_specs(
+    config_path: str | None,
+    spec_path: str | None,
+) -> list[dict[str, Any]]:
+    """
+    Load table schemas from a pipeline config and/or standalone spec.
+
+    Parameters
+    ----------
+    config_path : str | None
+        Path to a pipeline YAML config file.
+    spec_path : str | None
+        Path to a standalone table spec file.
+
+    Returns
+    -------
+    list[dict[str, Any]]
+        Collected table specification mappings.
+    """
+    specs: list[dict[str, Any]] = []
+
+    if spec_path:
+        specs.append(load_table_spec(Path(spec_path)))
+
+    if config_path:
+        cfg = load_pipeline_config(config_path, substitute=True)
+        specs.extend(getattr(cfg, 'table_schemas', []))
+
+    return specs
+
+
 def _emit_json(
     data: Any,
     *,
@@ -75,6 +109,23 @@ def _emit_json(
     print(dumped)
 
 
+def _explicit_cli_format(
+    args: argparse.Namespace,
+) -> str | None:
+    """Return the explicit CLI format hint when provided."""
+
+    if not getattr(args, '_format_explicit', False):
+        return None
+    for attr in ('format', 'target_format', 'source_format'):
+        value = getattr(args, attr, None)
+        if value is None:
+            continue
+        normalized = value.strip().lower()
+        if normalized:
+            return normalized
+    return None
+
+
 def _infer_payload_format(
     text: str,
 ) -> str:
@@ -97,12 +148,12 @@ def _infer_payload_format(
     return 'csv'
 
 
-def _list_sections(
+def _check_sections(
     cfg: PipelineConfig,
     args: argparse.Namespace,
 ) -> dict[str, Any]:
     """
-    Build sectioned metadata output for the list command.
+    Build sectioned metadata output for the check command.
 
     Parameters
     ----------
@@ -114,9 +165,11 @@
     Returns
     -------
     dict[str, Any]
-        Metadata output for the list command.
+        Metadata output for the check command.
     """
     sections: dict[str, Any] = {}
+    if getattr(args, 'jobs', False):
+        sections['jobs'] = _pipeline_summary(cfg)['jobs']
     if getattr(args, 'pipelines', False):
         sections['pipelines'] = [cfg.name]
     if getattr(args, 'sources', False):
@@ -132,23 +185,6 @@
     return sections
 
 
-def _explicit_cli_format(
-    args: argparse.Namespace,
-) -> str | None:
-    """Return the explicit CLI format hint when provided."""
-
-    if not getattr(args, '_format_explicit', False):
-        return None
-    for attr in ('format', 'target_format', 'source_format'):
-        value = getattr(args, attr, None)
-        if value is None:
-            continue
-        normalized = value.strip().lower()
-        if normalized:
-            return normalized
-    return None
-
-
 def _materialize_file_payload(
     source: object,
     *,
@@ -222,7 +258,6 @@ def _parse_text_payload(
     JSONData | str
         The parsed payload as JSON data or raw text.
     """
-
     effective = (fmt or '').strip().lower() or _infer_payload_format(text)
     if effective == 'json':
         return cast(JSONData, json_type(text))
@@ -263,7 +298,8 @@ def _pipeline_summary(
 def _presentation_flags(
     args: argparse.Namespace,
 ) -> tuple[bool, bool]:
-    """Return presentation toggles from the parsed namespace.
+    """
+    Return presentation toggles from the parsed namespace.
 
     Parameters
     ----------
@@ -340,7 +376,6 @@ def _resolve_cli_payload(
         Parsed payload or the original source value when hydration is
         disabled.
     """
-
     if isinstance(source, (os.PathLike, str)) and str(source) == '-':
         text = _read_stdin_text()
         return _parse_text_payload(text, format_hint)
@@ -388,11 +423,11 @@ def _write_json_output(
 # SECTION: FUNCTIONS ======================================================== #
 
 
-def cmd_extract(
+def check_handler(
     args: argparse.Namespace,
 ) -> int:
     """
-    Extract data from a source.
+    Print requested pipeline sections from a YAML configuration.
 
     Parameters
     ----------
@@ -404,40 +439,20 @@ def cmd_extract(
     int
         Zero on success.
     """
-    pretty, _ = _presentation_flags(args)
-    explicit_format = _explicit_cli_format(args)
-
-    if args.source == '-':
-        text = _read_stdin_text()
-        payload = _parse_text_payload(text, getattr(args, 'format', None))
-        _emit_json(payload, pretty=pretty)
-
+    cfg = load_pipeline_config(args.config, substitute=True)
+    if getattr(args, 'summary', False):
+        print_json(_pipeline_summary(cfg))
         return 0
 
-    result = extract(
-        args.source_type,
-        args.source,
-        file_format=explicit_format,
-    )
-    output_path = getattr(args, 'target', None)
-    if output_path is None:
-        output_path = getattr(args, 'output', None)
-
-    if not _write_json_output(
-        result,
-        output_path,
-        success_message='Data extracted and saved to',
-    ):
-        _emit_json(result, pretty=pretty)
-
+    print_json(_check_sections(cfg, args))
     return 0
 
 
-def cmd_validate(
+def extract_handler(
     args: argparse.Namespace,
 ) -> int:
     """
-    Validate data from a source.
+    Extract data from a source.
 
     Parameters
     ----------
@@ -449,81 +464,36 @@ def cmd_validate(
     int
         Zero on success.
     """
-    pretty, _quiet = _presentation_flags(args)
-    format_explicit: bool = getattr(args, '_format_explicit', False)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    payload = cast(
-        JSONData | str,
-        _resolve_cli_payload(
-            args.source,
-            format_hint=format_hint,
-            format_explicit=format_explicit,
-        ),
-    )
-    result = validate(payload, args.rules)
-
-    target_path = getattr(args, 'target', None)
-    if target_path:
-        validated_data = result.get('data')
-        if validated_data is not None:
-            _write_json_output(
-                validated_data,
-                target_path,
-                success_message='Validation result saved to',
-            )
-        else:
-            print(
-                f'Validation failed, no data to save for {target_path}',
-                file=sys.stderr,
-            )
-    else:
-        _emit_json(result, pretty=pretty)
-
-    return 0
-
-
-def cmd_transform(
-    args: argparse.Namespace,
-) -> int:
-    """
-    Transform data from a source.
+    pretty, _ = _presentation_flags(args)
+    explicit_format = _explicit_cli_format(args)
 
-    Parameters
-    ----------
-    args : argparse.Namespace
-        Parsed command-line arguments.
+    if args.source == '-':
+        text = _read_stdin_text()
+        payload = _parse_text_payload(text, getattr(args, 'format', None))
+        _emit_json(payload, pretty=pretty)
 
-    Returns
-    -------
-    int
-        Zero on success.
-    """
-    pretty, _quiet = _presentation_flags(args)
-    format_hint: str | None = getattr(args, 'source_format', None)
-    format_explicit: bool = format_hint is not None
+        return 0
 
-    payload = cast(
-        JSONData | str,
-        _resolve_cli_payload(
-            args.source,
-            format_hint=format_hint,
-            format_explicit=format_explicit,
-        ),
+    result = extract(
+        args.source_type,
+        args.source,
+        file_format=explicit_format,
     )
-
-    data = transform(payload, args.operations)
+    output_path = getattr(args, 'target', None)
+    if output_path is None:
+        output_path = getattr(args, 'output', None)
 
     if not _write_json_output(
-        data,
-        getattr(args, 'target', None),
-        success_message='Data transformed and saved to',
+        result,
+        output_path,
+        success_message='Data extracted and saved to',
     ):
-        _emit_json(data, pretty=pretty)
+        _emit_json(result, pretty=pretty)
 
     return 0
 
 
-def cmd_load(
+def load_handler(
     args: argparse.Namespace,
 ) -> int:
     """
@@ -582,7 +552,7 @@ def cmd_load(
     return 0
 
 
-def cmd_pipeline(
+def pipeline_handler(
     args: argparse.Namespace,
 ) -> int:
     """
@@ -598,6 +568,12 @@ def cmd_pipeline(
     int
         Zero on success.
     """
+    print(
+        'DEPRECATED: use "etlplus check --summary|--jobs" or '
+        '"etlplus run --job/--pipeline" instead of "etlplus pipeline".',
+        file=sys.stderr,
+    )
+
     cfg = load_pipeline_config(args.config, substitute=True)
 
     list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
@@ -620,9 +596,71 @@
     return 0
 
 
-def cmd_list(args: argparse.Namespace) -> int:
+def render_handler(
+    args: argparse.Namespace,
+) -> int:
+    """Render SQL DDL statements from table schema specs."""
+    _, quiet = _presentation_flags(args)
+
+    template_value = getattr(args, 'template', 'ddl') or 'ddl'
+    template_path = getattr(args, 'template_path', None)
+    table_filter = getattr(args, 'table', None)
+    spec_path = getattr(args, 'spec', None)
+    config_path = getattr(args, 'config', None)
+
+    # If the provided template points to a file, treat it as a path override.
+    file_override = template_path
+    template_key = template_value
+    if template_path is None:
+        candidate_path = Path(template_value)
+        if candidate_path.exists():
+            file_override = str(candidate_path)
+            template_key = None
+
+    specs = _collect_table_specs(config_path, spec_path)
+    if table_filter:
+        specs = [
+            spec
+            for spec in specs
+            if str(spec.get('table')) == table_filter
+            or str(spec.get('name', '')) == table_filter
+        ]
+
+    if not specs:
+        target_desc = table_filter or 'table_schemas'
+        print(
+            'No table schemas found for '
+            f'{target_desc}. Provide --spec or a pipeline --config with '
+            'table_schemas.',
+            file=sys.stderr,
+        )
+        return 1
+
+    rendered_chunks = render_tables(
+        specs,
+        template=template_key,
+        template_path=file_override,
+    )
+    sql_text = (
+        '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
+    )
+
+    output_path = getattr(args, 'output', None)
+    if output_path and output_path != '-':
+        Path(output_path).write_text(sql_text, encoding='utf-8')
+        if not quiet:
+            print(f'Rendered {len(specs)} schema(s) to {output_path}')
+        return 0
+
+    print(sql_text)
+    return 0
+
+
+def run_handler(
+    args: argparse.Namespace,
+) -> int:
     """
-    Print requested pipeline sections from a YAML configuration.
+    Execute an ETL job end-to-end from a pipeline YAML configuration.
 
     Parameters
     ----------
@@ -635,13 +673,22 @@ def cmd_list(args: argparse.Namespace) -> int:
         Zero on success.
     """
     cfg = load_pipeline_config(args.config, substitute=True)
-    print_json(_list_sections(cfg, args))
+
+    job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
+    if job_name:
+        result = run(job=job_name, config_path=args.config)
+        print_json({'status': 'ok', 'result': result})
+        return 0
+
+    print_json(_pipeline_summary(cfg))
     return 0
 
 
-def cmd_run(args: argparse.Namespace) -> int:
+def transform_handler(
+    args: argparse.Namespace,
+) -> int:
     """
-    Execute an ETL job end-to-end from a pipeline YAML configuration.
+    Transform data from a source.
 
     Parameters
     ----------
@@ -653,13 +700,75 @@ def cmd_run(args: argparse.Namespace) -> int:
     int
         Zero on success.
     """
-    cfg = load_pipeline_config(args.config, substitute=True)
+    pretty, _ = _presentation_flags(args)
+    format_hint: str | None = getattr(args, 'source_format', None)
+    format_explicit: bool = format_hint is not None
 
-    job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
-    if job_name:
-        result = run(job=job_name, config_path=args.config)
-        print_json({'status': 'ok', 'result': result})
-        return 0
+    payload = cast(
+        JSONData | str,
+        _resolve_cli_payload(
+            args.source,
+            format_hint=format_hint,
+            format_explicit=format_explicit,
+        ),
+    )
+
+    data = transform(payload, args.operations)
+
+    if not _write_json_output(
+        data,
+        getattr(args, 'target', None),
+        success_message='Data transformed and saved to',
+    ):
+        _emit_json(data, pretty=pretty)
+
+    return 0
+
+
+def validate_handler(
+    args: argparse.Namespace,
+) -> int:
+    """
+    Validate data from a source.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        Parsed command-line arguments.
+
+    Returns
+    -------
+    int
+        Zero on success.
+    """
+    pretty, _ = _presentation_flags(args)
+    format_explicit: bool = getattr(args, '_format_explicit', False)
+    format_hint: str | None = getattr(args, 'source_format', None)
+    payload = cast(
+        JSONData | str,
+        _resolve_cli_payload(
+            args.source,
+            format_hint=format_hint,
+            format_explicit=format_explicit,
+        ),
+    )
+    result = validate(payload, args.rules)
+
+    target_path = getattr(args, 'target', None)
+    if target_path:
+        validated_data = result.get('data')
+        if validated_data is not None:
+            _write_json_output(
+                validated_data,
+                target_path,
+                success_message='Validation result saved to',
+            )
+        else:
+            print(
+                f'Validation failed, no data to save for {target_path}',
+                file=sys.stderr,
+            )
+    else:
+        _emit_json(result, pretty=pretty)
 
-    print_json(_pipeline_summary(cfg))
     return 0
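
Note: the renamed handlers keep the old cmd_* contract, taking a parsed argparse.Namespace and returning an integer exit code, so they can be driven outside the CLI. A minimal sketch of calling the new check_handler directly; the pipeline.yml path is hypothetical, and the attribute names mirror the check flags wired up in main.py below:

# Sketch only: invoking the renamed check_handler outside the CLI.
# check_handler reads each section toggle via getattr() with a default,
# so only the attributes we care about need to be set.
import argparse

from etlplus.cli.handlers import check_handler

args = argparse.Namespace(
    config='pipeline.yml',  # hypothetical local pipeline config
    summary=True,           # same toggle as `etlplus check --summary`
)
exit_code = check_handler(args)  # prints the summary JSON and returns 0
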
etlplus/cli/main.py CHANGED
@@ -24,13 +24,14 @@ from ..enums import FileFormat
 from ..utils import json_type
 from .app import PROJECT_URL
 from .app import app
-from .handlers import cmd_extract
-from .handlers import cmd_list
-from .handlers import cmd_load
-from .handlers import cmd_pipeline
-from .handlers import cmd_run
-from .handlers import cmd_transform
-from .handlers import cmd_validate
+from .handlers import check_handler
+from .handlers import extract_handler
+from .handlers import load_handler
+from .handlers import pipeline_handler
+from .handlers import render_handler
+from .handlers import run_handler
+from .handlers import transform_handler
+from .handlers import validate_handler
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -328,7 +329,7 @@ def create_parser() -> argparse.ArgumentParser:
         ),
     )
     _add_format_options(extract_parser, context='source')
-    extract_parser.set_defaults(func=cmd_extract)
+    extract_parser.set_defaults(func=extract_handler)
 
     validate_parser = subparsers.add_parser(
         'validate',
@@ -345,7 +346,7 @@
         default={},
         help='Validation rules as JSON string',
     )
-    validate_parser.set_defaults(func=cmd_validate)
+    validate_parser.set_defaults(func=validate_handler)
 
     transform_parser = subparsers.add_parser(
         'transform',
@@ -393,7 +394,7 @@
             'File targets infer format from the extension.'
         ),
     )
-    transform_parser.set_defaults(func=cmd_transform)
+    transform_parser.set_defaults(func=transform_handler)
 
     load_parser = subparsers.add_parser(
         'load',
@@ -417,13 +418,14 @@
         ),
     )
     _add_format_options(load_parser, context='target')
-    load_parser.set_defaults(func=cmd_load)
+    load_parser.set_defaults(func=load_handler)
 
     pipe_parser = subparsers.add_parser(
         'pipeline',
         help=(
-            'Inspect or run pipeline YAML (see '
-            f'{PROJECT_URL}/blob/main/docs/pipeline-guide.md)'
+            'DEPRECATED: use "list" (for summary/jobs) or "run" (to execute); '
+            'see '
+            f'{PROJECT_URL}/blob/main/docs/pipeline-guide.md'
         ),
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
@@ -438,35 +440,83 @@
         metavar='JOB',
         help='Run a specific job by name',
     )
-    pipe_parser.set_defaults(func=cmd_pipeline)
+    pipe_parser.set_defaults(func=pipeline_handler)
 
-    list_parser = subparsers.add_parser(
-        'list',
-        help='List ETL pipeline metadata',
+    render_parser = subparsers.add_parser(
+        'render',
+        help='Render SQL DDL from table schema specs',
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
-    _add_config_option(list_parser)
+    render_parser.add_argument(
+        '--config',
+        help='Pipeline YAML containing table_schemas',
+    )
+    render_parser.add_argument(
+        '-o',
+        '--output',
+        help='Write SQL to this path (stdout when omitted)',
+    )
+    render_parser.add_argument(
+        '--spec',
+        help='Standalone table spec file (.yml/.yaml/.json)',
+    )
+    render_parser.add_argument(
+        '--table',
+        help='Render only the table matching this name',
+    )
+    render_parser.add_argument(
+        '--template',
+        default='ddl',
+        help='Template key (ddl/view) or path to a Jinja template file',
+    )
+    render_parser.add_argument(
+        '--template-path',
+        dest='template_path',
+        help=(
+            'Explicit path to a Jinja template file (overrides template key).'
+        ),
+    )
+    render_parser.set_defaults(func=render_handler)
+
+    check_parser = subparsers.add_parser(
+        'check',
+        help='Inspect ETL pipeline metadata',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    _add_config_option(check_parser)
+    _add_boolean_flag(
+        check_parser,
+        name='jobs',
+        help_text='List ETL jobs',
+    )
     _add_boolean_flag(
-        list_parser,
+        check_parser,
         name='pipelines',
         help_text='List ETL pipelines',
     )
     _add_boolean_flag(
-        list_parser,
+        check_parser,
         name='sources',
         help_text='List data sources',
     )
     _add_boolean_flag(
-        list_parser,
+        check_parser,
+        name='summary',
+        help_text=(
+            'Show pipeline summary (name, version, sources, targets, jobs)'
+        ),
+    )
+    _add_boolean_flag(
+        check_parser,
         name='targets',
         help_text='List data targets',
     )
     _add_boolean_flag(
-        list_parser,
+        check_parser,
         name='transforms',
         help_text='List data transforms',
    )
-    list_parser.set_defaults(func=cmd_list)
+    check_parser.set_defaults(func=check_handler)
 
     run_parser = subparsers.add_parser(
         'run',
@@ -487,7 +537,7 @@
         '--pipeline',
         help='Name of the pipeline to run',
     )
-    run_parser.set_defaults(func=cmd_run)
+    run_parser.set_defaults(func=run_handler)
 
     return parser
 
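
Note: because every subcommand binds its handler through set_defaults(func=...), dispatch reduces to args.func(args) once parsing succeeds. A minimal sketch of exercising the new render subcommand through the parser; tables.yml and schema.sql are hypothetical paths:

# Sketch only: parsing and dispatching the new render subcommand.
# Going through the real parser means each handler receives the
# defaults its flags define, rather than a hand-built Namespace.
from etlplus.cli.main import create_parser

parser = create_parser()
# Equivalent to: etlplus render --spec tables.yml -o schema.sql
args = parser.parse_args(['render', '--spec', 'tables.yml', '-o', 'schema.sql'])
raise SystemExit(args.func(args))  # render_handler: 0 on success, 1 if no specs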