etlplus 0.4.0__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/cli.py DELETED
@@ -1,1186 +0,0 @@
1
- """
2
- :mod:`etlplus.cli` module.
3
-
4
- Entry point for the ``etlplus`` command-line Interface (CLI).
5
-
6
- This module wires subcommands via ``argparse`` using
7
- ``set_defaults(func=...)`` so dispatch is clean and extensible.
8
-
9
- Subcommands
10
- -----------
11
- - ``extract``: extract data from files, databases, or REST APIs
12
- - ``validate``: validate data against rules
13
- - ``transform``: transform records
14
- - ``load``: load data to files, databases, or REST APIs
15
- """
16
-
17
- from __future__ import annotations
18
-
19
- import argparse
20
- import csv
21
- import os
22
- import sys
23
- from collections.abc import Sequence
24
- from pathlib import Path
25
- from typing import Any
26
- from typing import Literal
27
- from typing import cast
28
-
29
- import typer
30
-
31
- from . import __version__
32
- from .config import PipelineConfig
33
- from .config import load_pipeline_config
34
- from .enums import DataConnectorType
35
- from .enums import FileFormat
36
- from .extract import extract
37
- from .file import File
38
- from .load import load
39
- from .run import run
40
- from .transform import transform
41
- from .types import JSONData
42
- from .utils import json_type
43
- from .utils import print_json
44
- from .validate import validate
45
-
46
# SECTION: CONSTANTS ======================================================= #


# Multi-line description rendered verbatim by
# argparse.RawDescriptionHelpFormatter in create_parser().
CLI_DESCRIPTION = '\n'.join(
    [
        'ETLPlus - A Swiss Army knife for simple ETL operations.',
        '',
        '    Provide a subcommand and options. Examples:',
        '',
        '    etlplus extract file in.csv -o out.json',
        '    etlplus validate in.json --rules \'{"required": ["id"]}\'',
        '    etlplus transform in.json --operations \'{"select": ["id"]}\'',
        '    etlplus load in.json file out.json',
        '',
        '    Enforce error if --format is provided for files. Examples:',
        '',
        '    etlplus extract file in.csv --format csv --strict-format',
        '    etlplus load in.json file out.csv --format csv --strict-format',
    ],
)

# Help epilog documenting the ETLPLUS_FORMAT_BEHAVIOR environment variable
# consumed by _format_behavior().
CLI_EPILOG = '\n'.join(
    [
        'Environment:',
        (
            '  ETLPLUS_FORMAT_BEHAVIOR controls behavior when '
            '--format is provided for files.'
        ),
        '  Values:',
        '    - error|fail|strict: treat as error',
        '    - warn (default): print a warning',
        '    - ignore|silent: no message',
        '',
        'Note:',
        '  --strict-format overrides the environment behavior.',
    ],
)

# Environment variable read by _format_behavior().
FORMAT_ENV_KEY = 'ETLPLUS_FORMAT_BEHAVIOR'

# Project homepage; referenced in subcommand help text.
PROJECT_URL = 'https://github.com/Dagitali/ETLPlus'


# SECTION: INTERNAL CONSTANTS =============================================== #


# Recognized ETLPLUS_FORMAT_BEHAVIOR values that raise an error.
_FORMAT_ERROR_STATES = {'error', 'fail', 'strict'}
# Recognized ETLPLUS_FORMAT_BEHAVIOR values that suppress any message.
_FORMAT_SILENT_STATES = {'ignore', 'silent'}

# Allowed values for source_type/target_type and --format in the Typer
# front-ends (Typer has no argparse-style `choices=`).
_SOURCE_CHOICES = set(DataConnectorType.choices())
_FORMAT_CHOICES = set(FileFormat.choices())


# SECTION: TYPE ALIASES ===================================================== #


# Marks whether format options describe a data source or a data target.
type FormatContext = Literal['source', 'target']
104
-
105
- # SECTION: INTERNAL CLASSES ================================================= #
106
-
107
-
108
- class _FormatAction(argparse.Action):
109
- """Argparse action that records when ``--format`` is provided."""
110
-
111
- def __call__(
112
- self,
113
- parser: argparse.ArgumentParser,
114
- namespace: argparse.Namespace,
115
- values: str | Sequence[Any] | None,
116
- option_string: str | None = None,
117
- ) -> None: # pragma: no cover - argparse wiring
118
- setattr(namespace, self.dest, values)
119
- namespace._format_explicit = True
120
-
121
-
122
- # SECTION: INTERNAL FUNCTIONS =============================================== #
123
-
124
-
125
def _add_format_options(
    parser: argparse.ArgumentParser,
    *,
    context: FormatContext,
) -> None:
    """
    Attach the shared ``--format``/``--strict-format`` options.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Parser to add options to.
    context : FormatContext
        Whether this is a source or target resource.
    """
    strict_help = (
        'Treat providing --format for file '
        f'{context}s as an error (overrides environment behavior)'
    )
    format_help = (
        f'Format of the {context} when not a file. For file {context}s '
        'this option is ignored and the format is inferred from the '
        'filename extension.'
    )
    # Default the explicit-format flag to False; _FormatAction flips it
    # only when the user actually passes --format.
    parser.set_defaults(_format_explicit=False)
    parser.add_argument(
        '--strict-format',
        action='store_true',
        help=strict_help,
    )
    parser.add_argument(
        '--format',
        choices=list(FileFormat.choices()),
        default='json',
        action=_FormatAction,
        help=format_help,
    )
160
-
161
-
162
def _emit_behavioral_notice(
    message: str,
    behavior: str,
) -> None:
    """
    Emit ``message`` according to the configured behavior mode.

    Parameters
    ----------
    message : str
        The message to emit.
    behavior : str
        The effective format-behavior mode.

    Raises
    ------
    ValueError
        If the behavior is in the error states.
    """
    # The silent and error state sets are disjoint, so check order does
    # not affect the outcome.
    if behavior in _FORMAT_SILENT_STATES:
        return
    if behavior in _FORMAT_ERROR_STATES:
        raise ValueError(message)
    print(f'Warning: {message}', file=sys.stderr)
186
-
187
-
188
- def _format_behavior(
189
- strict: bool,
190
- ) -> str:
191
- """
192
- Return the effective format-behavior mode.
193
-
194
- Parameters
195
- ----------
196
- strict : bool
197
- Whether to enforce strict format behavior.
198
-
199
- Returns
200
- -------
201
- str
202
- The effective format-behavior mode.
203
- """
204
- if strict:
205
- return 'error'
206
- env_value = os.getenv(FORMAT_ENV_KEY, 'warn')
207
- return (env_value or 'warn').strip().lower()
208
-
209
-
210
- def _handle_format_guard(
211
- *,
212
- io_context: Literal['source', 'target'],
213
- resource_type: str,
214
- format_explicit: bool,
215
- strict: bool,
216
- ) -> None:
217
- """
218
- Warn or raise when --format is used alongside file resources.
219
-
220
- Parameters
221
- ----------
222
- io_context : Literal['source', 'target']
223
- Whether this is a source or target resource.
224
- resource_type : str
225
- The type of resource being processed.
226
- format_explicit : bool
227
- Whether the --format option was explicitly provided.
228
- strict : bool
229
- Whether to enforce strict format behavior.
230
- """
231
- if resource_type != 'file' or not format_explicit:
232
- return
233
- message = (
234
- f'--format is ignored for file {io_context}s; '
235
- 'inferred from filename extension.'
236
- )
237
- behavior = _format_behavior(strict)
238
- _emit_behavioral_notice(message, behavior)
239
-
240
-
241
- def _list_sections(
242
- cfg: PipelineConfig,
243
- args: argparse.Namespace,
244
- ) -> dict[str, Any]:
245
- """
246
- Build sectioned metadata output for the list command.
247
-
248
- Parameters
249
- ----------
250
- cfg : PipelineConfig
251
- The loaded pipeline configuration.
252
- args : argparse.Namespace
253
- Parsed command-line arguments.
254
-
255
- Returns
256
- -------
257
- dict[str, Any]
258
- Metadata output for the list command.
259
- """
260
- sections: dict[str, Any] = {}
261
- if getattr(args, 'pipelines', False):
262
- sections['pipelines'] = [cfg.name]
263
- if getattr(args, 'sources', False):
264
- sections['sources'] = [src.name for src in cfg.sources]
265
- if getattr(args, 'targets', False):
266
- sections['targets'] = [tgt.name for tgt in cfg.targets]
267
- if getattr(args, 'transforms', False):
268
- sections['transforms'] = [
269
- getattr(trf, 'name', None) for trf in cfg.transforms
270
- ]
271
- if not sections:
272
- sections['jobs'] = _pipeline_summary(cfg)['jobs']
273
- return sections
274
-
275
-
276
- def _materialize_csv_payload(
277
- source: object,
278
- ) -> JSONData | str:
279
- """
280
- Return parsed CSV rows when ``source`` points at a CSV file.
281
-
282
- Parameters
283
- ----------
284
- source : object
285
- The source of data.
286
-
287
- Returns
288
- -------
289
- JSONData | str
290
- Parsed CSV rows or the original source if not a CSV file.
291
- """
292
- if not isinstance(source, str):
293
- return cast(JSONData, source)
294
- path = Path(source)
295
- if path.suffix.lower() != '.csv' or not path.is_file():
296
- return source
297
- return _read_csv_rows(path)
298
-
299
-
300
- def _ns(**kwargs: object) -> argparse.Namespace:
301
- """Create an :class:`argparse.Namespace` for legacy command handlers."""
302
-
303
- return argparse.Namespace(**kwargs)
304
-
305
-
306
- def _pipeline_summary(
307
- cfg: PipelineConfig,
308
- ) -> dict[str, Any]:
309
- """
310
- Return a human-friendly snapshot of a pipeline config.
311
-
312
- Parameters
313
- ----------
314
- cfg : PipelineConfig
315
- The loaded pipeline configuration.
316
-
317
- Returns
318
- -------
319
- dict[str, Any]
320
- A human-friendly snapshot of a pipeline config.
321
- """
322
- sources = [src.name for src in cfg.sources]
323
- targets = [tgt.name for tgt in cfg.targets]
324
- jobs = [job.name for job in cfg.jobs]
325
- return {
326
- 'name': cfg.name,
327
- 'version': cfg.version,
328
- 'sources': sources,
329
- 'targets': targets,
330
- 'jobs': jobs,
331
- }
332
-
333
-
334
- def _read_csv_rows(
335
- path: Path,
336
- ) -> list[dict[str, str]]:
337
- """
338
- Read CSV rows into dictionaries.
339
-
340
- Parameters
341
- ----------
342
- path : Path
343
- Path to a CSV file.
344
-
345
- Returns
346
- -------
347
- list[dict[str, str]]
348
- List of dictionaries, each representing a row in the CSV file.
349
- """
350
- with path.open(newline='', encoding='utf-8') as handle:
351
- reader = csv.DictReader(handle)
352
- return [dict(row) for row in reader]
353
-
354
-
355
- def _validate_choice(value: str, choices: set[str], *, label: str) -> str:
356
- """Validate a string against allowed choices for nice CLI errors."""
357
-
358
- v = (value or '').strip()
359
- if v in choices:
360
- return v
361
- allowed = ', '.join(sorted(choices))
362
- raise typer.BadParameter(
363
- f"Invalid {label} '{value}'. Choose from: {allowed}",
364
- )
365
-
366
-
367
- def _write_json_output(
368
- data: Any,
369
- output_path: str | None,
370
- *,
371
- success_message: str,
372
- ) -> bool:
373
- """
374
- Optionally persist JSON data to disk.
375
-
376
- Parameters
377
- ----------
378
- data : Any
379
- Data to write.
380
- output_path : str | None
381
- Path to write the output to. None to print to stdout.
382
- success_message : str
383
- Message to print upon successful write.
384
-
385
- Returns
386
- -------
387
- bool
388
- True if output was written to a file, False if printed to stdout.
389
- """
390
- if not output_path:
391
- return False
392
- File(Path(output_path), FileFormat.JSON).write_json(data)
393
- print(f'{success_message} {output_path}')
394
- return True
395
-
396
-
397
- # SECTION: FUNCTIONS ======================================================== #
398
-
399
-
400
- # -- Command Handlers -- #
401
-
402
-
403
def cmd_extract(
    args: argparse.Namespace,
) -> int:
    """
    Extract data from a source.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    _handle_format_guard(
        io_context='source',
        resource_type=args.source_type,
        format_explicit=getattr(args, '_format_explicit', False),
        strict=getattr(args, 'strict_format', False),
    )

    # Files infer their format from the extension; other connectors get
    # the explicit format hint.
    if args.source_type != 'file':
        payload = extract(
            args.source_type,
            args.source,
            file_format=getattr(args, 'format', None),
        )
    else:
        payload = extract(args.source_type, args.source)

    saved = _write_json_output(
        payload,
        getattr(args, 'output', None),
        success_message='Data extracted and saved to',
    )
    if not saved:
        print_json(payload)

    return 0
443
-
444
-
445
def cmd_validate(
    args: argparse.Namespace,
) -> int:
    """
    Validate data from a source.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    result = validate(_materialize_csv_payload(args.source), args.rules)

    output_path = getattr(args, 'output', None)
    if not output_path:
        print_json(result)
        return 0

    validated = result.get('data')
    if validated is None:
        # Nothing usable survived validation; report rather than write.
        print(
            f'Validation failed, no data to save for {output_path}',
            file=sys.stderr,
        )
    else:
        _write_json_output(
            validated,
            output_path,
            success_message='Validation result saved to',
        )

    return 0
482
-
483
-
484
def cmd_transform(
    args: argparse.Namespace,
) -> int:
    """
    Transform data from a source.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    transformed = transform(
        _materialize_csv_payload(args.source),
        args.operations,
    )

    saved = _write_json_output(
        transformed,
        getattr(args, 'output', None),
        success_message='Data transformed and saved to',
    )
    if not saved:
        print_json(transformed)

    return 0
511
-
512
-
513
def cmd_load(
    args: argparse.Namespace,
) -> int:
    """
    Load data into a target.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    _handle_format_guard(
        io_context='target',
        resource_type=args.target_type,
        format_explicit=getattr(args, '_format_explicit', False),
        strict=getattr(args, 'strict_format', False),
    )

    # File targets infer their format from the extension; other
    # connectors get the explicit format hint.
    if args.target_type != 'file':
        outcome = load(
            args.source,
            args.target_type,
            args.target,
            file_format=getattr(args, 'format', None),
        )
    else:
        outcome = load(args.source, args.target_type, args.target)

    saved = _write_json_output(
        outcome,
        getattr(args, 'output', None),
        success_message='Data loaded and saved to',
    )
    if not saved:
        print_json(outcome)

    return 0
554
-
555
-
556
def cmd_pipeline(args: argparse.Namespace) -> int:
    """
    Inspect or run a pipeline YAML configuration.

    --list prints job names; --run JOB executes a job end-to-end.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    cfg = load_pipeline_config(args.config, substitute=True)

    job_to_run = getattr(args, 'run', None)
    if getattr(args, 'list', False) and not job_to_run:
        # --list without --run: show job names only.
        print_json({'jobs': _pipeline_summary(cfg)['jobs']})
    elif job_to_run:
        outcome = run(job=job_to_run, config_path=args.config)
        print_json({'status': 'ok', 'result': outcome})
    else:
        # No flags: print the full summary.
        print_json(_pipeline_summary(cfg))
    return 0
586
-
587
-
588
def cmd_list(args: argparse.Namespace) -> int:
    """
    Print ETL job names from a pipeline YAML configuration.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    config = load_pipeline_config(args.config, substitute=True)
    sections = _list_sections(config, args)
    print_json(sections)
    return 0
605
-
606
-
607
def cmd_run(args: argparse.Namespace) -> int:
    """
    Execute an ETL job end-to-end from a pipeline YAML configuration.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    cfg = load_pipeline_config(args.config, substitute=True)

    # --job takes precedence; --pipeline is an accepted alias.
    target_job = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
    if not target_job:
        # No job requested: print the configuration summary instead.
        print_json(_pipeline_summary(cfg))
        return 0

    outcome = run(job=target_job, config_path=args.config)
    print_json({'status': 'ok', 'result': outcome})
    return 0
631
-
632
-
633
- # -- Parser -- #
634
-
635
-
636
def create_parser() -> argparse.ArgumentParser:
    """
    Create the argument parser for the CLI.

    Returns
    -------
    argparse.ArgumentParser
        Configured parser with subcommands for the CLI.

    Notes
    -----
    Each subcommand registers its handler via ``set_defaults(func=...)``,
    so dispatch is a single ``args.func(args)`` call.
    """
    # RawDescriptionHelpFormatter preserves the hand-formatted
    # CLI_DESCRIPTION/CLI_EPILOG layout.
    parser = argparse.ArgumentParser(
        prog='etlplus',
        description=CLI_DESCRIPTION,
        epilog=CLI_EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s {__version__}',
    )

    subparsers = parser.add_subparsers(
        dest='command',
        help='Available commands',
    )

    # Define "extract" command.
    extract_parser = subparsers.add_parser(
        'extract',
        help=('Extract data from sources (files, databases, REST APIs)'),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    extract_parser.add_argument(
        'source_type',
        choices=list(DataConnectorType.choices()),
        help='Type of source to extract from',
    )
    extract_parser.add_argument(
        'source',
        help=(
            'Source location '
            '(file path, database connection string, or API URL)'
        ),
    )
    extract_parser.add_argument(
        '-o',
        '--output',
        help='Output file to save extracted data (JSON format)',
    )
    # Shared --format/--strict-format options.
    _add_format_options(extract_parser, context='source')
    extract_parser.set_defaults(func=cmd_extract)

    # Define "validate" command.
    validate_parser = subparsers.add_parser(
        'validate',
        help='Validate data from sources',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    validate_parser.add_argument(
        'source',
        help='Data source to validate (file path or JSON string)',
    )
    validate_parser.add_argument(
        '--rules',
        type=json_type,
        default={},
        help='Validation rules as JSON string',
    )
    validate_parser.set_defaults(func=cmd_validate)

    # Define "transform" command.
    transform_parser = subparsers.add_parser(
        'transform',
        help='Transform data',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    transform_parser.add_argument(
        'source',
        help='Data source to transform (file path or JSON string)',
    )
    transform_parser.add_argument(
        '--operations',
        type=json_type,
        default={},
        help='Transformation operations as JSON string',
    )
    transform_parser.add_argument(
        '-o',
        '--output',
        help='Output file to save transformed data',
    )
    transform_parser.set_defaults(func=cmd_transform)

    # Define "load" command.
    load_parser = subparsers.add_parser(
        'load',
        help='Load data to targets (files, databases, REST APIs)',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    load_parser.add_argument(
        'source',
        help='Data source to load (file path or JSON string)',
    )
    load_parser.add_argument(
        'target_type',
        choices=list(DataConnectorType.choices()),
        help='Type of target to load to',
    )
    load_parser.add_argument(
        'target',
        help=(
            'Target location '
            '(file path, database connection string, or API URL)'
        ),
    )
    _add_format_options(load_parser, context='target')
    load_parser.set_defaults(func=cmd_load)

    # Define "pipeline" command (reads YAML config).
    pipe_parser = subparsers.add_parser(
        'pipeline',
        help=(
            'Inspect or run pipeline YAML (see '
            f'{PROJECT_URL}/blob/main/docs/pipeline-guide.md)'
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    pipe_parser.add_argument(
        '--config',
        required=True,
        help='Path to pipeline YAML configuration file',
    )
    pipe_parser.add_argument(
        '--list',
        action='store_true',
        help='List available job names and exit',
    )
    pipe_parser.add_argument(
        '--run',
        metavar='JOB',
        help='Run a specific job by name',
    )
    pipe_parser.set_defaults(func=cmd_pipeline)

    # Define "list" command.
    list_parser = subparsers.add_parser(
        'list',
        help='List ETL pipeline metadata',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    list_parser.add_argument(
        '--config',
        required=True,
        help='Path to pipeline YAML configuration file',
    )
    list_parser.add_argument(
        '--pipelines',
        action='store_true',
        help='List ETL pipelines',
    )
    list_parser.add_argument(
        '--sources',
        action='store_true',
        help='List data sources',
    )
    list_parser.add_argument(
        '--targets',
        action='store_true',
        help='List data targets',
    )
    list_parser.add_argument(
        '--transforms',
        action='store_true',
        help='List data transforms',
    )
    list_parser.set_defaults(func=cmd_list)

    # Define "run" command.
    run_parser = subparsers.add_parser(
        'run',
        help=(
            'Run an ETL pipeline '
            f'(see {PROJECT_URL}/blob/main/docs/run-module.md)'
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    run_parser.add_argument(
        '--config',
        required=True,
        help='Path to pipeline YAML configuration file',
    )
    run_parser.add_argument(
        '-j',
        '--job',
        help='Name of the job to run',
    )
    run_parser.add_argument(
        '-p',
        '--pipeline',
        help='Name of the pipeline to run',
    )
    run_parser.set_defaults(func=cmd_run)

    return parser
843
-
844
-
845
- # -- Main -- #
846
-
847
-
848
def main(
    argv: list[str] | None = None,
) -> int:
    """
    Handle CLI's main entry point.

    Parameters
    ----------
    argv : list[str] | None, optional
        List of command-line arguments. If ``None``, uses ``sys.argv``.

    Returns
    -------
    int
        Zero on success, non-zero on error.

    Raises
    ------
    SystemExit
        Re-raises SystemExit exceptions to preserve exit codes.

    Notes
    -----
    This function uses Typer (Click) for parsing/dispatch, but preserves the
    existing `cmd_*` handlers by adapting parsed arguments into an
    :class:`argparse.Namespace`.
    """
    argv = sys.argv[1:] if argv is None else argv
    # Convert the Typer app into the underlying Click command so it can
    # be invoked with standalone_mode=False (Click then returns/raises
    # instead of calling sys.exit itself).
    command = typer.main.get_command(app)

    try:
        result = command.main(
            args=list(argv),
            prog_name='etlplus',
            standalone_mode=False,
        )
        return int(result or 0)

    except typer.Exit as exc:
        # Deliberate exit requested by a command (e.g. --version).
        return int(exc.exit_code)

    except typer.Abort:
        return 1

    except KeyboardInterrupt:
        # Conventional exit code for SIGINT
        return 130

    except SystemExit as e:
        # Preserve the original exit code after reporting it.
        print(f'Error: {e}', file=sys.stderr)
        raise e

    except (OSError, TypeError, ValueError) as e:
        print(f'Error: {e}', file=sys.stderr)
        return 1
904
-
905
- # SECTION: TYPER APP ======================================================== #
906
-
907
-
908
# Typer application mirroring the argparse CLI; the command functions
# below adapt parsed options into argparse.Namespace objects for the
# legacy cmd_* handlers.
app = typer.Typer(
    name='etlplus',
    help='ETLPlus - A Swiss Army knife for simple ETL operations.',
    add_completion=True,
)
913
-
914
-
915
@app.callback(invoke_without_command=True)
def _root(
    ctx: typer.Context,
    version: bool = typer.Option(
        False,
        '-V',
        '--version',
        is_eager=True,
        help='Show the version and exit.',
    ),
) -> None:
    """Root command callback to show help or version."""

    # -V/--version is eager: report and exit before any subcommand runs.
    if version:
        typer.echo(f'etlplus {__version__}')
        raise typer.Exit(0)

    # Bare `etlplus` (no subcommand) prints help and exits successfully.
    if ctx.invoked_subcommand is None:
        typer.echo(ctx.get_help())
        raise typer.Exit(0)
935
-
936
-
937
@app.command('extract')
def extract_cmd(
    source_type: str = typer.Argument(
        ...,
        help='Type of source to extract from',
    ),
    source: str = typer.Argument(
        ...,
        help=(
            'Source location '
            '(file path, database connection string, or API URL)'
        ),
    ),
    output: str | None = typer.Option(
        None,
        '-o',
        '--output',
        help='Output file to save extracted data (JSON format)',
    ),
    strict_format: bool = typer.Option(
        False,
        '--strict-format',
        help=(
            'Treat providing --format for file sources as an error '
            '(overrides environment behavior)'
        ),
    ),
    source_format: str | None = typer.Option(
        None,
        '--format',
        help=(
            'Format of the source when not a file. For file sources this '
            'option is ignored and the format is inferred from the filename '
            'extension.'
        ),
    ),
) -> int:
    """Typer front-end for :func:`cmd_extract`."""

    # Typer has no argparse-style `choices=`; validate explicitly for
    # friendly error messages.
    source_type = _validate_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )
    if source_format is not None:
        source_format = _validate_choice(
            source_format,
            _FORMAT_CHOICES,
            label='format',
        )

    # `_format_explicit` mirrors the flag set by _FormatAction in the
    # argparse path; `format` defaults to 'json' when not provided.
    ns = _ns(
        command='extract',
        source_type=source_type,
        source=source,
        output=output,
        strict_format=strict_format,
        format=(source_format or 'json'),
        _format_explicit=(source_format is not None),
    )
    return int(cmd_extract(ns))
998
-
999
-
1000
@app.command('validate')
def validate_cmd(
    source: str = typer.Argument(
        ...,
        help='Data source to validate (file path or JSON string)',
    ),
    rules: str = typer.Option(
        '{}',
        '--rules',
        help='Validation rules as JSON string',
    ),
) -> int:
    """Typer front-end for :func:`cmd_validate`."""

    # Rules arrive as a JSON string and are parsed here (the argparse
    # path parses them via `type=json_type` instead).
    ns = _ns(command='validate', source=source, rules=json_type(rules))
    return int(cmd_validate(ns))
1016
-
1017
-
1018
@app.command('transform')
def transform_cmd(
    source: str = typer.Argument(
        ...,
        help='Data source to transform (file path or JSON string)',
    ),
    operations: str = typer.Option(
        '{}',
        '--operations',
        help='Transformation operations as JSON string',
    ),
    output: str | None = typer.Option(
        None,
        '-o',
        '--output',
        help='Output file to save transformed data',
    ),
) -> int:
    """Typer front-end for :func:`cmd_transform`."""

    # Operations arrive as a JSON string and are parsed here (the
    # argparse path parses them via `type=json_type` instead).
    ns = _ns(
        command='transform',
        source=source,
        operations=json_type(operations),
        output=output,
    )
    return int(cmd_transform(ns))
1045
-
1046
-
1047
@app.command('load')
def load_cmd(
    source: str = typer.Argument(
        ...,
        help='Data source to load (file path or JSON string)',
    ),
    target_type: str = typer.Argument(..., help='Type of target to load to'),
    target: str = typer.Argument(
        ...,
        help=(
            'Target location '
            '(file path, database connection string, or API URL)'
        ),
    ),
    strict_format: bool = typer.Option(
        False,
        '--strict-format',
        help=(
            'Treat providing --format for file targets as an error '
            '(overrides environment behavior)'
        ),
    ),
    target_format: str | None = typer.Option(
        None,
        '--format',
        help=(
            'Format of the target when not a file. For file targets this '
            'option is ignored and the format is inferred from the filename '
            'extension.'
        ),
    ),
) -> int:
    """Typer front-end for :func:`cmd_load`."""

    # Typer has no argparse-style `choices=`; validate explicitly.
    # NOTE: target types share the same choice set as source types.
    target_type = _validate_choice(
        target_type,
        _SOURCE_CHOICES,
        label='target_type',
    )
    if target_format is not None:
        target_format = _validate_choice(
            target_format,
            _FORMAT_CHOICES,
            label='format',
        )

    # `_format_explicit` mirrors the flag set by _FormatAction in the
    # argparse path; `format` defaults to 'json' when not provided.
    ns = _ns(
        command='load',
        source=source,
        target_type=target_type,
        target=target,
        strict_format=strict_format,
        format=(target_format or 'json'),
        _format_explicit=(target_format is not None),
    )
    return int(cmd_load(ns))
1103
-
1104
-
1105
@app.command('pipeline')
def pipeline_cmd(
    config: str = typer.Option(
        ...,
        '--config',
        help='Path to pipeline YAML configuration file',
    ),
    list_: bool = typer.Option(
        False,
        '--list',
        help='List available job names and exit',
    ),
    run_job: str | None = typer.Option(
        None,
        '--run',
        metavar='JOB',
        help='Run a specific job by name',
    ),
) -> int:
    """Typer front-end for :func:`cmd_pipeline`."""

    # `list` shadows a builtin, hence the trailing underscore on the
    # parameter; the namespace attribute keeps the original option name.
    ns = _ns(command='pipeline', config=config, list=list_, run=run_job)
    return int(cmd_pipeline(ns))
1128
-
1129
-
1130
@app.command('list')
def list_cmd(
    config: str = typer.Option(
        ...,
        '--config',
        help='Path to pipeline YAML configuration file',
    ),
    pipelines: bool = typer.Option(
        False,
        '--pipelines',
        help='List ETL pipelines',
    ),
    sources: bool = typer.Option(False, '--sources', help='List data sources'),
    targets: bool = typer.Option(False, '--targets', help='List data targets'),
    transforms: bool = typer.Option(
        False,
        '--transforms',
        help='List data transforms',
    ),
) -> int:
    """Typer front-end for :func:`cmd_list`."""

    # Forward the section flags unchanged; cmd_list/_list_sections decide
    # which sections to emit (all-jobs fallback when no flag is set).
    ns = _ns(
        command='list',
        config=config,
        pipelines=pipelines,
        sources=sources,
        targets=targets,
        transforms=transforms,
    )
    return int(cmd_list(ns))
1161
-
1162
-
1163
@app.command('run')
def run_cmd(
    config: str = typer.Option(
        ...,
        '--config',
        help='Path to pipeline YAML configuration file',
    ),
    job: str | None = typer.Option(
        None,
        '-j',
        '--job',
        help='Name of the job to run',
    ),
    pipeline: str | None = typer.Option(
        None,
        '-p',
        '--pipeline',
        help='Name of the pipeline to run',
    ),
) -> int:
    """Typer front-end for :func:`cmd_run`."""

    # cmd_run prefers --job and falls back to --pipeline as an alias.
    ns = _ns(command='run', config=config, job=job, pipeline=pipeline)
    return int(cmd_run(ns))