etlplus 0.5.2__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/cli/app.py DELETED
@@ -1,1367 +0,0 @@
1
- """
2
- :mod:`etlplus.cli.app` module.
3
-
4
- Defines the main `Typer` application for the ``etlplus`` command-line
5
- interface (CLI).
6
-
7
- Typer-First Interface
8
- ---------------------
9
- The CLI is implemented using `Typer` (Click) for parsing, help text, and
10
- subcommand dispatch. The Typer layer focuses on ergonomics (git-style
11
- subcommands, optional inference of resource types, stdin/stdout piping, and
12
- quality-of-life flags), while delegating business logic to the existing
13
- ``cmd_*`` handlers.
14
-
15
- Namespace Adapter
16
- -----------------
17
- The command handlers continue to accept an ``argparse.Namespace`` for
18
- backwards compatibility with existing ``cmd_*`` functions and tests. The
19
- Typer commands adapt parsed arguments into an ``argparse.Namespace`` and then
20
- call the corresponding ``cmd_*`` handler.
21
-
22
- Subcommands
23
- -----------
24
- - ``extract``: extract data from files, databases, or REST APIs
25
- - ``validate``: validate data against rules
26
- - ``transform``: transform records
27
- - ``load``: load data to files, databases, or REST APIs
28
- - ``render``: render SQL DDL from table schema specs
29
-
30
- Notes
31
- -----
32
- - Use ``-`` to read from stdin or to write to stdout.
33
- - Commands ``extract`` and ``transform`` support the command-line option
34
- ``--from`` to override inferred resource types.
35
- - Commands ``transform`` and ``load`` support the command-line option ``--to``
36
- to override inferred resource types.
37
- """
38
-
39
- # Pylint struggles with large CLI surfaces that legitimately require
40
- # numerous arguments in a single module.
41
- # pylint: disable=too-many-lines
42
- # pylint: disable=too-many-arguments,too-many-positional-arguments
43
-
44
- from __future__ import annotations
45
-
46
- import argparse
47
- import sys
48
- from collections.abc import Collection
49
- from dataclasses import dataclass
50
- from pathlib import Path
51
- from typing import Annotated
52
- from typing import Final
53
-
54
- import typer
55
-
56
- from .. import __version__
57
- from ..enums import DataConnectorType
58
- from ..enums import FileFormat
59
- from ..utils import json_type
60
- from .handlers import cmd_extract
61
- from .handlers import cmd_list
62
- from .handlers import cmd_load
63
- from .handlers import cmd_pipeline
64
- from .handlers import cmd_render
65
- from .handlers import cmd_run
66
- from .handlers import cmd_transform
67
- from .handlers import cmd_validate
68
-
69
- # SECTION: EXPORTS ========================================================== #
70
-
71
-
72
- __all__ = [
73
- # Apps
74
- 'app',
75
- ]
76
-
77
-
78
- # SECTION: INTERNAL CONSTANTS =============================================== #
79
-
80
-
81
# URL prefixes that mark a value as a database DSN during type inference.
_DB_SCHEMES = ('postgres://', 'postgresql://', 'mysql://')

# Accepted connector-type and file-format names, as reported by the enums.
_SOURCE_CHOICES: Final[frozenset[str]] = frozenset(
    DataConnectorType.choices(),
)
_FORMAT_CHOICES: Final[frozenset[str]] = frozenset(
    FileFormat.choices(),
)
89
-
90
-
91
- # SECTION: CONSTANTS ======================================================== #
92
-
93
-
94
# Top-level help text shown for the ``etlplus`` application.
CLI_DESCRIPTION: Final[str] = (
    'ETLPlus - A Swiss Army knife for simple ETL operations.\n'
    '\n'
    ' Provide a subcommand and options. Examples:\n'
    '\n'
    ' etlplus extract in.csv > out.json\n'
    ' etlplus validate in.json --rules \'{"required": ["id"]}\'\n'
    ' etlplus transform --from file in.json '
    '--operations \'{"select": ["id"]}\' --to file -o out.json\n'
    ' etlplus extract in.csv | etlplus load --to file out.json\n'
    ' cat data.json | etlplus load --to api https://example.com/data\n'
    '\n'
    ' Override format inference when extensions are misleading:\n'
    '\n'
    ' etlplus extract data.txt --source-format csv\n'
    ' etlplus load payload.bin --target-format json'
)

# Footer appended to the top-level help output.
CLI_EPILOG: Final[str] = (
    'Tip:\n'
    ' --source-format and --target-format override format inference '
    'based on filename extensions when needed.'
)

# Canonical project home page.
PROJECT_URL: Final[str] = 'https://github.com/Dagitali/ETLPlus'
125
-
126
-
127
- # SECTION: TYPE ALIASES ==================================================== #
128
-
129
-
130
# -- Positional arguments --------------------------------------------------- #

SourceInputArg = Annotated[
    str,
    typer.Argument(
        ...,
        help=(
            'Extract from SOURCE. Use --from/--source-type to override the '
            'inferred connector when needed.'
        ),
        metavar='SOURCE',
    ),
]

StreamingSourceArg = Annotated[
    str,
    typer.Argument(
        ...,
        help=(
            'Data source to transform or validate (path, JSON payload, or '
            '- for stdin).'
        ),
        metavar='SOURCE',
    ),
]

TargetInputArg = Annotated[
    str,
    typer.Argument(
        ...,
        help=(
            'Load JSON data from stdin into TARGET. Use --to/--target-type '
            'to override connector inference when needed. Source data must '
            'be piped into stdin.'
        ),
        metavar='TARGET',
    ),
]

# -- Connector-type overrides ----------------------------------------------- #

SourceOverrideOption = Annotated[
    str | None,
    typer.Option(
        '--source-type',
        help='Override the inferred source type (file, database, api).',
        metavar='CONNECTOR',
        rich_help_panel='I/O overrides',
        show_default=False,
    ),
]

TargetOverrideOption = Annotated[
    str | None,
    typer.Option(
        '--target-type',
        help='Override the inferred target type (file, database, api).',
        metavar='CONNECTOR',
        rich_help_panel='I/O overrides',
        show_default=False,
    ),
]

# -- Payload-format overrides ----------------------------------------------- #

SourceFormatOption = Annotated[
    str | None,
    typer.Option(
        '--source-format',
        help=(
            'Input payload format when SOURCE is - or an inline payload. '
            'File sources infer format from the extension.'
        ),
        metavar='FORMAT',
        rich_help_panel='Format overrides',
        show_default=False,
    ),
]

# Same flag as ``SourceFormatOption``; only the help text differs.
StdinFormatOption = Annotated[
    str | None,
    typer.Option(
        '--source-format',
        help='Input payload format when reading from stdin (default: json).',
        metavar='FORMAT',
        rich_help_panel='Format overrides',
        show_default=False,
    ),
]

TargetFormatOption = Annotated[
    str | None,
    typer.Option(
        '--target-format',
        help=(
            'Payload format when TARGET is - or a non-file connector. File '
            'targets infer format from the extension.'
        ),
        metavar='FORMAT',
        rich_help_panel='Format overrides',
        show_default=False,
    ),
]

# -- JSON payload options --------------------------------------------------- #

OperationsJSONOption = Annotated[
    str,
    typer.Option(
        '--operations',
        help='Transformation operations as JSON string.',
    ),
]

RulesJSONOption = Annotated[
    str,
    typer.Option(
        '--rules',
        help='Validation rules as JSON string.',
    ),
]

# -- Output and configuration paths ----------------------------------------- #

TargetPathOption = Annotated[
    str | None,
    typer.Option(
        '--target',
        help='Target file for transformed or validated output (- for stdout).',
        metavar='PATH',
    ),
]

PipelineConfigOption = Annotated[
    str,
    typer.Option(
        ...,
        '--config',
        help='Path to pipeline YAML configuration file.',
        metavar='PATH',
    ),
]

# -- ``render`` command options --------------------------------------------- #

RenderConfigOption = Annotated[
    str | None,
    typer.Option(
        '--config',
        help='Pipeline YAML that includes table_schemas for rendering.',
        metavar='PATH',
        show_default=False,
    ),
]

RenderOutputOption = Annotated[
    str | None,
    typer.Option(
        '--output',
        '-o',
        help='Write rendered SQL to PATH (default: stdout).',
        metavar='PATH',
    ),
]

RenderSpecOption = Annotated[
    str | None,
    typer.Option(
        '--spec',
        help='Standalone table spec file (.yml/.yaml/.json).',
        metavar='PATH',
        show_default=False,
    ),
]

RenderTableOption = Annotated[
    str | None,
    typer.Option(
        '--table',
        help='Filter to a single table name from table_schemas.',
        metavar='NAME',
    ),
]

RenderTemplateOption = Annotated[
    str,
    typer.Option(
        '--template',
        '-t',
        help='Template key (ddl/view) or path to a Jinja template file.',
        metavar='KEY|PATH',
        show_default=True,
    ),
]

RenderTemplatePathOption = Annotated[
    str | None,
    typer.Option(
        '--template-path',
        help=(
            'Explicit path to a Jinja template file (overrides template key).'
        ),
        metavar='PATH',
    ),
]
323
-
324
-
325
- # SECTION: DATA CLASSES ===================================================== #
326
-
327
-
328
- @dataclass(slots=True)
329
- class CliState:
330
- """Mutable container for runtime CLI toggles."""
331
-
332
- pretty: bool = True
333
- quiet: bool = False
334
- verbose: bool = False
335
-
336
-
337
- # SECTION: INTERNAL FUNCTIONS =============================================== #
338
-
339
-
340
def _ensure_state(
    ctx: typer.Context,
) -> CliState:
    """
    Fetch the :class:`CliState` held on ``ctx``, creating one if needed.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.

    Returns
    -------
    CliState
        The state object stored on ``ctx.obj``. A default-initialized
        instance is installed first when ``ctx.obj`` is missing or is not a
        :class:`CliState`.
    """
    current = getattr(ctx, 'obj', None)
    if isinstance(current, CliState):
        return current

    # Nothing usable on the context yet: install defaults.
    ctx.obj = CliState()
    return ctx.obj
359
-
360
-
361
def _format_namespace_kwargs(
    *,
    format_value: str | None,
    default: str,
) -> dict[str, object]:
    """
    Build the format-related attributes passed to a handler namespace.

    Parameters
    ----------
    format_value : str | None
        Format supplied on the command line, if any.
    default : str
        Format used when ``format_value`` is falsy.

    Returns
    -------
    dict[str, object]
        Mapping with ``format`` (the effective format) and
        ``_format_explicit`` (``True`` when ``format_value`` is not
        ``None``).
    """
    was_given = format_value is not None
    effective = format_value or default
    return {'format': effective, '_format_explicit': was_given}
385
-
386
-
387
def _infer_resource_type(
    value: str,
) -> str:
    """
    Infer the resource type from a path, URL, or DSN string.

    Parameters
    ----------
    value : str
        Raw CLI argument that represents a source or target.

    Returns
    -------
    str
        ``file`` for ``-`` and for paths that exist or carry a suffix,
        ``api`` for ``http(s)://`` URLs, and ``database`` for values that
        start with one of ``_DB_SCHEMES``.

    Raises
    ------
    ValueError
        If the value is empty or blank, or if no heuristic matches.
    """
    failure = 'Could not infer resource type. Use --from/--to to specify it.'

    val = (value or '').strip()
    if not val:
        # ``Path('')`` is the current directory, which always exists, so a
        # blank value would otherwise be misreported as a file.
        raise ValueError(failure)

    # A lone dash is the stdin/stdout placeholder.
    if val == '-':
        return 'file'

    # Scheme checks are case-insensitive.
    low = val.lower()
    if low.startswith(('http://', 'https://')):
        return 'api'
    if low.startswith(_DB_SCHEMES):
        return 'database'

    path = Path(val)
    if path.exists() or path.suffix:
        return 'file'

    raise ValueError(failure)
426
-
427
-
428
def _infer_resource_type_or_exit(
    value: str,
) -> str:
    """
    Infer a resource type, reporting failure as a CLI parameter error.

    Parameters
    ----------
    value : str
        CLI value describing a source/target.

    Returns
    -------
    str
        Inferred resource type.

    Raises
    ------
    typer.BadParameter
        If :func:`_infer_resource_type` raises ``ValueError``; the original
        message is reused and the original exception is chained.
    """
    try:
        inferred = _infer_resource_type(value)
    except ValueError as exc:  # pragma: no cover - exercised indirectly
        raise typer.BadParameter(str(exc)) from exc
    return inferred
452
-
453
-
454
def _infer_resource_type_soft(
    value: str | None,
) -> str | None:
    """
    Infer a resource type without ever raising on failure.

    Parameters
    ----------
    value : str | None
        CLI value describing a source/target.

    Returns
    -------
    str | None
        Inferred resource type, or ``None`` when ``value`` is ``None`` or
        :func:`_infer_resource_type` raises ``ValueError``.
    """
    if value is not None:
        try:
            return _infer_resource_type(value)
        except ValueError:
            # Inference failure is expected here; fall through to ``None``.
            pass
    return None
476
-
477
-
478
def _log_inferred_resource(
    state: CliState,
    *,
    role: str,
    value: str,
    resource_type: str | None,
) -> None:
    """
    Report an inferred resource type on stderr when running verbosely.

    Parameters
    ----------
    state : CliState
        Current CLI state; only its ``verbose`` flag is consulted.
    role : str
        Friendly label for the resource (e.g., ``source`` or ``target``).
    value : str
        Resource value provided on the CLI.
    resource_type : str | None
        Inferred resource type; nothing is printed when it is ``None``.
    """
    should_report = state.verbose and resource_type is not None
    if should_report:
        message = f'Inferred {role}_type={resource_type} for {role}={value}'
        print(message, file=sys.stderr)
505
-
506
-
507
def _ns(
    **kwargs: object,
) -> argparse.Namespace:
    """
    Wrap keyword arguments in an :class:`argparse.Namespace`.

    Parameters
    ----------
    **kwargs : object
        Attributes applied to the resulting namespace.

    Returns
    -------
    argparse.Namespace
        Namespace carrying one attribute per keyword argument, in the shape
        the ``cmd_*`` handlers are called with.
    """
    namespace = argparse.Namespace(**kwargs)
    return namespace
523
-
524
-
525
def _optional_choice(
    value: str | None,
    choices: Collection[str],
    *,
    label: str,
) -> str | None:
    """
    Validate a CLI choice only when one was actually supplied.

    Parameters
    ----------
    value : str | None
        Candidate value provided by the CLI option.
    choices : Collection[str]
        Allowed options for the parameter.
    label : str
        Friendly label rendered in error messages.

    Returns
    -------
    str | None
        ``None`` when the option is omitted; otherwise the result of
        :func:`_validate_choice` for ``value``.
    """
    return (
        None
        if value is None
        else _validate_choice(value, choices, label=label)
    )
551
-
552
-
553
def _resolve_resource_type(
    *,
    explicit_type: str | None,
    override_type: str | None,
    value: str,
    label: str,
    conflict_error: str | None = None,
    legacy_file_error: str | None = None,
) -> str:
    """
    Pick the effective resource type and validate it.

    Preference order: ``explicit_type``, then ``override_type``, then a type
    inferred from ``value``.

    Parameters
    ----------
    explicit_type : str | None
        Explicit resource type provided by the user.
    override_type : str | None
        Resource type provided by an overriding option.
    value : str
        Resource value used for inference when neither type is given.
    label : str
        Friendly label for error messages.
    conflict_error : str | None
        Message raised when both ``explicit_type`` and ``override_type`` are
        given. The conflict is ignored when this is empty or ``None``.
    legacy_file_error : str | None
        Message raised when ``explicit_type`` is ``file`` (ignoring case and
        surrounding whitespace). The check is skipped when this is empty or
        ``None``.

    Returns
    -------
    str
        Resolved and validated resource type.

    Raises
    ------
    typer.BadParameter
        On a type conflict, on a legacy ``file`` explicit type, when
        inference fails, or when the resolved type is not an allowed choice.
    """
    if explicit_type is None:
        # No explicit type: fall back to the override, then to inference.
        fallback = override_type or _infer_resource_type_or_exit(value)
        return _validate_choice(fallback, _SOURCE_CHOICES, label=label)

    if conflict_error and override_type is not None:
        raise typer.BadParameter(conflict_error)
    if legacy_file_error and explicit_type.strip().lower() == 'file':
        raise typer.BadParameter(legacy_file_error)
    return _validate_choice(explicit_type, _SOURCE_CHOICES, label=label)
602
-
603
-
604
def _stateful_namespace(
    state: CliState,
    *,
    command: str,
    **kwargs: object,
) -> argparse.Namespace:
    """
    Build a handler namespace that carries the global CLI flags.

    Parameters
    ----------
    state : CliState
        Current CLI state; its ``pretty``, ``quiet``, and ``verbose`` flags
        are copied onto the namespace.
    command : str
        Logical command name (e.g., ``extract``).
    **kwargs : object
        Additional attributes required by the handler.

    Returns
    -------
    argparse.Namespace
        Namespace holding ``command``, the three state flags, and every
        entry of ``kwargs``.
    """
    shared = {
        'command': command,
        'pretty': state.pretty,
        'quiet': state.quiet,
        'verbose': state.verbose,
    }
    # Double unpacking keeps the duplicate-keyword ``TypeError`` should
    # ``kwargs`` reuse one of the shared names.
    return _ns(**shared, **kwargs)
634
-
635
-
636
def _validate_choice(
    value: str,
    choices: Collection[str],
    *,
    label: str,
) -> str:
    """
    Check a CLI value against the allowed choices.

    Parameters
    ----------
    value : str
        Candidate value from the CLI option or argument.
    choices : Collection[str]
        Allowed values for the option.
    label : str
        Friendly label rendered in the validation error message.

    Returns
    -------
    str
        ``value`` with surrounding whitespace removed.

    Raises
    ------
    typer.BadParameter
        If the stripped value is not present in ``choices``. The message
        quotes the value as given and lists the choices in sorted order.
    """
    candidate = (value or '').strip()
    if candidate not in choices:
        allowed = ', '.join(sorted(choices))
        raise typer.BadParameter(
            f"Invalid {label} '{value}'. Choose from: {allowed}",
        )
    return candidate
671
-
672
-
673
- # SECTION: TYPER APP ======================================================== #
674
-
675
-
676
# Typer application instance (subcommands are registered below).
# ``no_args_is_help`` stays off because the root callback prints the help
# text itself when no subcommand is given.
app = typer.Typer(
    name='etlplus',
    help=CLI_DESCRIPTION,
    epilog=CLI_EPILOG,
    add_completion=True,
    no_args_is_help=False,
    rich_markup_mode='markdown',
)
686
-
687
-
688
@app.callback(invoke_without_command=True)
def _root(
    ctx: typer.Context,
    version: bool = typer.Option(
        False,
        '--version',
        '-V',
        is_eager=True,
        help='Show the version and exit.',
    ),
    pretty: bool = typer.Option(
        True,
        '--pretty/--no-pretty',
        help='Pretty-print JSON output (default: pretty).',
    ),
    quiet: bool = typer.Option(
        False,
        '--quiet',
        '-q',
        help='Suppress warnings and non-essential output.',
    ),
    verbose: bool = typer.Option(
        False,
        '--verbose',
        '-v',
        help='Emit extra diagnostics to stderr.',
    ),
) -> None:
    """
    Seed the Typer context with runtime flags and handle root-only options.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    version : bool
        If True, print the etlplus version and exit.
    pretty : bool
        Whether to pretty-print JSON output.
    quiet : bool
        Whether to suppress warnings and non-essential output.
    verbose : bool
        Whether to emit extra diagnostics to stderr.

    Raises
    ------
    typer.Exit
        If ``--version`` is provided or no subcommand is invoked.
    """
    # Store the flags first so subcommands can read them from ``ctx.obj``.
    ctx.obj = CliState(pretty=pretty, quiet=quiet, verbose=verbose)

    if version:
        banner = f'etlplus {__version__}'
    elif ctx.invoked_subcommand is None and not ctx.resilient_parsing:
        # Bare ``etlplus`` invocation: show the help text.
        banner = ctx.command.get_help(ctx)
    else:
        return

    typer.echo(banner)
    raise typer.Exit(0)
746
-
747
-
748
@app.command('extract')
def extract_cmd(
    ctx: typer.Context,
    source: SourceInputArg,
    source_format: SourceFormatOption = None,
    source_type: SourceOverrideOption = None,
) -> int:
    """
    Extract data from files, databases, or REST APIs.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    source : SourceInputArg
        Data source (file path, URL, DSN, or ``-`` for stdin).
    source_format : SourceFormatOption, optional
        Payload format when not a file.
    source_type : SourceOverrideOption, optional
        Override the inferred source type.

    Returns
    -------
    int
        Zero on success.

    Examples
    --------
    - Extract from a file (type inferred):
        etlplus extract in.csv
    - Extract from a file (explicit via flag):
        etlplus extract --from file in.csv
    - Extract from an API:
        etlplus extract https://example.com/data.json
        etlplus extract --from api https://example.com/data.json
    - Extract from a database DSN:
        etlplus extract --from database postgresql://user:pass@host/db
    - Pipe into transform/load:
        etlplus extract in.csv \
        | etlplus transform --operations '{"select":["a"]}'

    Notes
    -----
    - The ``extract`` command always writes JSON to stdout.
    - CSV output is unsupported for this command.
    - Use shell redirection (``>``) or pipelines to persist the output.
    """
    # The option aliases already admit ``None``, so they are used without an
    # extra ``| None`` wrapper, as ``render_cmd`` does.
    state = _ensure_state(ctx)

    # Validate the overrides before any inference takes place.
    source_type = _optional_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )
    source_format = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )

    # An explicit override wins; otherwise infer from the source value.
    resolved_source_type = source_type or _infer_resource_type_or_exit(
        source,
    )
    _log_inferred_resource(
        state,
        role='source',
        value=source,
        resource_type=resolved_source_type,
    )

    ns = _stateful_namespace(
        state,
        command='extract',
        source_type=resolved_source_type,
        source=source,
        **_format_namespace_kwargs(
            format_value=source_format,
            default='json',
        ),
    )
    return int(cmd_extract(ns))
832
-
833
-
834
@app.command('list')
def list_cmd(
    ctx: typer.Context,
    config: PipelineConfigOption,
    jobs: bool = typer.Option(
        False,
        '--jobs',
        help='List available job names and exit',
    ),
    pipelines: bool = typer.Option(
        False,
        '--pipelines',
        help='List ETL pipelines',
    ),
    sources: bool = typer.Option(
        False,
        '--sources',
        help='List data sources',
    ),
    summary: bool = typer.Option(
        False,
        '--summary',
        help='Show pipeline summary (name, version, sources, targets, jobs)',
    ),
    targets: bool = typer.Option(
        False,
        '--targets',
        help='List data targets',
    ),
    transforms: bool = typer.Option(
        False,
        '--transforms',
        help='List data transforms',
    ),
) -> int:
    """
    Print ETL entities from a pipeline YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : PipelineConfigOption
        Path to pipeline YAML configuration file.
    jobs : bool, optional
        If True, list available job names and exit.
    pipelines : bool, optional
        If True, list ETL pipelines.
    sources : bool, optional
        If True, list data sources.
    summary : bool, optional
        If True, show pipeline summary (name, version, sources, targets, jobs).
    targets : bool, optional
        If True, list data targets.
    transforms : bool, optional
        If True, list data transforms.

    Returns
    -------
    int
        Zero on success.
    """
    # Section toggles are forwarded to the handler unchanged.
    selection = {
        'summary': summary,
        'pipelines': pipelines,
        'jobs': jobs,
        'sources': sources,
        'targets': targets,
        'transforms': transforms,
    }
    ns = _stateful_namespace(
        _ensure_state(ctx),
        command='list',
        config=config,
        **selection,
    )
    return int(cmd_list(ns))
909
-
910
-
911
@app.command('load')
def load_cmd(
    ctx: typer.Context,
    target: TargetInputArg,
    source_format: StdinFormatOption = None,
    target_format: TargetFormatOption = None,
    target_type: TargetOverrideOption = None,
) -> int:
    """
    Load data into a file, database, or REST API.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    target : TargetInputArg
        Load destination (file path, URL/DSN, or ``-`` for stdout).
    source_format : StdinFormatOption, optional
        Hint for parsing stdin payloads (json or csv).
    target_format : TargetFormatOption, optional
        Payload format when not a file target (or when TARGET is ``-``).
    target_type : TargetOverrideOption, optional
        Override the inferred target type.

    Returns
    -------
    int
        Zero on success.

    Examples
    --------
    - Pipe into a file:
        etlplus extract in.csv \
        | etlplus transform --operations '{"select":["a"]}' \
        | etlplus load --to file out.json
    - Read from stdin and write to a file:
        etlplus load out.json
    - Write to stdout:
        etlplus load --to file -

    Notes
    -----
    - The ``load`` command reads JSON from stdin.
    - CSV input is unsupported unless ``--source-format csv`` is provided.
    - Convert upstream before piping into ``load`` when working with other
        formats.
    """
    # The option aliases already admit ``None``, so they are used without an
    # extra ``| None`` wrapper, as ``render_cmd`` does.
    state = _ensure_state(ctx)

    # Validate the overrides before any inference takes place.
    source_format = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )
    target_type = _optional_choice(
        target_type,
        _SOURCE_CHOICES,
        label='target_type',
    )
    target_format = _optional_choice(
        target_format,
        _FORMAT_CHOICES,
        label='target_format',
    )

    # An explicit override wins; otherwise infer from the target value.
    resolved_target_type = target_type or _infer_resource_type_or_exit(
        target,
    )

    # ``load`` always takes its input from stdin, represented by ``-``.
    stdin_source = '-'
    _log_inferred_resource(
        state,
        role='source',
        value=stdin_source,
        resource_type=_infer_resource_type_soft(stdin_source),
    )
    _log_inferred_resource(
        state,
        role='target',
        value=target,
        resource_type=resolved_target_type,
    )

    ns = _stateful_namespace(
        state,
        command='load',
        source=stdin_source,
        source_format=source_format,
        target_type=resolved_target_type,
        target=target,
        **_format_namespace_kwargs(
            format_value=target_format,
            default='json',
        ),
    )
    return int(cmd_load(ns))
1011
-
1012
-
1013
@app.command('pipeline')
def pipeline_cmd(
    ctx: typer.Context,
    config: PipelineConfigOption,
    job: str | None = typer.Option(
        None,
        '--job',
        metavar='JOB',
        help='Run a specific job by name',
    ),
    jobs: bool = typer.Option(
        False,
        '--jobs',
        help='List available job names and exit',
    ),
    pipeline: str | None = typer.Option(
        None,
        '--pipeline',
        help='Run a specific pipeline by name',
    ),
) -> int:
    """
    Deprecated wrapper to inspect or run a pipeline YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : PipelineConfigOption
        Path to pipeline YAML configuration file.
    job : str | None, optional
        Name of a specific job to run.
    jobs : bool, optional
        If True, list available job names and exit.
    pipeline : str | None, optional
        Name of a specific pipeline to run.

    Returns
    -------
    int
        Zero on success.
    """
    ns = _stateful_namespace(
        _ensure_state(ctx),
        command='pipeline',
        config=config,
        list=jobs,
        # ``--job`` takes precedence over ``--pipeline`` when both are set.
        run=job or pipeline,
    )
    return int(cmd_pipeline(ns))
1065
-
1066
-
1067
@app.command('render')
def render_cmd(
    ctx: typer.Context,
    config: RenderConfigOption = None,
    spec: RenderSpecOption = None,
    table: RenderTableOption = None,
    template: RenderTemplateOption = 'ddl',
    template_path: RenderTemplatePathOption = None,
    output: RenderOutputOption = None,
) -> int:
    """
    Render SQL DDL from table schemas defined in YAML/JSON configs.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : RenderConfigOption, optional
        Pipeline YAML containing ``table_schemas`` entries.
    spec : RenderSpecOption, optional
        Standalone table spec file (.yml/.yaml/.json).
    table : RenderTableOption, optional
        Filter to a single table name within the available specs.
    template : RenderTemplateOption, optional
        Built-in template key or template file path.
    template_path : RenderTemplatePathOption, optional
        Explicit template file path to render with.
    output : RenderOutputOption, optional
        Path to write SQL to (stdout when omitted).

    Returns
    -------
    int
        Zero on success.
    """
    # Options are forwarded to the handler unchanged.
    render_options = {
        'config': config,
        'spec': spec,
        'table': table,
        'template': template,
        'template_path': template_path,
        'output': output,
    }
    ns = _stateful_namespace(
        _ensure_state(ctx),
        command='render',
        **render_options,
    )
    return int(cmd_render(ns))
1114
-
1115
-
1116
@app.command('run')
def run_cmd(
    ctx: typer.Context,
    config: PipelineConfigOption,
    job: str | None = typer.Option(
        None,
        '-j',
        '--job',
        help='Name of the job to run',
    ),
    pipeline: str | None = typer.Option(
        None,
        '-p',
        '--pipeline',
        help='Name of the pipeline to run',
    ),
) -> int:
    """
    Execute an ETL job or pipeline from a YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : PipelineConfigOption
        Path to pipeline YAML configuration file.
    job : str | None, optional
        Name of the job to run.
    pipeline : str | None, optional
        Name of the pipeline to run.

    Returns
    -------
    int
        Zero on success.
    """
    # Both selectors are forwarded as given; the handler receives them
    # unmodified.
    ns = _stateful_namespace(
        _ensure_state(ctx),
        command='run',
        config=config,
        job=job,
        pipeline=pipeline,
    )
    return int(cmd_run(ns))
1161
-
1162
-
1163
- @app.command('transform')
1164
- def transform_cmd(
1165
- ctx: typer.Context,
1166
- operations: OperationsJSONOption = '{}',
1167
- source: StreamingSourceArg = '-',
1168
- source_format: SourceFormatOption | None = None,
1169
- source_type: SourceOverrideOption | None = None,
1170
- target: TargetPathOption | None = None,
1171
- target_format: TargetFormatOption | None = None,
1172
- target_type: TargetOverrideOption | None = None,
1173
- ) -> int:
1174
- """
1175
- Transform records using JSON-described operations.
1176
-
1177
- Parameters
1178
- ----------
1179
- ctx : typer.Context
1180
- Typer execution context provided to the command.
1181
- operations : OperationsJSONOption, optional
1182
- Transformation operations as a JSON string.
1183
- source : StreamingSourceArg, optional
1184
- Data source (file path or ``-`` for stdin).
1185
- source_format : SourceFormatOption | None, optional
1186
- Input payload format when not a file (or when SOURCE is -).
1187
- source_type : SourceOverrideOption | None, optional
1188
- Override the inferred source type.
1189
- target : TargetPathOption | None, optional
1190
- Optional output path. Use ``-`` for stdout.
1191
- target_format : TargetFormatOption | None, optional
1192
- Output payload format when not a file target (or when OUTPUT is -).
1193
- Accepts ``--target-format``.
1194
- target_type : TargetOverrideOption | None, optional
1195
- Override the inferred target type.
1196
-
1197
- Returns
1198
- -------
1199
- int
1200
- Zero on success.
1201
-
1202
- Examples
1203
- --------
1204
- - Transform data from a file and write to another file:
1205
- etlplus transform --from file in.json \
1206
- --operations '{"select": ["id", "name"]}' \
1207
- --to file out.json
1208
- - Transform data from stdin and write to stdout:
1209
- cat in.json \
1210
- | etlplus transform \
1211
- --operations '{"filter": {"field": "age", "gt": 30}}'
1212
- - Transform data from a file and write to stdout:
1213
- etlplus transform --from file in.csv \
1214
- --source-format csv \
1215
- --operations '{"select": ["id", "email"]}'
1216
- - Transform data from stdin and write to a file:
1217
- cat in.json \
1218
- | etlplus transform --operations '{"sort": ["-created_at"]}' \
1219
- --to file out.json
1220
-
1221
- Notes
1222
- -----
1223
- - The ``transform`` command reads JSON from stdin when SOURCE is ``-``.
1224
- - CSV input is unsupported for this command.
1225
- - Convert upstream before piping into ``transform``.
1226
- """
1227
- state = _ensure_state(ctx)
1228
-
1229
- source_format = _optional_choice(
1230
- source_format,
1231
- _FORMAT_CHOICES,
1232
- label='source_format',
1233
- )
1234
- source_type = _optional_choice(
1235
- source_type,
1236
- _SOURCE_CHOICES,
1237
- label='source_type',
1238
- )
1239
- target_format = _optional_choice(
1240
- target_format,
1241
- _FORMAT_CHOICES,
1242
- label='target_format',
1243
- )
1244
- target_format_kwargs = _format_namespace_kwargs(
1245
- format_value=target_format,
1246
- default='json',
1247
- )
1248
- target_type = _optional_choice(
1249
- target_type,
1250
- _SOURCE_CHOICES,
1251
- label='target_type',
1252
- )
1253
-
1254
- resolved_source_type = source_type or _infer_resource_type_soft(source)
1255
- resolved_source_value = source if source is not None else '-'
1256
- resolved_target_value = target if target is not None else '-'
1257
-
1258
- if resolved_source_type is not None:
1259
- resolved_source_type = _validate_choice(
1260
- resolved_source_type,
1261
- _SOURCE_CHOICES,
1262
- label='source_type',
1263
- )
1264
-
1265
- resolved_target_type = _resolve_resource_type(
1266
- explicit_type=None,
1267
- override_type=target_type,
1268
- value=resolved_target_value,
1269
- label='target_type',
1270
- )
1271
-
1272
- _log_inferred_resource(
1273
- state,
1274
- role='source',
1275
- value=resolved_source_value,
1276
- resource_type=resolved_source_type,
1277
- )
1278
- _log_inferred_resource(
1279
- state,
1280
- role='target',
1281
- value=resolved_target_value,
1282
- resource_type=resolved_target_type,
1283
- )
1284
-
1285
- ns = _stateful_namespace(
1286
- state,
1287
- command='transform',
1288
- source=resolved_source_value,
1289
- source_type=resolved_source_type,
1290
- operations=json_type(operations),
1291
- target=resolved_target_value,
1292
- source_format=source_format,
1293
- target_type=resolved_target_type,
1294
- target_format=target_format_kwargs['format'],
1295
- **target_format_kwargs,
1296
- )
1297
- return int(cmd_transform(ns))
1298
-
1299
-
1300
- @app.command('validate')
1301
- def validate_cmd(
1302
- ctx: typer.Context,
1303
- rules: RulesJSONOption = '{}',
1304
- source: StreamingSourceArg = '-',
1305
- source_format: SourceFormatOption | None = None,
1306
- source_type: SourceOverrideOption | None = None,
1307
- target: TargetPathOption | None = None,
1308
- ) -> int:
1309
- """
1310
- Validate data against JSON-described rules.
1311
-
1312
- Parameters
1313
- ----------
1314
- ctx : typer.Context
1315
- Typer execution context provided to the command.
1316
- rules : RulesJSONOption, optional
1317
- Validation rules as a JSON string.
1318
- source : StreamingSourceArg, optional
1319
- Data source (file path or ``-`` for stdin).
1320
- source_format : SourceFormatOption | None, optional
1321
- Optional stdin format hint (JSON or CSV) when SOURCE is ``-``.
1322
- source_type : SourceOverrideOption | None, optional
1323
- Override the inferred source type when heuristics fail.
1324
- target : TargetPathOption | None, optional
1325
- Optional output path. Use ``-`` for stdout.
1326
-
1327
- Returns
1328
- -------
1329
- int
1330
- Zero on success.
1331
- """
1332
- source_format = _optional_choice(
1333
- source_format,
1334
- _FORMAT_CHOICES,
1335
- label='source_format',
1336
- )
1337
- source_type = _optional_choice(
1338
- source_type,
1339
- _SOURCE_CHOICES,
1340
- label='source_type',
1341
- )
1342
- source_format_kwargs = _format_namespace_kwargs(
1343
- format_value=source_format,
1344
- default='json',
1345
- )
1346
-
1347
- state = _ensure_state(ctx)
1348
- resolved_source_type = source_type or _infer_resource_type_soft(source)
1349
-
1350
- _log_inferred_resource(
1351
- state,
1352
- role='source',
1353
- value=source,
1354
- resource_type=resolved_source_type,
1355
- )
1356
-
1357
- ns = _stateful_namespace(
1358
- state,
1359
- command='validate',
1360
- source=source,
1361
- source_type=resolved_source_type,
1362
- rules=json_type(rules), # convert CLI string to dict
1363
- target=target,
1364
- source_format=source_format,
1365
- **source_format_kwargs,
1366
- )
1367
- return int(cmd_validate(ns))