etlplus 0.4.7__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/cli/app.py DELETED
@@ -1,1239 +0,0 @@
1
- """
2
- :mod:`etlplus.cli.app` module.
3
-
4
- Defines the main `Typer` application for the ``etlplus`` command-line
5
- interface (CLI).
6
-
7
- Typer-First Interface
8
- ---------------------
9
- The CLI is implemented using `Typer` (Click) for parsing, help text, and
10
- subcommand dispatch. The Typer layer focuses on ergonomics (git-style
11
- subcommands, optional inference of resource types, stdin/stdout piping, and
12
- quality-of-life flags), while delegating business logic to the existing
13
- ``cmd_*`` handlers.
14
-
15
- Namespace Adapter
16
- -----------------
17
- The command handlers continue to accept an ``argparse.Namespace`` for
18
- backwards compatibility with existing ``cmd_*`` functions and tests. The
19
- Typer commands adapt parsed arguments into an ``argparse.Namespace`` and then
20
- call the corresponding ``cmd_*`` handler.
21
-
22
- Subcommands
23
- -----------
24
- - ``extract``: extract data from files, databases, or REST APIs
25
- - ``validate``: validate data against rules
26
- - ``transform``: transform records
27
- - ``load``: load data to files, databases, or REST APIs
28
-
29
- Notes
30
- -----
31
- - Use ``-`` to read from stdin or to write to stdout.
32
- - Commands ``extract`` and ``transform`` support the command-line option
33
- ``--from`` to override inferred resource types.
34
- - Commands ``transform`` and ``load`` support the command-line option ``--to``
35
- to override inferred resource types.
36
- """
37
-
38
- # Pylint struggles with large CLI surfaces that legitimately require
39
- # numerous arguments in a single module.
40
- # pylint: disable=too-many-lines
41
- # pylint: disable=too-many-arguments,too-many-positional-arguments
42
-
43
- from __future__ import annotations
44
-
45
- import argparse
46
- import sys
47
- from collections.abc import Collection
48
- from dataclasses import dataclass
49
- from pathlib import Path
50
- from typing import Annotated
51
- from typing import Final
52
-
53
- import typer
54
-
55
- from .. import __version__
56
- from ..enums import DataConnectorType
57
- from ..enums import FileFormat
58
- from ..utils import json_type
59
- from .handlers import cmd_extract
60
- from .handlers import cmd_list
61
- from .handlers import cmd_load
62
- from .handlers import cmd_pipeline
63
- from .handlers import cmd_run
64
- from .handlers import cmd_transform
65
- from .handlers import cmd_validate
66
-
67
- # SECTION: EXPORTS ========================================================== #
68
-
69
-
70
- __all__ = [
71
- # Apps
72
- 'app',
73
- ]
74
-
75
-
76
- # SECTION: INTERNAL CONSTANTS =============================================== #
77
-
78
-
79
# URL scheme prefixes that ``_infer_resource_type`` maps to ``database``.
# The comparison there is done against the lower-cased CLI value.
_DB_SCHEMES = (
    'postgres://',
    'postgresql://',
    'mysql://',
)

# Whitelists used to validate connector-type and format options; the values
# come from the ``choices()`` helpers on the corresponding enums.
_SOURCE_CHOICES: Final[frozenset[str]] = frozenset(DataConnectorType.choices())
_FORMAT_CHOICES: Final[frozenset[str]] = frozenset(FileFormat.choices())
87
-
88
-
89
- # SECTION: CONSTANTS ======================================================== #
90
-
91
-
92
# Root help text; handed to ``typer.Typer(help=...)`` when ``app`` is built.
# NOTE(review): the examples use ``--from``, ``--to`` and ``-o``, but the
# option aliases declared in this module only register ``--source-type``,
# ``--target-type`` and ``--target`` -- confirm the examples still work.
CLI_DESCRIPTION: Final[str] = '\n'.join(
    [
        'ETLPlus - A Swiss Army knife for simple ETL operations.',
        '',
        ' Provide a subcommand and options. Examples:',
        '',
        ' etlplus extract in.csv > out.json',
        ' etlplus validate in.json --rules \'{"required": ["id"]}\'',
        (
            ' etlplus transform --from file in.json '
            '--operations \'{"select": ["id"]}\' --to file -o out.json'
        ),
        ' etlplus extract in.csv | etlplus load --to file out.json',
        ' cat data.json | etlplus load --to api https://example.com/data',
        '',
        ' Override format inference when extensions are misleading:',
        '',
        ' etlplus extract data.txt --source-format csv',
        ' etlplus load payload.bin --target-format json',
    ],
)

# Footer text; handed to ``typer.Typer(epilog=...)`` when ``app`` is built.
# The two adjacent literals form a single list item via implicit
# string concatenation.
CLI_EPILOG: Final[str] = '\n'.join(
    [
        'Tip:',
        ' --source-format and --target-format override format inference '
        'based on filename extensions when needed.',
    ],
)

# Project repository URL.
PROJECT_URL: Final[str] = 'https://github.com/Dagitali/ETLPlus'
123
-
124
-
125
- # SECTION: TYPE ALIASES ==================================================== #
126
-
127
-
128
# Reusable ``Annotated`` aliases so that every command declares the same
# argument/option metadata (flag name, metavar, help panel, help text).

# Required positional SOURCE for ``extract``.
SourceInputArg = Annotated[
    str,
    typer.Argument(
        ...,
        metavar='SOURCE',
        help=(
            'Extract from SOURCE. Use --from/--source-type to override the '
            'inferred connector when needed.'
        ),
    ),
]

# Positional SOURCE for ``transform``/``validate``; those commands supply
# ``'-'`` as the default in their signatures.
StreamingSourceArg = Annotated[
    str,
    typer.Argument(
        ...,
        metavar='SOURCE',
        help=(
            'Data source to transform or validate (path, JSON payload, or '
            '- for stdin).'
        ),
    ),
]

# Required positional TARGET for ``load``.
TargetInputArg = Annotated[
    str,
    typer.Argument(
        ...,
        metavar='TARGET',
        help=(
            'Load JSON data from stdin into TARGET. Use --to/--target-type '
            'to override connector inference when needed. Source data must '
            'be piped into stdin.'
        ),
    ),
]

# ``--source-type``: explicit connector for the source side.
SourceOverrideOption = Annotated[
    str | None,
    typer.Option(
        '--source-type',
        metavar='CONNECTOR',
        show_default=False,
        rich_help_panel='I/O overrides',
        help='Override the inferred source type (file, database, api).',
    ),
]

# ``--target-type``: explicit connector for the target side.
TargetOverrideOption = Annotated[
    str | None,
    typer.Option(
        '--target-type',
        metavar='CONNECTOR',
        show_default=False,
        rich_help_panel='I/O overrides',
        help='Override the inferred target type (file, database, api).',
    ),
]

# ``--source-format`` as used by ``extract``/``transform``/``validate``.
SourceFormatOption = Annotated[
    str | None,
    typer.Option(
        '--source-format',
        metavar='FORMAT',
        show_default=False,
        rich_help_panel='Format overrides',
        help=(
            'Input payload format when SOURCE is - or an inline payload. '
            'File sources infer format from the extension.'
        ),
    ),
]

# Same ``--source-format`` flag, but with help text worded for ``load``,
# which always reads its payload from stdin.
StdinFormatOption = Annotated[
    str | None,
    typer.Option(
        '--source-format',
        metavar='FORMAT',
        show_default=False,
        rich_help_panel='Format overrides',
        help='Input payload format when reading from stdin (default: json).',
    ),
]

# ``--target-format``: output payload format override.
TargetFormatOption = Annotated[
    str | None,
    typer.Option(
        '--target-format',
        metavar='FORMAT',
        show_default=False,
        rich_help_panel='Format overrides',
        help=(
            'Payload format when TARGET is - or a non-file connector. File '
            'targets infer format from the extension.'
        ),
    ),
]

# ``--operations``: raw JSON string; ``transform`` parses it with
# ``json_type`` before calling the handler.
OperationsJSONOption = Annotated[
    str,
    typer.Option(
        '--operations',
        help='Transformation operations as JSON string.',
    ),
]

# ``--rules``: raw JSON string; ``validate`` parses it with ``json_type``
# before calling the handler.
RulesJSONOption = Annotated[
    str,
    typer.Option(
        '--rules',
        help='Validation rules as JSON string.',
    ),
]

# ``--target``: output path for ``transform``/``validate``.
TargetPathOption = Annotated[
    str | None,
    typer.Option(
        '--target',
        metavar='PATH',
        help='Target file for transformed or validated output (- for stdout).',
    ),
]

# ``--config``: required (``...``) path to the pipeline YAML file, shared by
# ``list``, ``pipeline`` and ``run``.
PipelineConfigOption = Annotated[
    str,
    typer.Option(
        ...,
        '--config',
        metavar='PATH',
        help='Path to pipeline YAML configuration file.',
    ),
]
260
-
261
-
262
- # SECTION: DATA CLASSES ===================================================== #
263
-
264
-
265
- @dataclass(slots=True)
266
- class CliState:
267
- """Mutable container for runtime CLI toggles."""
268
-
269
- pretty: bool = True
270
- quiet: bool = False
271
- verbose: bool = False
272
-
273
-
274
- # SECTION: INTERNAL FUNCTIONS =============================================== #
275
-
276
-
277
def _ensure_state(
    ctx: typer.Context,
) -> CliState:
    """
    Fetch the :class:`CliState` attached to ``ctx``, creating it on demand.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.

    Returns
    -------
    CliState
        The state object held in ``ctx.obj``. When ``ctx.obj`` is missing
        or is not a :class:`CliState`, a default instance is stored there
        first.
    """
    existing = getattr(ctx, 'obj', None)
    if isinstance(existing, CliState):
        return existing

    # Nothing usable on the context: install default flags.
    ctx.obj = CliState()
    return ctx.obj
296
-
297
-
298
- def _format_namespace_kwargs(
299
- *,
300
- format_value: str | None,
301
- default: str,
302
- ) -> dict[str, object]:
303
- """
304
- Return common namespace kwargs for format handling.
305
-
306
- Parameters
307
- ----------
308
- format_value : str | None
309
- User-provided format value from the CLI option.
310
- default : str
311
- Default format to use when none is provided.
312
-
313
- Returns
314
- -------
315
- dict[str, object]
316
- Keyword arguments for format-related namespace attributes.
317
- """
318
- return {
319
- 'format': (format_value or default),
320
- '_format_explicit': (format_value is not None),
321
- }
322
-
323
-
324
- def _infer_resource_type(
325
- value: str,
326
- ) -> str:
327
- """
328
- Infer the resource type from a path, URL, or DSN string.
329
-
330
- Parameters
331
- ----------
332
- value : str
333
- Raw CLI argument that represents a source or target.
334
-
335
- Returns
336
- -------
337
- str
338
- One of ``file``, ``database``, or ``api`` based on heuristics.
339
-
340
- Raises
341
- ------
342
- ValueError
343
- If the resource type could not be inferred.
344
- """
345
- val = (value or '').strip()
346
- low = val.lower()
347
-
348
- match (val, low):
349
- case ('-', _):
350
- return 'file'
351
- case (_, inferred) if inferred.startswith(('http://', 'https://')):
352
- return 'api'
353
- case (_, inferred) if inferred.startswith(_DB_SCHEMES):
354
- return 'database'
355
-
356
- path = Path(val)
357
- if path.exists() or path.suffix:
358
- return 'file'
359
-
360
- raise ValueError(
361
- 'Could not infer resource type. Use --from/--to to specify it.',
362
- )
363
-
364
-
365
def _infer_resource_type_or_exit(
    value: str,
) -> str:
    """
    Infer a resource type, reporting failures as a CLI parameter error.

    Parameters
    ----------
    value : str
        CLI value describing a source/target.

    Returns
    -------
    str
        Inferred resource type.

    Raises
    ------
    typer.BadParameter
        If :func:`_infer_resource_type` raises ``ValueError``; the original
        message is reused and the error is chained.
    """
    try:
        inferred = _infer_resource_type(value)
    except ValueError as exc:  # pragma: no cover - exercised indirectly
        raise typer.BadParameter(str(exc)) from exc
    return inferred
389
-
390
-
391
- def _infer_resource_type_soft(
392
- value: str | None,
393
- ) -> str | None:
394
- """
395
- Make a best-effort inference that tolerates inline payloads.
396
-
397
- Parameters
398
- ----------
399
- value : str | None
400
- CLI value describing a source/target.
401
-
402
- Returns
403
- -------
404
- str | None
405
- Inferred resource type, or ``None`` if inference failed.
406
- """
407
- if value is None:
408
- return None
409
- try:
410
- return _infer_resource_type(value)
411
- except ValueError:
412
- return None
413
-
414
-
415
- def _log_inferred_resource(
416
- state: CliState,
417
- *,
418
- role: str,
419
- value: str,
420
- resource_type: str | None,
421
- ) -> None:
422
- """
423
- Emit a uniform verbose message for inferred resource types.
424
-
425
- Parameters
426
- ----------
427
- state : CliState
428
- Current CLI state stored on the Typer context.
429
- role : str
430
- Friendly label for the resource (e.g., ``source`` or ``target``).
431
- value : str
432
- Resource value provided on the CLI.
433
- resource_type : str | None
434
- Inferred resource type or ``None`` if not inferred.
435
- """
436
- if not state.verbose or resource_type is None:
437
- return
438
- print(
439
- f'Inferred {role}_type={resource_type} for {role}={value}',
440
- file=sys.stderr,
441
- )
442
-
443
-
444
- def _ns(
445
- **kwargs: object,
446
- ) -> argparse.Namespace:
447
- """Build an :class:`argparse.Namespace` for the legacy handlers.
448
-
449
- Parameters
450
- ----------
451
- **kwargs : object
452
- Attributes applied to the resulting namespace.
453
-
454
- Returns
455
- -------
456
- argparse.Namespace
457
- Namespace compatible with the ``cmd_*`` handler signatures.
458
- """
459
- return argparse.Namespace(**kwargs)
460
-
461
-
462
- def _optional_choice(
463
- value: str | None,
464
- choices: Collection[str],
465
- *,
466
- label: str,
467
- ) -> str | None:
468
- """
469
- Validate optional CLI choice inputs while preserving ``None``.
470
-
471
- Parameters
472
- ----------
473
- value : str | None
474
- Candidate value provided by the CLI option.
475
- choices : Collection[str]
476
- Allowed options for the parameter.
477
- label : str
478
- Friendly label rendered in error messages.
479
-
480
- Returns
481
- -------
482
- str | None
483
- Sanitized choice or ``None`` when the option is omitted.
484
- """
485
- if value is None:
486
- return None
487
- return _validate_choice(value, choices, label=label)
488
-
489
-
490
def _resolve_resource_type(
    *,
    explicit_type: str | None,
    override_type: str | None,
    value: str,
    label: str,
    conflict_error: str | None = None,
    legacy_file_error: str | None = None,
) -> str:
    """
    Pick the resource type to use and validate it.

    Precedence is ``explicit_type``, then ``override_type``, then
    inference from ``value``.

    Parameters
    ----------
    explicit_type : str | None
        Explicit resource type provided by the user.
    override_type : str | None
        Resource type provided by an overriding option.
    value : str
        Resource value used for inference when neither type is given.
    label : str
        Label for error messages.
    conflict_error : str | None
        Message raised when both ``explicit_type`` and ``override_type``
        are given. No conflict check is made when this is empty.
    legacy_file_error : str | None
        Message raised when ``explicit_type`` is ``file`` (ignoring case
        and surrounding whitespace). No check is made when this is empty.

    Returns
    -------
    str
        Resolved and validated resource type.

    Raises
    ------
    typer.BadParameter
        On a type conflict, on a rejected legacy ``file`` type, when
        inference fails, or when the chosen type is not an allowed
        connector.
    """
    if explicit_type is None:
        fallback = override_type or _infer_resource_type_or_exit(value)
        return _validate_choice(fallback, _SOURCE_CHOICES, label=label)

    if conflict_error and override_type is not None:
        raise typer.BadParameter(conflict_error)

    is_legacy_file = explicit_type.strip().lower() == 'file'
    if legacy_file_error and is_legacy_file:
        raise typer.BadParameter(legacy_file_error)

    return _validate_choice(explicit_type, _SOURCE_CHOICES, label=label)
539
-
540
-
541
def _stateful_namespace(
    state: CliState,
    *,
    command: str,
    **kwargs: object,
) -> argparse.Namespace:
    """
    Build a handler namespace that also carries the CLI state flags.

    Parameters
    ----------
    state : CliState
        Current CLI state stored on the Typer context.
    command : str
        Logical command name (e.g., ``extract``).
    **kwargs : object
        Additional attributes required by the handler.

    Returns
    -------
    argparse.Namespace
        Namespace with ``command``, ``pretty``, ``quiet``, ``verbose`` and
        every extra keyword argument.
    """
    toggles = {
        'pretty': state.pretty,
        'quiet': state.quiet,
        'verbose': state.verbose,
    }
    # Unpacking twice keeps the "multiple values for keyword" error if a
    # caller passes one of the toggle names in ``kwargs``.
    return _ns(command=command, **toggles, **kwargs)
571
-
572
-
573
- def _validate_choice(
574
- value: str,
575
- choices: Collection[str],
576
- *,
577
- label: str,
578
- ) -> str:
579
- """
580
- Validate CLI input against a whitelist of choices.
581
-
582
- Parameters
583
- ----------
584
- value : str
585
- Candidate value from the CLI option or argument.
586
- choices: Collection[str]
587
- Allowed values for the option.
588
- label : str
589
- Friendly label rendered in the validation error message.
590
-
591
- Returns
592
- -------
593
- str
594
- Sanitized and validated value.
595
-
596
- Raises
597
- ------
598
- typer.BadParameter
599
- If ``value`` is not present in ``choices``.
600
- """
601
- v = (value or '').strip()
602
- if v in choices:
603
- return v
604
- allowed = ', '.join(sorted(choices))
605
- raise typer.BadParameter(
606
- f"Invalid {label} '{value}'. Choose from: {allowed}",
607
- )
608
-
609
-
610
- # SECTION: TYPER APP ======================================================== #
611
-
612
-
613
# Typer application instance (subcommands are registered below).
# - ``help``/``epilog`` come from the CLI_DESCRIPTION/CLI_EPILOG constants.
# - ``no_args_is_help`` is off; the root callback prints the help itself
#   when no subcommand is invoked.
app = typer.Typer(
    name='etlplus',
    help=CLI_DESCRIPTION,
    epilog=CLI_EPILOG,
    add_completion=True,
    no_args_is_help=False,
    rich_markup_mode='markdown',
)
623
-
624
-
625
@app.callback(invoke_without_command=True)
def _root(
    ctx: typer.Context,
    version: bool = typer.Option(
        False,
        '--version',
        '-V',
        is_eager=True,
        help='Show the version and exit.',
    ),
    pretty: bool = typer.Option(
        True,
        '--pretty/--no-pretty',
        help='Pretty-print JSON output (default: pretty).',
    ),
    quiet: bool = typer.Option(
        False,
        '--quiet',
        '-q',
        help='Suppress warnings and non-essential output.',
    ),
    verbose: bool = typer.Option(
        False,
        '--verbose',
        '-v',
        help='Emit extra diagnostics to stderr.',
    ),
) -> None:
    """
    Seed the Typer context with runtime flags and handle root-only options.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    version : bool
        If True, print the etlplus version and exit.
    pretty : bool
        Whether to pretty-print JSON output.
    quiet : bool
        Whether to suppress warnings and non-essential output.
    verbose : bool
        Whether to emit extra diagnostics to stderr.

    Raises
    ------
    typer.Exit
        If ``--version`` is provided or no subcommand is invoked.
    """
    # Store the flags first so subcommands can read them from ``ctx.obj``.
    ctx.obj = CliState(pretty=pretty, quiet=quiet, verbose=verbose)

    if version:
        typer.echo(f'etlplus {__version__}')
        raise typer.Exit(0)

    # A bare invocation prints the help text, except during resilient
    # parsing.
    no_subcommand = ctx.invoked_subcommand is None
    if no_subcommand and not ctx.resilient_parsing:
        help_text = ctx.command.get_help(ctx)
        typer.echo(help_text)
        raise typer.Exit(0)
683
-
684
-
685
@app.command('extract')
def extract_cmd(
    ctx: typer.Context,
    source: SourceInputArg,
    source_format: SourceFormatOption | None = None,
    source_type: SourceOverrideOption | None = None,
) -> int:
    """
    Extract data from files, databases, or REST APIs.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    source : SourceInputArg
        Data source (file path, URL, DSN, or ``-`` for stdin).
    source_format : SourceFormatOption | None, optional
        Payload format when not a file.
    source_type : SourceOverrideOption | None, optional
        Override the inferred source type.

    Returns
    -------
    int
        Zero on success.

    Examples
    --------
    - Extract from a file (type inferred):
        etlplus extract in.csv
    - Extract from a file (explicit via flag):
        etlplus extract --from file in.csv
    - Extract from an API:
        etlplus extract https://example.com/data.json
        etlplus extract --from api https://example.com/data.json
    - Extract from a database DSN:
        etlplus extract --from database postgresql://user:pass@host/db
    - Pipe into transform/load:
        etlplus extract in.csv \
        | etlplus transform --operations '{"select":["a"]}'

    Notes
    -----
    - The ``extract`` command always writes JSON to stdout.
    - CSV output is unsupported for this command.
    - Use shell redirection (``>``) or pipelines to persist the output.
    """
    state = _ensure_state(ctx)

    # Reject unknown overrides before attempting any inference.
    source_type = _optional_choice(
        source_type, _SOURCE_CHOICES, label='source_type',
    )
    source_format = _optional_choice(
        source_format, _FORMAT_CHOICES, label='source_format',
    )

    # An explicit connector wins; otherwise infer it or fail as a bad
    # parameter.
    connector = source_type or _infer_resource_type_or_exit(source)
    _log_inferred_resource(
        state, role='source', value=source, resource_type=connector,
    )

    ns = _stateful_namespace(
        state,
        command='extract',
        source_type=connector,
        source=source,
        **_format_namespace_kwargs(
            format_value=source_format,
            default='json',
        ),
    )
    return int(cmd_extract(ns))
769
-
770
-
771
@app.command('list')
def list_cmd(
    ctx: typer.Context,
    config: PipelineConfigOption,
    jobs: bool = typer.Option(
        False,
        '--jobs',
        help='List available job names and exit',
    ),
    pipelines: bool = typer.Option(
        False,
        '--pipelines',
        help='List ETL pipelines',
    ),
    sources: bool = typer.Option(False, '--sources', help='List data sources'),
    targets: bool = typer.Option(False, '--targets', help='List data targets'),
    transforms: bool = typer.Option(
        False,
        '--transforms',
        help='List data transforms',
    ),
) -> int:
    """
    Print ETL entities from a pipeline YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : PipelineConfigOption
        Path to pipeline YAML configuration file.
    jobs : bool, optional
        If True, list available job names and exit.
    pipelines : bool, optional
        If True, list ETL pipelines.
    sources : bool, optional
        If True, list data sources.
    targets : bool, optional
        If True, list data targets.
    transforms : bool, optional
        If True, list data transforms.

    Returns
    -------
    int
        Zero on success.
    """
    # Each flag is forwarded unchanged to the handler.
    ns = _stateful_namespace(
        _ensure_state(ctx),
        command='list',
        config=config,
        pipelines=pipelines,
        jobs=jobs,
        sources=sources,
        targets=targets,
        transforms=transforms,
    )
    exit_code = cmd_list(ns)
    return int(exit_code)
830
-
831
-
832
@app.command('load')
def load_cmd(
    ctx: typer.Context,
    target: TargetInputArg,
    source_format: StdinFormatOption | None = None,
    target_format: TargetFormatOption | None = None,
    target_type: TargetOverrideOption | None = None,
) -> int:
    """
    Load data into a file, database, or REST API.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    target : TargetInputArg
        Load destination (file path, URL/DSN, or ``-`` for stdout).
    source_format : StdinFormatOption | None, optional
        Hint for parsing stdin payloads (json or csv).
    target_format : TargetFormatOption | None, optional
        Payload format when not a file target (or when TARGET is ``-``).
    target_type : TargetOverrideOption | None, optional
        Override the inferred target type.

    Returns
    -------
    int
        Zero on success.

    Examples
    --------
    - Pipe into a file:
        etlplus extract in.csv \
        | etlplus transform --operations '{"select":["a"]}' \
        | etlplus load --to file out.json
    - Read from stdin and write to a file:
        etlplus load out.json
    - Write to stdout:
        etlplus load --to file -

    Notes
    -----
    - The ``load`` command reads JSON from stdin.
    - CSV input is unsupported unless ``--source-format csv`` is provided.
    - Convert upstream before piping into ``load`` when working with other
        formats.
    """
    state = _ensure_state(ctx)

    # Reject unknown overrides before attempting any inference.
    source_format = _optional_choice(
        source_format, _FORMAT_CHOICES, label='source_format',
    )
    target_type = _optional_choice(
        target_type, _SOURCE_CHOICES, label='target_type',
    )
    target_format = _optional_choice(
        target_format, _FORMAT_CHOICES, label='target_format',
    )

    target_kind = target_type or _infer_resource_type_or_exit(target)

    # ``load`` always reads its payload from stdin.
    stdin_marker = '-'
    stdin_kind = _infer_resource_type_soft(stdin_marker)

    for role, value, kind in (
        ('source', stdin_marker, stdin_kind),
        ('target', target, target_kind),
    ):
        _log_inferred_resource(
            state, role=role, value=value, resource_type=kind,
        )

    ns = _stateful_namespace(
        state,
        command='load',
        source=stdin_marker,
        source_format=source_format,
        target_type=target_kind,
        target=target,
        **_format_namespace_kwargs(
            format_value=target_format,
            default='json',
        ),
    )
    return int(cmd_load(ns))
932
-
933
-
934
@app.command('pipeline')
def pipeline_cmd(
    ctx: typer.Context,
    config: PipelineConfigOption,
    job: str | None = typer.Option(
        None,
        '--job',
        metavar='JOB',
        help='Run a specific job by name',
    ),
    jobs: bool = typer.Option(
        False,
        '--jobs',
        help='List available job names and exit',
    ),
    pipeline: str | None = typer.Option(
        None,
        '--pipeline',
        help='Run a specific pipeline by name',
    ),
) -> int:
    """
    Inspect or run a pipeline YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : PipelineConfigOption
        Path to pipeline YAML configuration file.
    job : str | None, optional
        Name of a specific job to run.
    jobs : bool, optional
        If True, list available job names and exit.
    pipeline : str | None, optional
        Name of a specific pipeline to run.

    Returns
    -------
    int
        Zero on success.
    """
    # ``--job`` takes precedence over ``--pipeline`` when both are given.
    ns = _stateful_namespace(
        _ensure_state(ctx),
        command='pipeline',
        config=config,
        list=jobs,
        run=job or pipeline,
    )
    exit_code = cmd_pipeline(ns)
    return int(exit_code)
986
-
987
-
988
@app.command('run')
def run_cmd(
    ctx: typer.Context,
    config: PipelineConfigOption,
    job: str | None = typer.Option(
        None,
        '-j',
        '--job',
        help='Name of the job to run',
    ),
    pipeline: str | None = typer.Option(
        None,
        '-p',
        '--pipeline',
        help='Name of the pipeline to run',
    ),
) -> int:
    """
    Execute an ETL job or pipeline from a YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : PipelineConfigOption
        Path to pipeline YAML configuration file.
    job : str | None, optional
        Name of the job to run.
    pipeline : str | None, optional
        Name of the pipeline to run.

    Returns
    -------
    int
        Zero on success.
    """
    # ``job`` and ``pipeline`` are forwarded as-is; the handler decides
    # what to do with them.
    ns = _stateful_namespace(
        _ensure_state(ctx),
        command='run',
        config=config,
        job=job,
        pipeline=pipeline,
    )
    exit_code = cmd_run(ns)
    return int(exit_code)
1033
-
1034
-
1035
@app.command('transform')
def transform_cmd(
    ctx: typer.Context,
    operations: OperationsJSONOption = '{}',
    source: StreamingSourceArg = '-',
    source_format: SourceFormatOption | None = None,
    source_type: SourceOverrideOption | None = None,
    target: TargetPathOption | None = None,
    target_format: TargetFormatOption | None = None,
    target_type: TargetOverrideOption | None = None,
) -> int:
    """
    Transform records using JSON-described operations.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    operations : OperationsJSONOption, optional
        Transformation operations as a JSON string.
    source : StreamingSourceArg, optional
        Data source (file path or ``-`` for stdin).
    source_format : SourceFormatOption | None, optional
        Input payload format when not a file (or when SOURCE is -).
    source_type : SourceOverrideOption | None, optional
        Override the inferred source type.
    target : TargetPathOption | None, optional
        Optional output path. Use ``-`` for stdout.
    target_format : TargetFormatOption | None, optional
        Output payload format when not a file target (or when OUTPUT is -).
        Accepts ``--target-format``.
    target_type : TargetOverrideOption | None, optional
        Override the inferred target type.

    Returns
    -------
    int
        Zero on success.

    Examples
    --------
    - Transform data from a file and write to another file:
        etlplus transform --from file in.json \
        --operations '{"select": ["id", "name"]}' \
        --to file out.json
    - Transform data from stdin and write to stdout:
        cat in.json \
        | etlplus transform \
        --operations '{"filter": {"field": "age", "gt": 30}}'
    - Transform data from a file and write to stdout:
        etlplus transform --from file in.csv \
        --source-format csv \
        --operations '{"select": ["id", "email"]}'
    - Transform data from stdin and write to a file:
        cat in.json \
        | etlplus transform --operations '{"sort": ["-created_at"]}' \
        --to file out.json

    Notes
    -----
    - The ``transform`` command reads JSON from stdin when SOURCE is ``-``.
    - CSV input is unsupported for this command.
    - Convert upstream before piping into ``transform``.
    """
    state = _ensure_state(ctx)

    # Validate the explicit overrides first. The order of these calls
    # decides which invalid option is reported when several are wrong.
    source_format = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )
    source_type = _optional_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )
    target_format = _optional_choice(
        target_format,
        _FORMAT_CHOICES,
        label='target_format',
    )
    # Yields ``format`` (defaulting to json) and ``_format_explicit``.
    target_format_kwargs = _format_namespace_kwargs(
        format_value=target_format,
        default='json',
    )
    target_type = _optional_choice(
        target_type,
        _SOURCE_CHOICES,
        label='target_type',
    )

    # Source inference is soft: the result may be ``None`` when inference
    # fails, and that is passed on to the handler.
    resolved_source_type = source_type or _infer_resource_type_soft(source)
    # NOTE(review): ``source`` defaults to '-' in the signature, so the
    # ``None`` fallback looks unreachable via the CLI -- confirm.
    resolved_source_value = source if source is not None else '-'
    # No ``--target`` means stdout.
    resolved_target_value = target if target is not None else '-'

    # Re-check the resolved source type, so an inferred type that is not in
    # ``_SOURCE_CHOICES`` is rejected too.
    if resolved_source_type is not None:
        resolved_source_type = _validate_choice(
            resolved_source_type,
            _SOURCE_CHOICES,
            label='source_type',
        )

    # Target inference is strict: ``--target-type`` wins, otherwise the type
    # is inferred from the target value and a failure raises a bad
    # parameter.
    resolved_target_type = _resolve_resource_type(
        explicit_type=None,
        override_type=target_type,
        value=resolved_target_value,
        label='target_type',
    )

    _log_inferred_resource(
        state,
        role='source',
        value=resolved_source_value,
        resource_type=resolved_source_type,
    )
    _log_inferred_resource(
        state,
        role='target',
        value=resolved_target_value,
        resource_type=resolved_target_type,
    )

    # The namespace carries both ``target_format`` and the generic
    # ``format``/``_format_explicit`` pair; ``operations`` is parsed from
    # its JSON string by ``json_type``.
    ns = _stateful_namespace(
        state,
        command='transform',
        source=resolved_source_value,
        source_type=resolved_source_type,
        operations=json_type(operations),
        target=resolved_target_value,
        source_format=source_format,
        target_type=resolved_target_type,
        target_format=target_format_kwargs['format'],
        **target_format_kwargs,
    )
    return int(cmd_transform(ns))
1170
-
1171
-
1172
@app.command('validate')
def validate_cmd(
    ctx: typer.Context,
    rules: RulesJSONOption = '{}',
    source: StreamingSourceArg = '-',
    source_format: SourceFormatOption | None = None,
    source_type: SourceOverrideOption | None = None,
    target: TargetPathOption | None = None,
) -> int:
    """
    Validate data against JSON-described rules.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    rules : RulesJSONOption, optional
        Validation rules as a JSON string.
    source : StreamingSourceArg, optional
        Data source (file path or ``-`` for stdin).
    source_format : SourceFormatOption | None, optional
        Optional stdin format hint (JSON or CSV) when SOURCE is ``-``.
    source_type : SourceOverrideOption | None, optional
        Override the inferred source type when heuristics fail.
    target : TargetPathOption | None, optional
        Optional output path. Use ``-`` for stdout.

    Returns
    -------
    int
        Zero on success.
    """
    # Reject unknown overrides before touching the context state.
    source_format = _optional_choice(
        source_format, _FORMAT_CHOICES, label='source_format',
    )
    source_type = _optional_choice(
        source_type, _SOURCE_CHOICES, label='source_type',
    )
    format_kwargs = _format_namespace_kwargs(
        format_value=source_format,
        default='json',
    )

    state = _ensure_state(ctx)

    # Soft inference: the connector may stay ``None``.
    connector = source_type or _infer_resource_type_soft(source)
    _log_inferred_resource(
        state, role='source', value=source, resource_type=connector,
    )

    ns = _stateful_namespace(
        state,
        command='validate',
        source=source,
        source_type=connector,
        rules=json_type(rules),  # convert CLI string to dict
        target=target,
        source_format=source_format,
        **format_kwargs,
    )
    return int(cmd_validate(ns))