etlplus 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/cli/app.py DELETED
@@ -1,1312 +0,0 @@
1
- """
2
- :mod:`etlplus.cli.app` module.
3
-
4
- Defines the main `Typer` application for the ``etlplus`` command-line
5
- interface (CLI).
6
-
7
- Typer-First Interface
8
- ---------------------
9
- The CLI is implemented using `Typer` (Click) for parsing, help text, and
10
- subcommand dispatch. The Typer layer focuses on ergonomics (git-style
11
- subcommands, optional inference of resource types, stdin/stdout piping, and
12
- quality-of-life flags), while delegating business logic to the existing
13
- ``cmd_*`` handlers.
14
-
15
- Namespace Adapter
16
- -----------------
17
- The command handlers continue to accept an ``argparse.Namespace`` for
18
- backwards compatibility with existing ``cmd_*`` functions and tests. The
19
- Typer commands adapt parsed arguments into an ``argparse.Namespace`` and then
20
- call the corresponding ``cmd_*`` handler.
21
-
22
- Subcommands
23
- -----------
24
- - ``extract``: extract data from files, databases, or REST APIs
25
- - ``validate``: validate data against rules
26
- - ``transform``: transform records
27
- - ``load``: load data to files, databases, or REST APIs
28
- - ``render``: render SQL DDL from table schema specs
29
-
30
- Notes
31
- -----
32
- - Use ``-`` to read from stdin or to write to stdout.
33
- - Commands ``extract`` and ``transform`` support the command-line option
34
- ``--from`` to override inferred resource types.
35
- - Commands ``transform`` and ``load`` support the command-line option ``--to``
36
- to override inferred resource types.
37
- """
38
-
39
- # Pylint struggles with large CLI surfaces that legitimately require
40
- # numerous arguments in a single module.
41
- # pylint: disable=too-many-lines
42
- # pylint: disable=too-many-arguments,too-many-positional-arguments
43
-
44
- from __future__ import annotations
45
-
46
- import argparse
47
- import sys
48
- from collections.abc import Collection
49
- from dataclasses import dataclass
50
- from pathlib import Path
51
- from typing import Annotated
52
- from typing import Final
53
-
54
- import typer
55
-
56
- from .. import __version__
57
- from ..enums import DataConnectorType
58
- from ..enums import FileFormat
59
- from ..utils import json_type
60
- from .handlers import check_handler
61
- from .handlers import extract_handler
62
- from .handlers import load_handler
63
- from .handlers import render_handler
64
- from .handlers import run_handler
65
- from .handlers import transform_handler
66
- from .handlers import validate_handler
67
-
68
- # SECTION: EXPORTS ========================================================== #
69
-
70
-
71
- __all__ = [
72
- # Apps
73
- 'app',
74
- ]
75
-
76
-
77
- # SECTION: INTERNAL CONSTANTS =============================================== #
78
-
79
-
80
- _DB_SCHEMES = (
81
- 'postgres://',
82
- 'postgresql://',
83
- 'mysql://',
84
- )
85
-
86
- _SOURCE_CHOICES: Final[frozenset[str]] = frozenset(DataConnectorType.choices())
87
- _FORMAT_CHOICES: Final[frozenset[str]] = frozenset(FileFormat.choices())
88
-
89
-
90
- # SECTION: CONSTANTS ======================================================== #
91
-
92
-
93
- CLI_DESCRIPTION: Final[str] = '\n'.join(
94
- [
95
- 'ETLPlus - A Swiss Army knife for simple ETL operations.',
96
- '',
97
- ' Provide a subcommand and options. Examples:',
98
- '',
99
- ' etlplus extract in.csv > out.json',
100
- ' etlplus validate in.json --rules \'{"required": ["id"]}\'',
101
- (
102
- ' etlplus transform --from file in.json '
103
- '--operations \'{"select": ["id"]}\' --to file -o out.json'
104
- ),
105
- ' etlplus extract in.csv | etlplus load --to file out.json',
106
- ' cat data.json | etlplus load --to api https://example.com/data',
107
- '',
108
- ' Override format inference when extensions are misleading:',
109
- '',
110
- ' etlplus extract data.txt --source-format csv',
111
- ' etlplus load payload.bin --target-format json',
112
- ],
113
- )
114
-
115
- CLI_EPILOG: Final[str] = '\n'.join(
116
- [
117
- 'Tip:',
118
- ' --source-format and --target-format override format inference '
119
- 'based on filename extensions when needed.',
120
- ],
121
- )
122
-
123
- PROJECT_URL: Final[str] = 'https://github.com/Dagitali/ETLPlus'
124
-
125
-
126
- # SECTION: TYPE ALIASES ==================================================== #
127
-
128
-
129
- OperationsJSONOption = Annotated[
130
- str,
131
- typer.Option(
132
- '--operations',
133
- help='Transformation operations as JSON string.',
134
- ),
135
- ]
136
-
137
- PipelineConfigOption = Annotated[
138
- str,
139
- typer.Option(
140
- ...,
141
- '--config',
142
- metavar='PATH',
143
- help='Path to pipeline YAML configuration file.',
144
- ),
145
- ]
146
-
147
- RenderConfigOption = Annotated[
148
- str | None,
149
- typer.Option(
150
- '--config',
151
- metavar='PATH',
152
- help='Pipeline YAML that includes table_schemas for rendering.',
153
- show_default=False,
154
- ),
155
- ]
156
-
157
- RenderOutputOption = Annotated[
158
- str | None,
159
- typer.Option(
160
- '--output',
161
- '-o',
162
- metavar='PATH',
163
- help='Write rendered SQL to PATH (default: stdout).',
164
- ),
165
- ]
166
-
167
- RenderSpecOption = Annotated[
168
- str | None,
169
- typer.Option(
170
- '--spec',
171
- metavar='PATH',
172
- help='Standalone table spec file (.yml/.yaml/.json).',
173
- show_default=False,
174
- ),
175
- ]
176
-
177
- RenderTableOption = Annotated[
178
- str | None,
179
- typer.Option(
180
- '--table',
181
- metavar='NAME',
182
- help='Filter to a single table name from table_schemas.',
183
- ),
184
- ]
185
-
186
- RenderTemplateOption = Annotated[
187
- str,
188
- typer.Option(
189
- '--template',
190
- '-t',
191
- metavar='KEY|PATH',
192
- help='Template key (ddl/view) or path to a Jinja template file.',
193
- show_default=True,
194
- ),
195
- ]
196
-
197
- RenderTemplatePathOption = Annotated[
198
- str | None,
199
- typer.Option(
200
- '--template-path',
201
- metavar='PATH',
202
- help=(
203
- 'Explicit path to a Jinja template file (overrides template key).'
204
- ),
205
- ),
206
- ]
207
-
208
- RulesJSONOption = Annotated[
209
- str,
210
- typer.Option(
211
- '--rules',
212
- help='Validation rules as JSON string.',
213
- ),
214
- ]
215
-
216
- SourceFormatOption = Annotated[
217
- str | None,
218
- typer.Option(
219
- '--source-format',
220
- metavar='FORMAT',
221
- show_default=False,
222
- rich_help_panel='Format overrides',
223
- help=(
224
- 'Input payload format when SOURCE is - or an inline payload. '
225
- 'File sources infer format from the extension.'
226
- ),
227
- ),
228
- ]
229
-
230
- SourceInputArg = Annotated[
231
- str,
232
- typer.Argument(
233
- ...,
234
- metavar='SOURCE',
235
- help=(
236
- 'Extract from SOURCE. Use --from/--source-type to override the '
237
- 'inferred connector when needed.'
238
- ),
239
- ),
240
- ]
241
-
242
- SourceOverrideOption = Annotated[
243
- str | None,
244
- typer.Option(
245
- '--source-type',
246
- metavar='CONNECTOR',
247
- show_default=False,
248
- rich_help_panel='I/O overrides',
249
- help='Override the inferred source type (file, database, api).',
250
- ),
251
- ]
252
-
253
- StdinFormatOption = Annotated[
254
- str | None,
255
- typer.Option(
256
- '--source-format',
257
- metavar='FORMAT',
258
- show_default=False,
259
- rich_help_panel='Format overrides',
260
- help='Input payload format when reading from stdin (default: json).',
261
- ),
262
- ]
263
-
264
- StreamingSourceArg = Annotated[
265
- str,
266
- typer.Argument(
267
- ...,
268
- metavar='SOURCE',
269
- help=(
270
- 'Data source to transform or validate (path, JSON payload, or '
271
- '- for stdin).'
272
- ),
273
- ),
274
- ]
275
-
276
- TargetFormatOption = Annotated[
277
- str | None,
278
- typer.Option(
279
- '--target-format',
280
- metavar='FORMAT',
281
- show_default=False,
282
- rich_help_panel='Format overrides',
283
- help=(
284
- 'Payload format when TARGET is - or a non-file connector. File '
285
- 'targets infer format from the extension.'
286
- ),
287
- ),
288
- ]
289
-
290
- TargetInputArg = Annotated[
291
- str,
292
- typer.Argument(
293
- ...,
294
- metavar='TARGET',
295
- help=(
296
- 'Load JSON data from stdin into TARGET. Use --to/--target-type '
297
- 'to override connector inference when needed. Source data must '
298
- 'be piped into stdin.'
299
- ),
300
- ),
301
- ]
302
-
303
- TargetOverrideOption = Annotated[
304
- str | None,
305
- typer.Option(
306
- '--target-type',
307
- metavar='CONNECTOR',
308
- show_default=False,
309
- rich_help_panel='I/O overrides',
310
- help='Override the inferred target type (file, database, api).',
311
- ),
312
- ]
313
-
314
- TargetPathOption = Annotated[
315
- str | None,
316
- typer.Option(
317
- '--target',
318
- metavar='PATH',
319
- help='Target file for transformed or validated output (- for stdout).',
320
- ),
321
- ]
322
-
323
-
324
- # SECTION: DATA CLASSES ===================================================== #
325
-
326
-
327
- @dataclass(slots=True)
328
- class CliState:
329
- """Mutable container for runtime CLI toggles."""
330
-
331
- pretty: bool = True
332
- quiet: bool = False
333
- verbose: bool = False
334
-
335
-
336
- # SECTION: INTERNAL FUNCTIONS =============================================== #
337
-
338
-
339
- def _ensure_state(
340
- ctx: typer.Context,
341
- ) -> CliState:
342
- """
343
- Return the :class:`CliState` stored on the :mod:`typer` context.
344
-
345
- Parameters
346
- ----------
347
- ctx : typer.Context
348
- Typer execution context provided to the command.
349
-
350
- Returns
351
- -------
352
- CliState
353
- Mutable CLI flag container stored on ``ctx``.
354
- """
355
- if not isinstance(getattr(ctx, 'obj', None), CliState):
356
- ctx.obj = CliState()
357
- return ctx.obj
358
-
359
-
360
- def _format_namespace_kwargs(
361
- *,
362
- format_value: str | None,
363
- default: str,
364
- ) -> dict[str, object]:
365
- """
366
- Return common namespace kwargs for format handling.
367
-
368
- Parameters
369
- ----------
370
- format_value : str | None
371
- User-provided format value from the CLI option.
372
- default : str
373
- Default format to use when none is provided.
374
-
375
- Returns
376
- -------
377
- dict[str, object]
378
- Keyword arguments for format-related namespace attributes.
379
- """
380
- return {
381
- 'format': (format_value or default),
382
- '_format_explicit': (format_value is not None),
383
- }
384
-
385
-
386
- def _infer_resource_type(
387
- value: str,
388
- ) -> str:
389
- """
390
- Infer the resource type from a path, URL, or DSN string.
391
-
392
- Parameters
393
- ----------
394
- value : str
395
- Raw CLI argument that represents a source or target.
396
-
397
- Returns
398
- -------
399
- str
400
- One of ``file``, ``database``, or ``api`` based on heuristics.
401
-
402
- Raises
403
- ------
404
- ValueError
405
- If the resource type could not be inferred.
406
- """
407
- val = (value or '').strip()
408
- low = val.lower()
409
-
410
- match (val, low):
411
- case ('-', _):
412
- return 'file'
413
- case (_, inferred) if inferred.startswith(('http://', 'https://')):
414
- return 'api'
415
- case (_, inferred) if inferred.startswith(_DB_SCHEMES):
416
- return 'database'
417
-
418
- path = Path(val)
419
- if path.exists() or path.suffix:
420
- return 'file'
421
-
422
- raise ValueError(
423
- 'Could not infer resource type. Use --from/--to to specify it.',
424
- )
425
-
426
-
427
- def _infer_resource_type_or_exit(
428
- value: str,
429
- ) -> str:
430
- """Infer a resource type and map ``ValueError`` to ``BadParameter``.
431
-
432
- Parameters
433
- ----------
434
- value : str
435
- CLI value describing a source/target.
436
-
437
- Returns
438
- -------
439
- str
440
- Inferred resource type.
441
-
442
- Raises
443
- ------
444
- typer.BadParameter
445
- If heuristics fail to infer a resource type.
446
- """
447
- try:
448
- return _infer_resource_type(value)
449
- except ValueError as exc: # pragma: no cover - exercised indirectly
450
- raise typer.BadParameter(str(exc)) from exc
451
-
452
-
453
- def _infer_resource_type_soft(
454
- value: str | None,
455
- ) -> str | None:
456
- """
457
- Make a best-effort inference that tolerates inline payloads.
458
-
459
- Parameters
460
- ----------
461
- value : str | None
462
- CLI value describing a source/target.
463
-
464
- Returns
465
- -------
466
- str | None
467
- Inferred resource type, or ``None`` if inference failed.
468
- """
469
- if value is None:
470
- return None
471
- try:
472
- return _infer_resource_type(value)
473
- except ValueError:
474
- return None
475
-
476
-
477
- def _log_inferred_resource(
478
- state: CliState,
479
- *,
480
- role: str,
481
- value: str,
482
- resource_type: str | None,
483
- ) -> None:
484
- """
485
- Emit a uniform verbose message for inferred resource types.
486
-
487
- Parameters
488
- ----------
489
- state : CliState
490
- Current CLI state stored on the Typer context.
491
- role : str
492
- Friendly label for the resource (e.g., ``source`` or ``target``).
493
- value : str
494
- Resource value provided on the CLI.
495
- resource_type : str | None
496
- Inferred resource type or ``None`` if not inferred.
497
- """
498
- if not state.verbose or resource_type is None:
499
- return
500
- print(
501
- f'Inferred {role}_type={resource_type} for {role}={value}',
502
- file=sys.stderr,
503
- )
504
-
505
-
506
- def _ns(
507
- **kwargs: object,
508
- ) -> argparse.Namespace:
509
- """Build an :class:`argparse.Namespace` for the legacy handlers.
510
-
511
- Parameters
512
- ----------
513
- **kwargs : object
514
- Attributes applied to the resulting namespace.
515
-
516
- Returns
517
- -------
518
- argparse.Namespace
519
- Namespace compatible with the ``cmd_*`` handler signatures.
520
- """
521
- return argparse.Namespace(**kwargs)
522
-
523
-
524
- def _optional_choice(
525
- value: str | None,
526
- choices: Collection[str],
527
- *,
528
- label: str,
529
- ) -> str | None:
530
- """
531
- Validate optional CLI choice inputs while preserving ``None``.
532
-
533
- Parameters
534
- ----------
535
- value : str | None
536
- Candidate value provided by the CLI option.
537
- choices : Collection[str]
538
- Allowed options for the parameter.
539
- label : str
540
- Friendly label rendered in error messages.
541
-
542
- Returns
543
- -------
544
- str | None
545
- Sanitized choice or ``None`` when the option is omitted.
546
- """
547
- if value is None:
548
- return None
549
- return _validate_choice(value, choices, label=label)
550
-
551
-
552
- def _resolve_resource_type(
553
- *,
554
- explicit_type: str | None,
555
- override_type: str | None,
556
- value: str,
557
- label: str,
558
- conflict_error: str | None = None,
559
- legacy_file_error: str | None = None,
560
- ) -> str:
561
- """
562
- Resolve resource type preference order and validate it.
563
-
564
- Parameters
565
- ----------
566
- explicit_type : str | None
567
- Explicit resource type provided by the user.
568
- override_type : str | None
569
- Resource type provided by an overriding option.
570
- value : str
571
- Resource value to infer type from if no explicit or override type is
572
- given.
573
- label : str
574
- Friendly label for error messages.
575
- conflict_error : str | None
576
- Error message to raise if there is a conflict between explicit and
577
- override types.
578
- legacy_file_error : str | None
579
- Error message to raise if the explicit type is a legacy 'file' type.
580
-
581
- Returns
582
- -------
583
- str
584
- Resolved and validated resource type.
585
-
586
- Raises
587
- ------
588
- typer.BadParameter
589
- If there is a conflict between explicit and override types, or if the
590
- explicit type is a legacy 'file' type.
591
- """
592
- if explicit_type is not None:
593
- if override_type is not None and conflict_error:
594
- raise typer.BadParameter(conflict_error)
595
- if legacy_file_error and explicit_type.strip().lower() == 'file':
596
- raise typer.BadParameter(legacy_file_error)
597
- candidate = explicit_type
598
- else:
599
- candidate = override_type or _infer_resource_type_or_exit(value)
600
- return _validate_choice(candidate, _SOURCE_CHOICES, label=label)
601
-
602
-
603
- def _stateful_namespace(
604
- state: CliState,
605
- *,
606
- command: str,
607
- **kwargs: object,
608
- ) -> argparse.Namespace:
609
- """
610
- Attach CLI state toggles to a handler namespace.
611
-
612
- Parameters
613
- ----------
614
- state : CliState
615
- Current CLI state stored on the Typer context.
616
- command : str
617
- Logical command name (e.g., ``extract``).
618
- **kwargs : object
619
- Additional attributes required by the handler.
620
-
621
- Returns
622
- -------
623
- argparse.Namespace
624
- Namespace compatible with the ``cmd_*`` handler signatures.
625
- """
626
- return _ns(
627
- command=command,
628
- pretty=state.pretty,
629
- quiet=state.quiet,
630
- verbose=state.verbose,
631
- **kwargs,
632
- )
633
-
634
-
635
- def _validate_choice(
636
- value: str,
637
- choices: Collection[str],
638
- *,
639
- label: str,
640
- ) -> str:
641
- """
642
- Validate CLI input against a whitelist of choices.
643
-
644
- Parameters
645
- ----------
646
- value : str
647
- Candidate value from the CLI option or argument.
648
- choices: Collection[str]
649
- Allowed values for the option.
650
- label : str
651
- Friendly label rendered in the validation error message.
652
-
653
- Returns
654
- -------
655
- str
656
- Sanitized and validated value.
657
-
658
- Raises
659
- ------
660
- typer.BadParameter
661
- If ``value`` is not present in ``choices``.
662
- """
663
- v = (value or '').strip()
664
- if v in choices:
665
- return v
666
- allowed = ', '.join(sorted(choices))
667
- raise typer.BadParameter(
668
- f"Invalid {label} '{value}'. Choose from: {allowed}",
669
- )
670
-
671
-
672
- # SECTION: TYPER APP ======================================================== #
673
-
674
-
675
- # Typer application instance (subcommands are registered below).
676
- app = typer.Typer(
677
- name='etlplus',
678
- # help='ETLPlus - A Swiss Army knife for simple ETL operations.',
679
- help=CLI_DESCRIPTION,
680
- epilog=CLI_EPILOG,
681
- add_completion=True,
682
- no_args_is_help=False,
683
- rich_markup_mode='markdown',
684
- )
685
-
686
-
687
- @app.callback(invoke_without_command=True)
688
- def _root(
689
- ctx: typer.Context,
690
- version: bool = typer.Option(
691
- False,
692
- '--version',
693
- '-V',
694
- is_eager=True,
695
- help='Show the version and exit.',
696
- ),
697
- pretty: bool = typer.Option(
698
- True,
699
- '--pretty/--no-pretty',
700
- help='Pretty-print JSON output (default: pretty).',
701
- ),
702
- quiet: bool = typer.Option(
703
- False,
704
- '--quiet',
705
- '-q',
706
- help='Suppress warnings and non-essential output.',
707
- ),
708
- verbose: bool = typer.Option(
709
- False,
710
- '--verbose',
711
- '-v',
712
- help='Emit extra diagnostics to stderr.',
713
- ),
714
- ) -> None:
715
- """
716
- Seed the Typer context with runtime flags and handle root-only options.
717
-
718
- Parameters
719
- ----------
720
- ctx : typer.Context
721
- Typer execution context provided to the command.
722
- version : bool
723
- If True, print the etlplus version and exit.
724
- pretty : bool
725
- Whether to pretty-print JSON output.
726
- quiet : bool
727
- Whether to suppress warnings and non-essential output.
728
- verbose : bool
729
- Whether to emit extra diagnostics to stderr.
730
-
731
- Raises
732
- ------
733
- typer.Exit
734
- If ``--version`` is provided or no subcommand is invoked.
735
- """
736
- ctx.obj = CliState(pretty=pretty, quiet=quiet, verbose=verbose)
737
-
738
- if version:
739
- typer.echo(f'etlplus {__version__}')
740
- raise typer.Exit(0)
741
-
742
- if ctx.invoked_subcommand is None and not ctx.resilient_parsing:
743
- typer.echo(ctx.command.get_help(ctx))
744
- raise typer.Exit(0)
745
-
746
-
747
- @app.command('check')
748
- def check_cmd(
749
- ctx: typer.Context,
750
- config: PipelineConfigOption,
751
- jobs: bool = typer.Option(
752
- False,
753
- '--jobs',
754
- help='List available job names and exit',
755
- ),
756
- pipelines: bool = typer.Option(
757
- False,
758
- '--pipelines',
759
- help='List ETL pipelines',
760
- ),
761
- sources: bool = typer.Option(
762
- False,
763
- '--sources',
764
- help='List data sources',
765
- ),
766
- summary: bool = typer.Option(
767
- False,
768
- '--summary',
769
- help='Show pipeline summary (name, version, sources, targets, jobs)',
770
- ),
771
- targets: bool = typer.Option(
772
- False,
773
- '--targets',
774
- help='List data targets',
775
- ),
776
- transforms: bool = typer.Option(
777
- False,
778
- '--transforms',
779
- help='List data transforms',
780
- ),
781
- ) -> int:
782
- """
783
- Print ETL entities from a pipeline YAML configuration.
784
-
785
- Parameters
786
- ----------
787
- ctx : typer.Context
788
- Typer execution context provided to the command.
789
- config : PipelineConfigOption
790
- Path to pipeline YAML configuration file.
791
- jobs : bool, optional
792
- If True, list available job names and exit.
793
- pipelines : bool, optional
794
- If True, list ETL pipelines.
795
- sources : bool, optional
796
- If True, list data sources.
797
- summary : bool, optional
798
- If True, show pipeline summary (name, version, sources, targets, jobs).
799
- targets : bool, optional
800
- If True, list data targets.
801
- transforms : bool, optional
802
- If True, list data transforms.
803
-
804
- Returns
805
- -------
806
- int
807
- Zero on success.
808
- """
809
- state = _ensure_state(ctx)
810
- ns = _stateful_namespace(
811
- state,
812
- command='check',
813
- config=config,
814
- summary=summary,
815
- pipelines=pipelines,
816
- jobs=jobs,
817
- sources=sources,
818
- targets=targets,
819
- transforms=transforms,
820
- )
821
- return int(check_handler(ns))
822
-
823
-
824
- @app.command('extract')
825
- def extract_cmd(
826
- ctx: typer.Context,
827
- source: SourceInputArg,
828
- source_format: SourceFormatOption | None = None,
829
- source_type: SourceOverrideOption | None = None,
830
- ) -> int:
831
- """
832
- Extract data from files, databases, or REST APIs.
833
-
834
- Parameters
835
- ----------
836
- ctx : typer.Context
837
- Typer execution context provided to the command.
838
- source : SourceInputArg
839
- Data source (file path, URL, DSN, or ``-`` for stdin).
840
- source_format : SourceFormatOption | None, optional
841
- Payload format when not a file.
842
- source_type : SourceOverrideOption | None, optional
843
- Override the inferred source type.
844
-
845
- Returns
846
- -------
847
- int
848
- Zero on success.
849
-
850
- Examples
851
- --------
852
- - Extract from a file (type inferred):
853
- etlplus extract in.csv
854
- - Extract from a file (explicit via flag):
855
- etlplus extract --from file in.csv
856
- - Extract from an API:
857
- etlplus extract https://example.com/data.json
858
- etlplus extract --from api https://example.com/data.json
859
- - Extract from a database DSN:
860
- etlplus extract --from database postgresql://user:pass@host/db
861
- - Pipe into transform/load:
862
- etlplus extract in.csv \
863
- | etlplus transform --operations '{"select":["a"]}'
864
-
865
- Notes
866
- -----
867
- - The ``extract`` command always writes JSON to stdout.
868
- - CSV output is unsupported for this command.
869
- - Use shell redirection (``>``) or pipelines to persist the output.
870
- """
871
- state = _ensure_state(ctx)
872
-
873
- source_type = _optional_choice(
874
- source_type,
875
- _SOURCE_CHOICES,
876
- label='source_type',
877
- )
878
- source_format = _optional_choice(
879
- source_format,
880
- _FORMAT_CHOICES,
881
- label='source_format',
882
- )
883
-
884
- resolved_source = source
885
- resolved_source_type = source_type or _infer_resource_type_or_exit(
886
- resolved_source,
887
- )
888
-
889
- _log_inferred_resource(
890
- state,
891
- role='source',
892
- value=resolved_source,
893
- resource_type=resolved_source_type,
894
- )
895
-
896
- format_kwargs = _format_namespace_kwargs(
897
- format_value=source_format,
898
- default='json',
899
- )
900
- ns = _stateful_namespace(
901
- state,
902
- command='extract',
903
- source_type=resolved_source_type,
904
- source=resolved_source,
905
- **format_kwargs,
906
- )
907
- return int(extract_handler(ns))
908
-
909
-
910
- @app.command('load')
911
- def load_cmd(
912
- ctx: typer.Context,
913
- target: TargetInputArg,
914
- source_format: StdinFormatOption | None = None,
915
- target_format: TargetFormatOption | None = None,
916
- target_type: TargetOverrideOption | None = None,
917
- ) -> int:
918
- """
919
- Load data into a file, database, or REST API.
920
-
921
- Parameters
922
- ----------
923
- ctx : typer.Context
924
- Typer execution context provided to the command.
925
- target : TargetInputArg
926
- Load destination (file path, URL/DSN, or ``-`` for stdout).
927
- source_format : StdinFormatOption | None, optional
928
- Hint for parsing stdin payloads (json or csv).
929
- target_format : TargetFormatOption | None, optional
930
- Payload format when not a file target (or when TARGET is ``-``).
931
- target_type : TargetOverrideOption | None, optional
932
- Override the inferred target type.
933
-
934
- Returns
935
- -------
936
- int
937
- Zero on success.
938
-
939
- Examples
940
- --------
941
- - Pipe into a file:
942
- etlplus extract in.csv \
943
- | etlplus transform --operations '{"select":["a"]}' \
944
- | etlplus load --to file out.json
945
- - Read from stdin and write to a file:
946
- etlplus load out.json
947
- - Write to stdout:
948
- etlplus load --to file -
949
-
950
- Notes
951
- -----
952
- - The ``load`` command reads JSON from stdin.
953
- - CSV input is unsupported unless ``--source-format csv`` is provided.
954
- - Convert upstream before piping into ``load`` when working with other
955
- formats.
956
- """
957
- state = _ensure_state(ctx)
958
-
959
- source_format = _optional_choice(
960
- source_format,
961
- _FORMAT_CHOICES,
962
- label='source_format',
963
- )
964
- target_type = _optional_choice(
965
- target_type,
966
- _SOURCE_CHOICES,
967
- label='target_type',
968
- )
969
- target_format = _optional_choice(
970
- target_format,
971
- _FORMAT_CHOICES,
972
- label='target_format',
973
- )
974
-
975
- resolved_target = target
976
- resolved_target_type = target_type or _infer_resource_type_or_exit(
977
- resolved_target,
978
- )
979
-
980
- resolved_source_value = '-'
981
- resolved_source_type = _infer_resource_type_soft(resolved_source_value)
982
-
983
- _log_inferred_resource(
984
- state,
985
- role='source',
986
- value=resolved_source_value,
987
- resource_type=resolved_source_type,
988
- )
989
- _log_inferred_resource(
990
- state,
991
- role='target',
992
- value=resolved_target,
993
- resource_type=resolved_target_type,
994
- )
995
-
996
- format_kwargs = _format_namespace_kwargs(
997
- format_value=target_format,
998
- default='json',
999
- )
1000
- ns = _stateful_namespace(
1001
- state,
1002
- command='load',
1003
- source=resolved_source_value,
1004
- source_format=source_format,
1005
- target_type=resolved_target_type,
1006
- target=resolved_target,
1007
- **format_kwargs,
1008
- )
1009
- return int(load_handler(ns))
1010
-
1011
-
1012
- @app.command('render')
1013
- def render_cmd(
1014
- ctx: typer.Context,
1015
- config: RenderConfigOption = None,
1016
- spec: RenderSpecOption = None,
1017
- table: RenderTableOption = None,
1018
- template: RenderTemplateOption = 'ddl',
1019
- template_path: RenderTemplatePathOption = None,
1020
- output: RenderOutputOption = None,
1021
- ) -> int:
1022
- """
1023
- Render SQL DDL from table schemas defined in YAML/JSON configs.
1024
-
1025
- Parameters
1026
- ----------
1027
- ctx : typer.Context
1028
- Typer execution context provided to the command.
1029
- config : RenderConfigOption, optional
1030
- Pipeline YAML containing ``table_schemas`` entries.
1031
- spec : RenderSpecOption, optional
1032
- Standalone table spec file (.yml/.yaml/.json).
1033
- table : RenderTableOption, optional
1034
- Filter to a single table name within the available specs.
1035
- template : RenderTemplateOption, optional
1036
- Built-in template key or template file path.
1037
- template_path : RenderTemplatePathOption, optional
1038
- Explicit template file path to render with.
1039
- output : RenderOutputOption, optional
1040
- Path to write SQL to (stdout when omitted).
1041
-
1042
- Returns
1043
- -------
1044
- int
1045
- Zero on success.
1046
- """
1047
- state = _ensure_state(ctx)
1048
- ns = _stateful_namespace(
1049
- state,
1050
- command='render',
1051
- config=config,
1052
- spec=spec,
1053
- table=table,
1054
- template=template,
1055
- template_path=template_path,
1056
- output=output,
1057
- )
1058
- return int(render_handler(ns))
1059
-
1060
-
1061
- @app.command('run')
1062
- def run_cmd(
1063
- ctx: typer.Context,
1064
- config: PipelineConfigOption,
1065
- job: str | None = typer.Option(
1066
- None,
1067
- '-j',
1068
- '--job',
1069
- help='Name of the job to run',
1070
- ),
1071
- pipeline: str | None = typer.Option(
1072
- None,
1073
- '-p',
1074
- '--pipeline',
1075
- help='Name of the pipeline to run',
1076
- ),
1077
- ) -> int:
1078
- """
1079
- Execute an ETL job or pipeline from a YAML configuration.
1080
-
1081
- Parameters
1082
- ----------
1083
- ctx : typer.Context
1084
- Typer execution context provided to the command.
1085
- config : PipelineConfigOption
1086
- Path to pipeline YAML configuration file.
1087
- job : str | None, optional
1088
- Name of the job to run.
1089
- pipeline : str | None, optional
1090
- Name of the pipeline to run.
1091
-
1092
- Returns
1093
- -------
1094
- int
1095
- Zero on success.
1096
- """
1097
- state = _ensure_state(ctx)
1098
- ns = _stateful_namespace(
1099
- state,
1100
- command='run',
1101
- config=config,
1102
- job=job,
1103
- pipeline=pipeline,
1104
- )
1105
- return int(run_handler(ns))
1106
-
1107
-
1108
@app.command('transform')
def transform_cmd(
    ctx: typer.Context,
    operations: OperationsJSONOption = '{}',
    source: StreamingSourceArg = '-',
    source_format: SourceFormatOption | None = None,
    source_type: SourceOverrideOption | None = None,
    target: TargetPathOption | None = None,
    target_format: TargetFormatOption | None = None,
    target_type: TargetOverrideOption | None = None,
) -> int:
    """
    Transform records using JSON-described operations.

    The parsed options are validated, the source and target resource types
    are resolved, and everything is packed into a namespace that is handed
    to ``transform_handler``.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context for the current invocation.
    operations : OperationsJSONOption, optional
        JSON string describing the transformation operations.
    source : StreamingSourceArg, optional
        Data source (file path, or ``-`` for stdin).
    source_format : SourceFormatOption | None, optional
        Input payload format when not a file (or when SOURCE is ``-``).
    source_type : SourceOverrideOption | None, optional
        Explicit source type; replaces the inferred one.
    target : TargetPathOption | None, optional
        Optional output path. Use ``-`` for stdout.
    target_format : TargetFormatOption | None, optional
        Output payload format when not a file target (or when TARGET is
        ``-``). Accepts ``--target-format``.
    target_type : TargetOverrideOption | None, optional
        Explicit target type; replaces the inferred one.

    Returns
    -------
    int
        Result of ``transform_handler`` coerced to ``int``; zero on success.

    Examples
    --------
    - Transform data from a file and write to another file:
        etlplus transform --from file in.json \
            --operations '{"select": ["id", "name"]}' \
            --to file out.json
    - Transform data from stdin and write to stdout:
        cat in.json \
            | etlplus transform \
                --operations '{"filter": {"field": "age", "gt": 30}}'
    - Transform data from a file and write to stdout:
        etlplus transform --from file in.csv \
            --source-format csv \
            --operations '{"select": ["id", "email"]}'
    - Transform data from stdin and write to a file:
        cat in.json \
            | etlplus transform --operations '{"sort": ["-created_at"]}' \
                --to file out.json

    Notes
    -----
    - The ``transform`` command reads JSON from stdin when SOURCE is ``-``.
    - CSV input is unsupported for this command.
    - Convert upstream before piping into ``transform``.
    """
    state = _ensure_state(ctx)

    # Validate each user-supplied choice, in declaration order, so the first
    # invalid option is the one that gets reported.
    src_fmt = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )
    src_kind_override = _optional_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )
    tgt_fmt = _optional_choice(
        target_format,
        _FORMAT_CHOICES,
        label='target_format',
    )
    tgt_fmt_kwargs = _format_namespace_kwargs(
        format_value=tgt_fmt,
        default='json',
    )
    tgt_kind_override = _optional_choice(
        target_type,
        _SOURCE_CHOICES,
        label='target_type',
    )

    # An explicit (truthy) override wins; otherwise fall back to inference.
    if src_kind_override:
        src_kind = src_kind_override
    else:
        src_kind = _infer_resource_type_soft(source)

    # A missing endpoint is represented by '-' (stdin / stdout).
    src_value = '-' if source is None else source
    tgt_value = '-' if target is None else target

    if src_kind is not None:
        src_kind = _validate_choice(
            src_kind,
            _SOURCE_CHOICES,
            label='source_type',
        )

    tgt_kind = _resolve_resource_type(
        explicit_type=None,
        override_type=tgt_kind_override,
        value=tgt_value,
        label='target_type',
    )

    # Report the source first, then the target.
    for role, value, kind in (
        ('source', src_value, src_kind),
        ('target', tgt_value, tgt_kind),
    ):
        _log_inferred_resource(
            state,
            role=role,
            value=value,
            resource_type=kind,
        )

    # Keep the ``**`` spread on the call itself so that a key clash between
    # the format kwargs and the explicit keywords still raises.
    namespace = _stateful_namespace(
        state,
        command='transform',
        source=src_value,
        source_type=src_kind,
        operations=json_type(operations),
        target=tgt_value,
        source_format=src_fmt,
        target_type=tgt_kind,
        target_format=tgt_fmt_kwargs['format'],
        **tgt_fmt_kwargs,
    )
    exit_code = transform_handler(namespace)
    return int(exit_code)
1243
-
1244
-
1245
@app.command('validate')
def validate_cmd(
    ctx: typer.Context,
    rules: RulesJSONOption = '{}',
    source: StreamingSourceArg = '-',
    source_format: SourceFormatOption | None = None,
    source_type: SourceOverrideOption | None = None,
    target: TargetPathOption | None = None,
) -> int:
    """
    Validate data against JSON-described rules.

    The parsed options are validated, the source resource type is resolved,
    and everything is packed into a namespace that is handed to
    ``validate_handler``.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context for the current invocation.
    rules : RulesJSONOption, optional
        JSON string describing the validation rules.
    source : StreamingSourceArg, optional
        Data source (file path, or ``-`` for stdin).
    source_format : SourceFormatOption | None, optional
        Optional stdin format hint (JSON or CSV) when SOURCE is ``-``.
    source_type : SourceOverrideOption | None, optional
        Explicit source type, for when the inferred one is wrong.
    target : TargetPathOption | None, optional
        Optional output path. Use ``-`` for stdout.

    Returns
    -------
    int
        Result of ``validate_handler`` coerced to ``int``; zero on success.
    """
    # Option validation deliberately happens before the CLI state is
    # materialized, so invalid choices fail first.
    src_fmt = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )
    src_kind_override = _optional_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )
    src_fmt_kwargs = _format_namespace_kwargs(
        format_value=src_fmt,
        default='json',
    )

    state = _ensure_state(ctx)

    # An explicit (truthy) override wins; otherwise fall back to inference.
    if src_kind_override:
        src_kind = src_kind_override
    else:
        src_kind = _infer_resource_type_soft(source)

    _log_inferred_resource(
        state,
        role='source',
        value=source,
        resource_type=src_kind,
    )

    namespace = _stateful_namespace(
        state,
        command='validate',
        source=source,
        source_type=src_kind,
        # The rules arrive as a CLI string; json_type converts them.
        rules=json_type(rules),
        target=target,
        source_format=src_fmt,
        **src_fmt_kwargs,
    )
    exit_code = validate_handler(namespace)
    return int(exit_code)