etlplus 0.4.1__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/cli/app.py CHANGED
@@ -28,12 +28,18 @@ Subcommands
28
28
 
29
29
  Notes
30
30
  -----
31
- - Use ``-`` to read from stdin and ``--output -`` (or ``load ... file -``) to
32
- write to stdout.
33
- - ``extract`` supports ``--from`` and ``load`` supports ``--to`` to override
34
- inferred resource types.
31
+ - Use ``-`` to read from stdin or to write to stdout.
32
+ - Commands ``extract`` and ``transform`` support the command-line option
33
+ ``--from`` to override inferred resource types.
34
+ - Commands ``transform`` and ``load`` support the command-line option ``--to``
35
+ to override inferred resource types.
35
36
  """
36
37
 
38
+ # Pylint struggles with large CLI surfaces that legitimately require
39
+ # numerous arguments in a single module.
40
+ # pylint: disable=too-many-lines
41
+ # pylint: disable=too-many-arguments,too-many-positional-arguments
42
+
37
43
  from __future__ import annotations
38
44
 
39
45
  import argparse
@@ -41,6 +47,7 @@ import sys
41
47
  from collections.abc import Collection
42
48
  from dataclasses import dataclass
43
49
  from pathlib import Path
50
+ from typing import Annotated
44
51
  from typing import Final
45
52
 
46
53
  import typer
@@ -66,6 +73,19 @@ __all__ = [
66
73
  ]
67
74
 
68
75
 
76
+ # SECTION: INTERNAL CONSTANTS =============================================== #
77
+
78
+
79
+ _DB_SCHEMES = (
80
+ 'postgres://',
81
+ 'postgresql://',
82
+ 'mysql://',
83
+ )
84
+
85
+ _SOURCE_CHOICES: Final[frozenset[str]] = frozenset(DataConnectorType.choices())
86
+ _FORMAT_CHOICES: Final[frozenset[str]] = frozenset(FileFormat.choices())
87
+
88
+
69
89
  # SECTION: CONSTANTS ======================================================== #
70
90
 
71
91
 
@@ -75,67 +95,168 @@ CLI_DESCRIPTION: Final[str] = '\n'.join(
75
95
  '',
76
96
  ' Provide a subcommand and options. Examples:',
77
97
  '',
78
- ' etlplus extract file in.csv -o out.json',
98
+ ' etlplus extract in.csv > out.json',
79
99
  ' etlplus validate in.json --rules \'{"required": ["id"]}\'',
80
- ' etlplus transform in.json --operations \'{"select": ["id"]}\'',
81
- ' etlplus load in.json file out.json',
100
+ (
101
+ ' etlplus transform --from file in.json '
102
+ '--operations \'{"select": ["id"]}\' --to file -o out.json'
103
+ ),
104
+ ' etlplus extract in.csv | etlplus load --to file out.json',
105
+ ' cat data.json | etlplus load --to api https://example.com/data',
82
106
  '',
83
- ' Enforce error if --format is provided for files. Examples:',
107
+ ' Override format inference when extensions are misleading:',
84
108
  '',
85
- ' etlplus extract file in.csv --format csv --strict-format',
86
- ' etlplus load in.json file out.csv --format csv --strict-format',
109
+ ' etlplus extract data.txt --source-format csv',
110
+ ' etlplus load payload.bin --target-format json',
87
111
  ],
88
112
  )
89
113
 
90
114
  CLI_EPILOG: Final[str] = '\n'.join(
91
115
  [
92
- 'Environment:',
93
- (
94
- ' ETLPLUS_FORMAT_BEHAVIOR controls behavior when '
95
- '--format is provided for files.'
96
- ),
97
- ' Values:',
98
- ' - error|fail|strict: treat as error',
99
- ' - warn (default): print a warning',
100
- ' - ignore|silent: no message',
101
- '',
102
- 'Note:',
103
- ' --strict-format overrides the environment behavior.',
116
+ 'Tip:',
117
+ ' --source-format and --target-format override format inference '
118
+ 'based on filename extensions when needed.',
104
119
  ],
105
120
  )
106
121
 
107
122
  PROJECT_URL: Final[str] = 'https://github.com/Dagitali/ETLPlus'
108
123
 
109
- EXTRACT_ARGS = typer.Argument(
110
- ...,
111
- metavar='[SOURCE_TYPE] SOURCE',
112
- help=(
113
- 'Extract from a SOURCE. You may provide SOURCE_TYPE explicitly as '
114
- 'the first positional argument, or omit it and use --from or let '
115
- 'etlplus infer it from the SOURCE.'
124
+
125
+ # SECTION: TYPE ALIASES ==================================================== #
126
+
127
+
128
+ SourceInputArg = Annotated[
129
+ str,
130
+ typer.Argument(
131
+ ...,
132
+ metavar='SOURCE',
133
+ help=(
134
+ 'Extract from SOURCE. Use --from/--source-type to override the '
135
+ 'inferred connector when needed.'
136
+ ),
116
137
  ),
117
- )
118
- LOAD_ARGS = typer.Argument(
119
- ...,
120
- metavar='[SOURCE] [TARGET_TYPE] TARGET',
121
- help=(
122
- 'Load SOURCE into a target. SOURCE defaults to - (stdin). You may '
123
- 'provide legacy positional form: SOURCE TARGET_TYPE TARGET.'
138
+ ]
139
+
140
+ StreamingSourceArg = Annotated[
141
+ str,
142
+ typer.Argument(
143
+ ...,
144
+ metavar='SOURCE',
145
+ help=(
146
+ 'Data source to transform or validate (path, JSON payload, or '
147
+ '- for stdin).'
148
+ ),
124
149
  ),
125
- )
150
+ ]
126
151
 
152
+ TargetInputArg = Annotated[
153
+ str,
154
+ typer.Argument(
155
+ ...,
156
+ metavar='TARGET',
157
+ help=(
158
+ 'Load JSON data from stdin into TARGET. Use --to/--target-type '
159
+ 'to override connector inference when needed. Source data must '
160
+ 'be piped into stdin.'
161
+ ),
162
+ ),
163
+ ]
127
164
 
128
- # SECTION: INTERNAL CONSTANTS =============================================== #
165
+ SourceOverrideOption = Annotated[
166
+ str | None,
167
+ typer.Option(
168
+ '--source-type',
169
+ metavar='CONNECTOR',
170
+ show_default=False,
171
+ rich_help_panel='I/O overrides',
172
+ help='Override the inferred source type (file, database, api).',
173
+ ),
174
+ ]
129
175
 
176
+ TargetOverrideOption = Annotated[
177
+ str | None,
178
+ typer.Option(
179
+ '--target-type',
180
+ metavar='CONNECTOR',
181
+ show_default=False,
182
+ rich_help_panel='I/O overrides',
183
+ help='Override the inferred target type (file, database, api).',
184
+ ),
185
+ ]
130
186
 
131
- _DB_SCHEMES = (
132
- 'postgres://',
133
- 'postgresql://',
134
- 'mysql://',
135
- )
187
+ SourceFormatOption = Annotated[
188
+ str | None,
189
+ typer.Option(
190
+ '--source-format',
191
+ metavar='FORMAT',
192
+ show_default=False,
193
+ rich_help_panel='Format overrides',
194
+ help=(
195
+ 'Input payload format when SOURCE is - or an inline payload. '
196
+ 'File sources infer format from the extension.'
197
+ ),
198
+ ),
199
+ ]
136
200
 
137
- _SOURCE_CHOICES: Final[frozenset[str]] = frozenset(DataConnectorType.choices())
138
- _FORMAT_CHOICES: Final[frozenset[str]] = frozenset(FileFormat.choices())
201
+ StdinFormatOption = Annotated[
202
+ str | None,
203
+ typer.Option(
204
+ '--source-format',
205
+ metavar='FORMAT',
206
+ show_default=False,
207
+ rich_help_panel='Format overrides',
208
+ help='Input payload format when reading from stdin (default: json).',
209
+ ),
210
+ ]
211
+
212
+ TargetFormatOption = Annotated[
213
+ str | None,
214
+ typer.Option(
215
+ '--target-format',
216
+ metavar='FORMAT',
217
+ show_default=False,
218
+ rich_help_panel='Format overrides',
219
+ help=(
220
+ 'Payload format when TARGET is - or a non-file connector. File '
221
+ 'targets infer format from the extension.'
222
+ ),
223
+ ),
224
+ ]
225
+
226
+ OperationsJSONOption = Annotated[
227
+ str,
228
+ typer.Option(
229
+ '--operations',
230
+ help='Transformation operations as JSON string.',
231
+ ),
232
+ ]
233
+
234
+ RulesJSONOption = Annotated[
235
+ str,
236
+ typer.Option(
237
+ '--rules',
238
+ help='Validation rules as JSON string.',
239
+ ),
240
+ ]
241
+
242
+ TargetPathOption = Annotated[
243
+ str | None,
244
+ typer.Option(
245
+ '--target',
246
+ metavar='PATH',
247
+ help='Target file for transformed or validated output (- for stdout).',
248
+ ),
249
+ ]
250
+
251
+ PipelineConfigOption = Annotated[
252
+ str,
253
+ typer.Option(
254
+ ...,
255
+ '--config',
256
+ metavar='PATH',
257
+ help='Path to pipeline YAML configuration file.',
258
+ ),
259
+ ]
139
260
 
140
261
 
141
262
  # SECTION: DATA CLASSES ===================================================== #
@@ -174,6 +295,32 @@ def _ensure_state(
174
295
  return ctx.obj
175
296
 
176
297
 
298
+ def _format_namespace_kwargs(
299
+ *,
300
+ format_value: str | None,
301
+ default: str,
302
+ ) -> dict[str, object]:
303
+ """
304
+ Return common namespace kwargs for format handling.
305
+
306
+ Parameters
307
+ ----------
308
+ format_value : str | None
309
+ User-provided format value from the CLI option.
310
+ default : str
311
+ Default format to use when none is provided.
312
+
313
+ Returns
314
+ -------
315
+ dict[str, object]
316
+ Keyword arguments for format-related namespace attributes.
317
+ """
318
+ return {
319
+ 'format': (format_value or default),
320
+ '_format_explicit': (format_value is not None),
321
+ }
322
+
323
+
177
324
  def _infer_resource_type(
178
325
  value: str,
179
326
  ) -> str:
@@ -198,12 +345,13 @@ def _infer_resource_type(
198
345
  val = (value or '').strip()
199
346
  low = val.lower()
200
347
 
201
- if val == '-':
202
- return 'file'
203
- if low.startswith(('http://', 'https://')):
204
- return 'api'
205
- if low.startswith(_DB_SCHEMES):
206
- return 'database'
348
+ match (val, low):
349
+ case ('-', _):
350
+ return 'file'
351
+ case (_, inferred) if inferred.startswith(('http://', 'https://')):
352
+ return 'api'
353
+ case (_, inferred) if inferred.startswith(_DB_SCHEMES):
354
+ return 'database'
207
355
 
208
356
  path = Path(val)
209
357
  if path.exists() or path.suffix:
@@ -240,6 +388,59 @@ def _infer_resource_type_or_exit(
240
388
  raise typer.BadParameter(str(exc)) from exc
241
389
 
242
390
 
391
+ def _infer_resource_type_soft(
392
+ value: str | None,
393
+ ) -> str | None:
394
+ """
395
+ Make a best-effort inference that tolerates inline payloads.
396
+
397
+ Parameters
398
+ ----------
399
+ value : str | None
400
+ CLI value describing a source/target.
401
+
402
+ Returns
403
+ -------
404
+ str | None
405
+ Inferred resource type, or ``None`` if inference failed.
406
+ """
407
+ if value is None:
408
+ return None
409
+ try:
410
+ return _infer_resource_type(value)
411
+ except ValueError:
412
+ return None
413
+
414
+
415
+ def _log_inferred_resource(
416
+ state: CliState,
417
+ *,
418
+ role: str,
419
+ value: str,
420
+ resource_type: str | None,
421
+ ) -> None:
422
+ """
423
+ Emit a uniform verbose message for inferred resource types.
424
+
425
+ Parameters
426
+ ----------
427
+ state : CliState
428
+ Current CLI state stored on the Typer context.
429
+ role : str
430
+ Friendly label for the resource (e.g., ``source`` or ``target``).
431
+ value : str
432
+ Resource value provided on the CLI.
433
+ resource_type : str | None
434
+ Inferred resource type or ``None`` if not inferred.
435
+ """
436
+ if not state.verbose or resource_type is None:
437
+ return
438
+ print(
439
+ f'Inferred {role}_type={resource_type} for {role}={value}',
440
+ file=sys.stderr,
441
+ )
442
+
443
+
243
444
  def _ns(
244
445
  **kwargs: object,
245
446
  ) -> argparse.Namespace:
@@ -286,13 +487,65 @@ def _optional_choice(
286
487
  return _validate_choice(value, choices, label=label)
287
488
 
288
489
 
490
+ def _resolve_resource_type(
491
+ *,
492
+ explicit_type: str | None,
493
+ override_type: str | None,
494
+ value: str,
495
+ label: str,
496
+ conflict_error: str | None = None,
497
+ legacy_file_error: str | None = None,
498
+ ) -> str:
499
+ """
500
+ Resolve resource type preference order and validate it.
501
+
502
+ Parameters
503
+ ----------
504
+ explicit_type : str | None
505
+ Explicit resource type provided by the user.
506
+ override_type : str | None
507
+ Resource type provided by an overriding option.
508
+ value : str
509
+ Resource value to infer type from if no explicit or override type is
510
+ given.
511
+ label : str
512
+ Friendly label for error messages.
513
+ conflict_error : str | None
514
+ Error message to raise if there is a conflict between explicit and
515
+ override types.
516
+ legacy_file_error : str | None
517
+ Error message to raise if the explicit type is a legacy 'file' type.
518
+
519
+ Returns
520
+ -------
521
+ str
522
+ Resolved and validated resource type.
523
+
524
+ Raises
525
+ ------
526
+ typer.BadParameter
527
+ If there is a conflict between explicit and override types, or if the
528
+ explicit type is a legacy 'file' type.
529
+ """
530
+ if explicit_type is not None:
531
+ if override_type is not None and conflict_error:
532
+ raise typer.BadParameter(conflict_error)
533
+ if legacy_file_error and explicit_type.strip().lower() == 'file':
534
+ raise typer.BadParameter(legacy_file_error)
535
+ candidate = explicit_type
536
+ else:
537
+ candidate = override_type or _infer_resource_type_or_exit(value)
538
+ return _validate_choice(candidate, _SOURCE_CHOICES, label=label)
539
+
540
+
289
541
  def _stateful_namespace(
290
542
  state: CliState,
291
543
  *,
292
544
  command: str,
293
545
  **kwargs: object,
294
546
  ) -> argparse.Namespace:
295
- """Attach CLI state toggles to a handler namespace.
547
+ """
548
+ Attach CLI state toggles to a handler namespace.
296
549
 
297
550
  Parameters
298
551
  ----------
@@ -432,34 +685,9 @@ def _root(
432
685
  @app.command('extract')
433
686
  def extract_cmd(
434
687
  ctx: typer.Context,
435
- args: list[str] = EXTRACT_ARGS,
436
- from_: str | None = typer.Option(
437
- None,
438
- '--from',
439
- help='Override the inferred source type (file, database, api).',
440
- ),
441
- output: str | None = typer.Option(
442
- None,
443
- '-o',
444
- '--output',
445
- help='Output file to save extracted data (JSON). Use - for stdout.',
446
- ),
447
- strict_format: bool = typer.Option(
448
- False,
449
- '--strict-format',
450
- help=(
451
- 'Treat providing --format for file sources as an error '
452
- '(overrides environment behavior)'
453
- ),
454
- ),
455
- source_format: str | None = typer.Option(
456
- None,
457
- '--format',
458
- help=(
459
- 'Payload format when not a file (or when SOURCE is -). '
460
- 'For normal file paths, format is inferred from extension.'
461
- ),
462
- ),
688
+ source: SourceInputArg,
689
+ source_format: SourceFormatOption | None = None,
690
+ source_type: SourceOverrideOption | None = None,
463
691
  ) -> int:
464
692
  """
465
693
  Extract data from files, databases, or REST APIs.
@@ -468,267 +696,146 @@ def extract_cmd(
468
696
  ----------
469
697
  ctx : typer.Context
470
698
  Typer execution context provided to the command.
471
- args : list[str]
472
- Positional arguments: either SOURCE, or SOURCE_TYPE SOURCE.
473
- from_ : str | None
474
- Override the inferred source type.
475
- output : str | None
476
- Output file to save extracted data.
477
- strict_format : bool
478
- Whether to enforce strict format behavior.
479
- source_format : str | None
699
+ source : SourceInputArg
700
+ Data source (file path, URL, DSN, or ``-`` for stdin).
701
+ source_format : SourceFormatOption | None, optional
480
702
  Payload format when not a file.
703
+ source_type : SourceOverrideOption | None, optional
704
+ Override the inferred source type.
481
705
 
482
706
  Returns
483
707
  -------
484
708
  int
485
709
  Zero on success.
486
710
 
487
- Raises
488
- ------
489
- typer.BadParameter
490
- If invalid parameters are provided.
491
-
492
711
  Examples
493
712
  --------
494
713
  - Extract from a file (type inferred):
495
714
  etlplus extract in.csv
496
-
497
- - Extract from a file (explicit):
498
- etlplus extract file in.csv
715
+ - Extract from a file (explicit via flag):
499
716
  etlplus extract --from file in.csv
500
-
501
717
  - Extract from an API:
502
718
  etlplus extract https://example.com/data.json
503
719
  etlplus extract --from api https://example.com/data.json
504
-
505
720
  - Extract from a database DSN:
506
721
  etlplus extract --from database postgresql://user:pass@host/db
507
-
508
722
  - Pipe into transform/load:
509
723
  etlplus extract in.csv \
510
724
  | etlplus transform --operations '{"select":["a"]}'
725
+
726
+ Notes
727
+ -----
728
+ - The ``extract`` command always writes JSON to stdout.
729
+ - CSV output is unsupported for this command.
730
+ - Use shell redirection (``>``) or pipelines to persist the output.
511
731
  """
512
732
  state = _ensure_state(ctx)
513
733
 
514
- if len(args) > 2:
515
- raise typer.BadParameter('Provide SOURCE, or SOURCE_TYPE SOURCE.')
516
-
517
- from_ = _optional_choice(from_, _SOURCE_CHOICES, label='from')
734
+ source_type = _optional_choice(
735
+ source_type,
736
+ _SOURCE_CHOICES,
737
+ label='source_type',
738
+ )
518
739
  source_format = _optional_choice(
519
740
  source_format,
520
741
  _FORMAT_CHOICES,
521
- label='format',
742
+ label='source_format',
522
743
  )
523
744
 
524
- if len(args) == 2:
525
- if from_ is not None:
526
- raise typer.BadParameter(
527
- 'Do not combine --from with an explicit SOURCE_TYPE.',
528
- )
529
- source_type = _validate_choice(
530
- args[0],
531
- _SOURCE_CHOICES,
532
- label='source_type',
533
- )
534
- source = args[1]
535
- else:
536
- source = args[0]
537
- if from_ is not None:
538
- source_type = from_
539
- else:
540
- source_type = _infer_resource_type_or_exit(source)
541
-
542
- source_type = _validate_choice(
543
- source_type,
544
- _SOURCE_CHOICES,
545
- label='source_type',
546
- )
547
-
548
- if state.verbose:
549
- print(
550
- f'Inferred source_type={source_type} for source={source}',
551
- file=sys.stderr,
552
- )
745
+ resolved_source = source
746
+ resolved_source_type = source_type or _infer_resource_type_or_exit(
747
+ resolved_source,
748
+ )
553
749
 
554
- ns = _stateful_namespace(
750
+ _log_inferred_resource(
555
751
  state,
556
- command='extract',
557
- source_type=source_type,
558
- source=source,
559
- output=output,
560
- strict_format=strict_format,
561
- format=(source_format or 'json'),
562
- _format_explicit=(source_format is not None),
752
+ role='source',
753
+ value=resolved_source,
754
+ resource_type=resolved_source_type,
563
755
  )
564
- return int(cmd_extract(ns))
565
756
 
566
-
567
- @app.command('validate')
568
- def validate_cmd(
569
- ctx: typer.Context,
570
- source: str = typer.Argument(
571
- '-',
572
- metavar='SOURCE',
573
- help=(
574
- 'Data source to validate (file path, JSON string, or - for stdin).'
575
- ),
576
- ),
577
- rules: str = typer.Option(
578
- '{}',
579
- '--rules',
580
- help='Validation rules as JSON string',
581
- ),
582
- output: str | None = typer.Option(
583
- None,
584
- '-o',
585
- '--output',
586
- help='Output file to save validated data (JSON). Use - for stdout.',
587
- ),
588
- input_format: str | None = typer.Option(
589
- None,
590
- '--input-format',
591
- help='Input payload format for stdin (json or csv).',
592
- ),
593
- ) -> int:
594
- """
595
- Validate data against JSON-described rules.
596
-
597
- Parameters
598
- ----------
599
- ctx : typer.Context
600
- Typer execution context provided to the command.
601
- source : str
602
- Data source (file path or ``-`` for stdin).
603
- rules : str
604
- Validation rules as a JSON string.
605
- output : str | None
606
- Optional output path. Use ``-`` for stdout.
607
- input_format : str | None
608
- Optional stdin format hint (json or csv).
609
-
610
- Returns
611
- -------
612
- int
613
- Zero on success.
614
- """
615
- input_format = _optional_choice(
616
- input_format,
617
- _FORMAT_CHOICES,
618
- label='input_format',
757
+ format_kwargs = _format_namespace_kwargs(
758
+ format_value=source_format,
759
+ default='json',
619
760
  )
620
-
621
- state = _ensure_state(ctx)
622
-
623
761
  ns = _stateful_namespace(
624
762
  state,
625
- command='validate',
626
- source=source,
627
- rules=json_type(rules),
628
- output=output,
629
- input_format=input_format,
763
+ command='extract',
764
+ source_type=resolved_source_type,
765
+ source=resolved_source,
766
+ **format_kwargs,
630
767
  )
631
- return int(cmd_validate(ns))
768
+ return int(cmd_extract(ns))
632
769
 
633
770
 
634
- @app.command('transform')
635
- def transform_cmd(
771
+ @app.command('list')
772
+ def list_cmd(
636
773
  ctx: typer.Context,
637
- source: str = typer.Argument(
638
- '-',
639
- metavar='SOURCE',
640
- help=(
641
- 'Data source to transform '
642
- '(file path, JSON string, or - for stdin).'
643
- ),
644
- ),
645
- operations: str = typer.Option(
646
- '{}',
647
- '--operations',
648
- help='Transformation operations as JSON string',
774
+ config: PipelineConfigOption,
775
+ jobs: bool = typer.Option(
776
+ False,
777
+ '--jobs',
778
+ help='List available job names and exit',
649
779
  ),
650
- output: str | None = typer.Option(
651
- None,
652
- '-o',
653
- '--output',
654
- help='Output file to save transformed data (JSON). Use - for stdout.',
780
+ pipelines: bool = typer.Option(
781
+ False,
782
+ '--pipelines',
783
+ help='List ETL pipelines',
655
784
  ),
656
- input_format: str | None = typer.Option(
657
- None,
658
- '--input-format',
659
- help='Input payload format for stdin (json or csv).',
785
+ sources: bool = typer.Option(False, '--sources', help='List data sources'),
786
+ targets: bool = typer.Option(False, '--targets', help='List data targets'),
787
+ transforms: bool = typer.Option(
788
+ False,
789
+ '--transforms',
790
+ help='List data transforms',
660
791
  ),
661
792
  ) -> int:
662
793
  """
663
- Transform records using JSON-described operations.
794
+ Print ETL entities from a pipeline YAML configuration.
664
795
 
665
796
  Parameters
666
797
  ----------
667
798
  ctx : typer.Context
668
799
  Typer execution context provided to the command.
669
- source : str
670
- Data source (file path or ``-`` for stdin).
671
- operations : str
672
- Transformation operations as a JSON string.
673
- output : str | None
674
- Optional output path. Use ``-`` for stdout.
675
- input_format : str | None
676
- Optional stdin format hint (json or csv).
800
+ config : PipelineConfigOption
801
+ Path to pipeline YAML configuration file.
802
+ jobs : bool, optional
803
+ If True, list available job names and exit.
804
+ pipelines : bool, optional
805
+ If True, list ETL pipelines.
806
+ sources : bool, optional
807
+ If True, list data sources.
808
+ targets : bool, optional
809
+ If True, list data targets.
810
+ transforms : bool, optional
811
+ If True, list data transforms.
677
812
 
678
813
  Returns
679
814
  -------
680
815
  int
681
816
  Zero on success.
682
817
  """
683
- input_format = _optional_choice(
684
- input_format,
685
- _FORMAT_CHOICES,
686
- label='input_format',
687
- )
688
-
689
818
  state = _ensure_state(ctx)
690
-
691
819
  ns = _stateful_namespace(
692
820
  state,
693
- command='transform',
694
- source=source,
695
- operations=json_type(operations),
696
- output=output,
697
- input_format=input_format,
821
+ command='list',
822
+ config=config,
823
+ pipelines=pipelines,
824
+ jobs=jobs,
825
+ sources=sources,
826
+ targets=targets,
827
+ transforms=transforms,
698
828
  )
699
- return int(cmd_transform(ns))
829
+ return int(cmd_list(ns))
700
830
 
701
831
 
702
832
  @app.command('load')
703
833
  def load_cmd(
704
834
  ctx: typer.Context,
705
- args: list[str] = LOAD_ARGS,
706
- to: str | None = typer.Option(
707
- None,
708
- '--to',
709
- help='Override the inferred target type (file, database, api).',
710
- ),
711
- strict_format: bool = typer.Option(
712
- False,
713
- '--strict-format',
714
- help=(
715
- 'Treat providing --format for file targets as an error '
716
- '(overrides environment behavior)'
717
- ),
718
- ),
719
- target_format: str | None = typer.Option(
720
- None,
721
- '--format',
722
- help=(
723
- 'Payload format when not a file (or when TARGET is -). '
724
- 'For normal file targets, format is inferred from extension.'
725
- ),
726
- ),
727
- input_format: str | None = typer.Option(
728
- None,
729
- '--input-format',
730
- help='Input payload format for stdin (json or csv).',
731
- ),
835
+ target: TargetInputArg,
836
+ source_format: StdinFormatOption | None = None,
837
+ target_format: TargetFormatOption | None = None,
838
+ target_type: TargetOverrideOption | None = None,
732
839
  ) -> int:
733
840
  """
734
841
  Load data into a file, database, or REST API.
@@ -737,107 +844,89 @@ def load_cmd(
737
844
  ----------
738
845
  ctx : typer.Context
739
846
  Typer execution context provided to the command.
740
- args : list[str]
741
- Positional arguments: TARGET, SOURCE TARGET, or SOURCE TARGET_TYPE
742
- TARGET.
743
- to : str | None
847
+ target : TargetInputArg
848
+ Load destination (file path, URL/DSN, or ``-`` for stdout).
849
+ source_format : StdinFormatOption | None, optional
850
+ Hint for parsing stdin payloads (json or csv).
851
+ target_format : TargetFormatOption | None, optional
852
+ Payload format when not a file target (or when TARGET is ``-``).
853
+ target_type : TargetOverrideOption | None, optional
744
854
  Override the inferred target type.
745
- strict_format : bool
746
- Whether to enforce strict format behavior.
747
- target_format : str | None
748
- Payload format when not a file.
749
- input_format : str | None
750
- Input payload format for stdin.
751
855
 
752
856
  Returns
753
857
  -------
754
858
  int
755
859
  Zero on success.
756
860
 
757
- Raises
758
- ------
759
- typer.BadParameter
760
- If the arguments are invalid
761
-
762
861
  Examples
763
862
  --------
764
863
  - Pipe into a file:
765
864
  etlplus extract in.csv \
766
865
  | etlplus transform --operations '{"select":["a"]}' \
767
866
  | etlplus load --to file out.json
768
-
769
- - Legacy form:
770
- etlplus load in.json file out.json
771
-
867
+ - Read from stdin and write to a file:
868
+ etlplus load out.json
772
869
  - Write to stdout:
773
- etlplus load in.json file -
870
+ etlplus load --to file -
871
+
872
+ Notes
873
+ -----
874
+ - The ``load`` command reads JSON from stdin.
875
+ - CSV input is unsupported unless ``--source-format csv`` is provided.
876
+ - Convert upstream before piping into ``load`` when working with other
877
+ formats.
774
878
  """
775
879
  state = _ensure_state(ctx)
776
880
 
777
- if len(args) > 3:
778
- raise typer.BadParameter(
779
- 'Provide TARGET, SOURCE TARGET, or SOURCE TARGET_TYPE TARGET.',
780
- )
781
-
782
- to = _optional_choice(to, _SOURCE_CHOICES, label='to')
783
- target_format = _optional_choice(
784
- target_format,
881
+ source_format = _optional_choice(
882
+ source_format,
785
883
  _FORMAT_CHOICES,
786
- label='format',
884
+ label='source_format',
787
885
  )
788
- input_format = _optional_choice(
789
- input_format,
790
- _FORMAT_CHOICES,
791
- label='input_format',
792
- )
793
-
794
- # Parse positional args.
795
- match args:
796
- case [source, target_type_raw, target] if to is None:
797
- target_type = _validate_choice(
798
- target_type_raw,
799
- _SOURCE_CHOICES,
800
- label='target_type',
801
- )
802
- case [_, _, _]:
803
- raise typer.BadParameter(
804
- 'Do not combine --to with the legacy SOURCE TARGET_TYPE '
805
- 'TARGET form.',
806
- )
807
- case [source, target]:
808
- target_type = to or _infer_resource_type_or_exit(target)
809
- case [solo_target]:
810
- source = '-'
811
- target = solo_target
812
- target_type = to or _infer_resource_type_or_exit(target)
813
- case []:
814
- raise typer.BadParameter(
815
- 'Provide TARGET, SOURCE TARGET, or legacy SOURCE '
816
- 'TARGET_TYPE TARGET.',
817
- )
818
-
819
- target_type = _validate_choice(
886
+ target_type = _optional_choice(
820
887
  target_type,
821
888
  _SOURCE_CHOICES,
822
889
  label='target_type',
823
890
  )
891
+ target_format = _optional_choice(
892
+ target_format,
893
+ _FORMAT_CHOICES,
894
+ label='target_format',
895
+ )
824
896
 
825
- if state.verbose:
826
- print(
827
- f'Inferred target_type={target_type} for target={target}',
828
- file=sys.stderr,
829
- )
897
+ resolved_target = target
898
+ resolved_target_type = target_type or _infer_resource_type_or_exit(
899
+ resolved_target,
900
+ )
901
+
902
+ resolved_source_value = '-'
903
+ resolved_source_type = _infer_resource_type_soft(resolved_source_value)
904
+
905
+ _log_inferred_resource(
906
+ state,
907
+ role='source',
908
+ value=resolved_source_value,
909
+ resource_type=resolved_source_type,
910
+ )
911
+ _log_inferred_resource(
912
+ state,
913
+ role='target',
914
+ value=resolved_target,
915
+ resource_type=resolved_target_type,
916
+ )
830
917
 
918
+ format_kwargs = _format_namespace_kwargs(
919
+ format_value=target_format,
920
+ default='json',
921
+ )
831
922
  ns = _stateful_namespace(
832
923
  state,
833
924
  command='load',
834
- source=source,
835
- target_type=target_type,
836
- target=target,
837
- strict_format=strict_format,
838
- format=(target_format or 'json'),
839
- _format_explicit=(target_format is not None),
840
- input_format=input_format,
925
+ source=resolved_source_value,
926
+ source_format=source_format,
927
+ target_type=resolved_target_type,
928
+ target=resolved_target,
929
+ **format_kwargs,
841
930
  )
842
931
  return int(cmd_load(ns))
843
932
 
@@ -845,21 +934,22 @@ def load_cmd(
845
934
  @app.command('pipeline')
846
935
  def pipeline_cmd(
847
936
  ctx: typer.Context,
848
- config: str = typer.Option(
849
- ...,
850
- '--config',
851
- help='Path to pipeline YAML configuration file',
937
+ config: PipelineConfigOption,
938
+ job: str | None = typer.Option(
939
+ None,
940
+ '--job',
941
+ metavar='JOB',
942
+ help='Run a specific job by name',
852
943
  ),
853
- list_: bool = typer.Option(
944
+ jobs: bool = typer.Option(
854
945
  False,
855
- '--list',
946
+ '--jobs',
856
947
  help='List available job names and exit',
857
948
  ),
858
- run_job: str | None = typer.Option(
949
+ pipeline: str | None = typer.Option(
859
950
  None,
860
- '--run',
861
- metavar='JOB',
862
- help='Run a specific job by name',
951
+ '--pipeline',
952
+ help='Run a specific pipeline by name',
863
953
  ),
864
954
  ) -> int:
865
955
  """
@@ -869,12 +959,14 @@ def pipeline_cmd(
869
959
  ----------
870
960
  ctx : typer.Context
871
961
  Typer execution context provided to the command.
872
- config : str
962
+ config : PipelineConfigOption
873
963
  Path to pipeline YAML configuration file.
874
- list_ : bool
875
- If True, list available job names and exit.
876
- run_job : str | None
964
+ job : str | None, optional
877
965
  Name of a specific job to run.
966
+ jobs : bool, optional
967
+ If True, list available job names and exit.
968
+ pipeline : str | None, optional
969
+ Name of a specific pipeline to run.
878
970
 
879
971
  Returns
880
972
  -------
@@ -882,54 +974,47 @@ def pipeline_cmd(
882
974
  Zero on success.
883
975
  """
884
976
  state = _ensure_state(ctx)
977
+ run_target = job or pipeline
885
978
  ns = _stateful_namespace(
886
979
  state,
887
980
  command='pipeline',
888
981
  config=config,
889
- list=list_,
890
- run=run_job,
982
+ list=jobs,
983
+ run=run_target,
891
984
  )
892
985
  return int(cmd_pipeline(ns))
893
986
 
894
987
 
895
- @app.command('list')
896
- def list_cmd(
988
+ @app.command('run')
989
+ def run_cmd(
897
990
  ctx: typer.Context,
898
- config: str = typer.Option(
899
- ...,
900
- '--config',
901
- help='Path to pipeline YAML configuration file',
902
- ),
903
- pipelines: bool = typer.Option(
904
- False,
905
- '--pipelines',
906
- help='List ETL pipelines',
991
+ config: PipelineConfigOption,
992
+ job: str | None = typer.Option(
993
+ None,
994
+ '-j',
995
+ '--job',
996
+ help='Name of the job to run',
907
997
  ),
908
- sources: bool = typer.Option(False, '--sources', help='List data sources'),
909
- targets: bool = typer.Option(False, '--targets', help='List data targets'),
910
- transforms: bool = typer.Option(
911
- False,
912
- '--transforms',
913
- help='List data transforms',
998
+ pipeline: str | None = typer.Option(
999
+ None,
1000
+ '-p',
1001
+ '--pipeline',
1002
+ help='Name of the pipeline to run',
914
1003
  ),
915
1004
  ) -> int:
916
1005
  """
917
- Print ETL entities from a pipeline YAML configuration.
1006
+ Execute an ETL job or pipeline from a YAML configuration.
918
1007
 
919
1008
  Parameters
920
1009
  ----------
921
1010
  ctx : typer.Context
922
1011
  Typer execution context provided to the command.
923
- config : str
1012
+ config : PipelineConfigOption
924
1013
  Path to pipeline YAML configuration file.
925
- pipelines : bool
926
- If True, list ETL pipelines.
927
- sources : bool
928
- If True, list data sources.
929
- targets : bool
930
- If True, list data targets.
931
- transforms : bool
932
- If True, list data transforms.
1014
+ job : str | None, optional
1015
+ Name of the job to run.
1016
+ pipeline : str | None, optional
1017
+ Name of the pipeline to run.
933
1018
 
934
1019
  Returns
935
1020
  -------
@@ -939,62 +1024,216 @@ def list_cmd(
939
1024
  state = _ensure_state(ctx)
940
1025
  ns = _stateful_namespace(
941
1026
  state,
942
- command='list',
1027
+ command='run',
943
1028
  config=config,
944
- pipelines=pipelines,
945
- sources=sources,
946
- targets=targets,
947
- transforms=transforms,
1029
+ job=job,
1030
+ pipeline=pipeline,
948
1031
  )
949
- return int(cmd_list(ns))
1032
+ return int(cmd_run(ns))
950
1033
 
951
1034
 
952
- @app.command('run')
953
- def run_cmd(
1035
@app.command('transform')
def transform_cmd(
    ctx: typer.Context,
    operations: OperationsJSONOption = '{}',
    source: StreamingSourceArg = '-',
    source_format: SourceFormatOption | None = None,
    source_type: SourceOverrideOption | None = None,
    target: TargetPathOption | None = None,
    target_format: TargetFormatOption | None = None,
    target_type: TargetOverrideOption | None = None,
) -> int:
    r"""
    Transform records using JSON-described operations.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    operations : OperationsJSONOption, optional
        Transformation operations as a JSON string.
    source : StreamingSourceArg, optional
        Data source (file path or ``-`` for stdin).
    source_format : SourceFormatOption | None, optional
        Input payload format when not a file (or when SOURCE is -).
    source_type : SourceOverrideOption | None, optional
        Override the inferred source type.
    target : TargetPathOption | None, optional
        Optional output path. Use ``-`` for stdout.
    target_format : TargetFormatOption | None, optional
        Output payload format when not a file target (or when OUTPUT is -).
        Accepts ``--target-format``.
    target_type : TargetOverrideOption | None, optional
        Override the inferred target type.

    Returns
    -------
    int
        Zero on success.

    Examples
    --------
    - Transform data from a file and write to another file:
        etlplus transform --from file in.json \
            --operations '{"select": ["id", "name"]}' \
            --to file out.json
    - Transform data from stdin and write to stdout:
        cat in.json \
            | etlplus transform \
                --operations '{"filter": {"field": "age", "gt": 30}}'
    - Transform data from a file and write to stdout:
        etlplus transform --from file in.csv \
            --source-format csv \
            --operations '{"select": ["id", "email"]}'
    - Transform data from stdin and write to a file:
        cat in.json \
            | etlplus transform --operations '{"sort": ["-created_at"]}' \
                --to file out.json

    Notes
    -----
    - The ``transform`` command reads JSON from stdin when SOURCE is ``-``.
    - CSV input is unsupported for this command.
    - Convert upstream before piping into ``transform``.
    """
    # The docstring above is a raw string on purpose: its shell examples end
    # lines with a backslash, and in a non-raw literal a backslash followed by
    # a newline is a line continuation that would fuse those lines together
    # in ``__doc__`` (and therefore in the generated ``--help`` text).
    #
    # NOTE(review): the Notes section says CSV input is unsupported, yet one
    # example passes ``--source-format csv`` with ``in.csv`` -- confirm which
    # statement is accurate and align the help text.
    # NOTE(review): "OUTPUT" in the ``target_format`` description looks like a
    # leftover name for the ``target`` parameter -- confirm before renaming.
    state = _ensure_state(ctx)

    # Check each optional CLI value against its allowed choices. The target
    # format is additionally expanded into namespace kwargs, falling back to
    # 'json' as the default passed to the helper.
    source_format = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )
    source_type = _optional_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )
    target_format = _optional_choice(
        target_format,
        _FORMAT_CHOICES,
        label='target_format',
    )
    target_format_kwargs = _format_namespace_kwargs(
        format_value=target_format,
        default='json',
    )
    target_type = _optional_choice(
        target_type,
        _SOURCE_CHOICES,
        label='target_type',
    )

    # An explicit ``source_type`` wins; otherwise fall back to inference.
    # Missing source/target values are replaced with '-'.
    resolved_source_type = source_type or _infer_resource_type_soft(source)
    resolved_source_value = source if source is not None else '-'
    resolved_target_value = target if target is not None else '-'

    # The resolved source type (explicit or inferred) is checked against the
    # same choices whenever one is available.
    if resolved_source_type is not None:
        resolved_source_type = _validate_choice(
            resolved_source_type,
            _SOURCE_CHOICES,
            label='source_type',
        )

    resolved_target_type = _resolve_resource_type(
        explicit_type=None,
        override_type=target_type,
        value=resolved_target_value,
        label='target_type',
    )

    _log_inferred_resource(
        state,
        role='source',
        value=resolved_source_value,
        resource_type=resolved_source_type,
    )
    _log_inferred_resource(
        state,
        role='target',
        value=resolved_target_value,
        resource_type=resolved_target_type,
    )

    # ``operations`` arrives as a JSON string and is parsed by ``json_type``
    # before being handed to the handler via the namespace.
    ns = _stateful_namespace(
        state,
        command='transform',
        source=resolved_source_value,
        source_type=resolved_source_type,
        operations=json_type(operations),
        target=resolved_target_value,
        source_format=source_format,
        target_type=resolved_target_type,
        target_format=target_format_kwargs['format'],
        **target_format_kwargs,
    )
    return int(cmd_transform(ns))
1170
+
1171
+
1172
@app.command('validate')
def validate_cmd(
    ctx: typer.Context,
    rules: RulesJSONOption = '{}',
    source: StreamingSourceArg = '-',
    source_format: SourceFormatOption | None = None,
    source_type: SourceOverrideOption | None = None,
    target: TargetPathOption | None = None,
) -> int:
    """
    Validate data against JSON-described rules.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    rules : RulesJSONOption, optional
        Validation rules as a JSON string.
    source : StreamingSourceArg, optional
        Data source (file path or ``-`` for stdin).
    source_format : SourceFormatOption | None, optional
        Optional stdin format hint (JSON or CSV) when SOURCE is ``-``.
    source_type : SourceOverrideOption | None, optional
        Override the inferred source type when heuristics fail.
    target : TargetPathOption | None, optional
        Optional output path. Use ``-`` for stdout.

    Returns
    -------
    int
        Zero on success.
    """
    # Option checks run before the CLI state is fetched, mirroring the
    # original statement order so any failure surfaces at the same point.
    checked_format = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )
    checked_type = _optional_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )
    format_kwargs = _format_namespace_kwargs(
        format_value=checked_format,
        default='json',
    )

    cli_state = _ensure_state(ctx)

    # A truthy explicit override takes precedence over inference.
    if checked_type:
        effective_type = checked_type
    else:
        effective_type = _infer_resource_type_soft(source)

    _log_inferred_resource(
        cli_state,
        role='source',
        value=source,
        resource_type=effective_type,
    )

    # The rules option is a JSON string on the command line; parse it before
    # packing everything into the namespace handed to the handler.
    parsed_rules = json_type(rules)
    namespace = _stateful_namespace(
        cli_state,
        command='validate',
        source=source,
        source_type=effective_type,
        rules=parsed_rules,
        target=target,
        source_format=checked_format,
        **format_kwargs,
    )
    exit_code = cmd_validate(namespace)
    return int(exit_code)