etlplus 0.8.3__py3-none-any.whl → 0.10.4__py3-none-any.whl
This diff compares the publicly released contents of the two package versions as they appear in their public registry; it is provided for informational purposes only.
- etlplus/cli/commands.py +175 -121
- etlplus/cli/constants.py +13 -7
- etlplus/cli/handlers.py +3 -4
- etlplus/cli/io.py +21 -5
- etlplus/cli/main.py +2 -1
- etlplus/cli/options.py +1 -1
- etlplus/cli/state.py +3 -2
- etlplus/enums.py +195 -1
- etlplus/file.py +11 -16
- etlplus/load.py +1 -1
- etlplus/utils.py +1 -1
- {etlplus-0.8.3.dist-info → etlplus-0.10.4.dist-info}/METADATA +61 -29
- {etlplus-0.8.3.dist-info → etlplus-0.10.4.dist-info}/RECORD +17 -17
- {etlplus-0.8.3.dist-info → etlplus-0.10.4.dist-info}/WHEEL +0 -0
- {etlplus-0.8.3.dist-info → etlplus-0.10.4.dist-info}/entry_points.txt +0 -0
- {etlplus-0.8.3.dist-info → etlplus-0.10.4.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.8.3.dist-info → etlplus-0.10.4.dist-info}/top_level.txt +0 -0
etlplus/cli/commands.py
CHANGED
@@ -4,7 +4,7 @@
 Typer application and subcommands for the ``etlplus`` command-line interface
 (CLI). Typer (Click) is used for CLI parsing, help text, and subcommand
 dispatch. The Typer layer focuses on ergonomics (git-style subcommands,
-optional inference of resource types,
+optional inference of resource types, STDIN/STDOUT piping, and quality-of-life
 flags), while delegating business logic to the existing :func:`*_handler`
 handlers.

@@ -19,7 +19,7 @@ Subcommands

 Notes
 -----
-- Use ``-`` to read from
+- Use ``-`` to read from STDIN or to write to STDOUT.
 - Commands ``extract`` and ``transform`` support the command-line option
   ``--source-type`` to override inferred resource types.
 - Commands ``transform`` and ``load`` support the command-line option
@@ -69,6 +69,16 @@ OperationsOption = Annotated[
     ),
 ]

+OutputOption = Annotated[
+    str | None,
+    typer.Option(
+        '--output',
+        '-o',
+        metavar='PATH',
+        help='Write output to file PATH (default: STDOUT).',
+    ),
+]
+
 PipelineConfigOption = Annotated[
     str,
     typer.Option(
@@ -95,7 +105,7 @@ RenderOutputOption = Annotated[
         '--output',
         '-o',
         metavar='PATH',
-        help='Write rendered SQL to PATH (default:
+        help='Write rendered SQL to PATH (default: STDOUT).',
     ),
 ]

@@ -148,53 +158,50 @@ RulesOption = Annotated[
     ),
 ]

-
-    FileFormat | None,
-    typer.Option(
-        '--source-format',
-        **typer_format_option_kwargs(context='source'),
-    ),
-]
-
-SourceInputArg = Annotated[
+SourceArg = Annotated[
     str,
     typer.Argument(
         ...,
         metavar='SOURCE',
         help=(
-            'Extract from SOURCE
-            '
+            'Extract data from SOURCE (JSON payload, file/folder path, '
+            'URI/URL, or - for STDIN). Use --source-format to override the '
+            'inferred data format and --source-type to override the inferred '
+            'data connector.'
         ),
     ),
 ]

-
+SourceFormatOption = Annotated[
+    FileFormat | None,
+    typer.Option(
+        '--source-format',
+        **typer_format_option_kwargs(context='source'),
+    ),
+]
+
+SourceTypeOption = Annotated[
     str | None,
     typer.Option(
         '--source-type',
         metavar='CONNECTOR',
         show_default=False,
         rich_help_panel='I/O overrides',
-        help=
-
-
-
-StdinFormatOption = Annotated[
-    FileFormat | None,
-    typer.Option(
-        '--source-format',
-        **typer_format_option_kwargs(context='source'),
+        help=(
+            'Override the inferred source type (api, database, file, folder).'
+        ),
     ),
 ]

-
+TargetArg = Annotated[
     str,
     typer.Argument(
         ...,
-        metavar='
+        metavar='TARGET',
         help=(
-            '
-            '-
+            'Load data into TARGET (file/folder path, URI/URL, or - for '
+            'STDOUT). Use --target-format to override the inferred data '
+            'format and --target-type to override the inferred data connector.'
         ),
     ),
 ]
@@ -207,36 +214,16 @@ TargetFormatOption = Annotated[
     ),
 ]

-
-    str,
-    typer.Argument(
-        ...,
-        metavar='TARGET',
-        help=(
-            'Load JSON data from stdin into TARGET. Use --to/--target-type '
-            'to override connector inference when needed. Source data must '
-            'be piped into stdin.'
-        ),
-    ),
-]
-
-TargetOverrideOption = Annotated[
+TargetTypeOption = Annotated[
     str | None,
     typer.Option(
         '--target-type',
         metavar='CONNECTOR',
         show_default=False,
         rich_help_panel='I/O overrides',
-        help=
-
-
-
-TargetPathOption = Annotated[
-    str | None,
-    typer.Option(
-        '--target',
-        metavar='PATH',
-        help='Target file for transformed or validated output (- for stdout).',
+        help=(
+            'Override the inferred target type (api, database, file, folder).'
+        ),
     ),
 ]

@@ -314,7 +301,7 @@ def _root(
         False,
         '--verbose',
         '-v',
-        help='Emit extra diagnostics to
+        help='Emit extra diagnostics to STDERR.',
     ),
 ) -> None:
     """
@@ -332,7 +319,7 @@ def _root(
         Whether to suppress warnings and non-essential output. Default is
         ``False``.
     verbose : bool, optional
-        Whether to emit extra diagnostics to
+        Whether to emit extra diagnostics to STDERR. Default is ``False``.

     Raises
     ------
@@ -412,7 +399,17 @@ def check_cmd(
     -------
     int
         Exit code.
+
+    Raises
+    ------
+    typer.Exit
+        When argument order is invalid or required arguments are missing.
     """
+    # Argument order enforcement.
+    if not config:
+        typer.echo("Error: Missing required option '--config'.", err=True)
+        raise typer.Exit(2)
+
     state = ensure_state(ctx)
     return int(
         handlers.check_handler(
@@ -431,9 +428,9 @@ def check_cmd(
 @app.command('extract')
 def extract_cmd(
     ctx: typer.Context,
-    source:
-    source_format: SourceFormatOption
-    source_type:
+    source: SourceArg = '-',
+    source_format: SourceFormatOption = None,
+    source_type: SourceTypeOption = None,
 ) -> int:
     """
     Extract data from files, databases, or REST APIs.
@@ -442,23 +439,40 @@ def extract_cmd(
     ----------
     ctx : typer.Context
         The Typer context.
-    source :
-
-
-    source_format : SourceFormatOption
-
-        Default is
-    source_type : SourceOverrideOption | None, optional
-        Override the inferred source type (file, database, api). Default is
+    source : SourceArg, optional
+        Source (JSON payload, file/folder path, URL/URI, or - for STDIN)
+        from which to extract data. Default is ``-``.
+    source_format : SourceFormatOption, optional
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
         ``None``.
+    source_type : SourceTypeOption, optional
+        Data source type. Overrides the inferred type (``api``, ``database``,
+        ``file``, ``folder``) based on URI/URL schema. Default is ``None``.

     Returns
     -------
     int
         Exit code.
+
+    Raises
+    ------
+    typer.Exit
+        When argument order is invalid or required arguments are missing.
     """
     state = ensure_state(ctx)

+    # Argument order enforcement
+    if source.startswith('--'):
+        typer.echo(
+            f"Error: Option '{source}' must follow the 'SOURCE' argument.",
+            err=True,
+        )
+        raise typer.Exit(2)
+    if not source:
+        typer.echo("Error: Missing required argument 'SOURCE'.", err=True)
+        raise typer.Exit(2)
+
     source_type = optional_choice(
         source_type,
         DATA_CONNECTORS,
@@ -473,22 +487,19 @@ def extract_cmd(
         ),
     )

-
-    resolved_source_type = source_type or infer_resource_type_or_exit(
-        resolved_source,
-    )
+    resolved_source_type = source_type or infer_resource_type_or_exit(source)

     log_inferred_resource(
         state,
         role='source',
-        value=
+        value=source,
         resource_type=resolved_source_type,
     )

     return int(
         handlers.extract_handler(
             source_type=resolved_source_type,
-            source=
+            source=source,
             format_hint=source_format,
             format_explicit=source_format is not None,
             pretty=state.pretty,
@@ -499,10 +510,10 @@ def extract_cmd(
 @app.command('load')
 def load_cmd(
     ctx: typer.Context,
-
-
+    source_format: SourceFormatOption = None,
+    target: TargetArg = '-',
     target_format: TargetFormatOption = None,
-    target_type:
+    target_type: TargetTypeOption = None,
 ) -> int:
     """
     Load data into a file, database, or REST API.
@@ -511,29 +522,45 @@ def load_cmd(
     ----------
     ctx : typer.Context
         The Typer context.
-
-
-
-        into stdin.
-    source_format : StdinFormatOption, optional
-        Format of the source. Overrides filename-based inference when provided.
-        Default is ``None``.
-    target_format : TargetFormatOption, optional
-        Format of the target. Overrides filename-based inference when provided.
-        Default is ``None``.
-    target_type : TargetOverrideOption, optional
-        Override the inferred target type (file, database, api). Default is
+    source_format : SourceFormatOption, optional
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
         ``None``.
+    target : TargetArg, optional
+        Target (file/folder path, URL/URI, or - for STDOUT) into which to load
+        data. Default is ``-``.
+    target_format : TargetFormatOption, optional
+        Target data format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension. Default is ``None``.
+    target_type : TargetTypeOption, optional
+        Data target type. Overrides the inferred type (``api``, ``database``,
+        ``file``, ``folder``) based on URI/URL schema. Default is ``None``.

     Returns
     -------
     int
         Exit code.
+
+    Raises
+    ------
+    typer.Exit
+        When argument order is invalid or required arguments are missing.
     """
+    # Argument order enforcement
+    if target.startswith('--'):
+        typer.echo(
+            f"Error: Option '{target}' must follow the 'TARGET' argument.",
+            err=True,
+        )
+        raise typer.Exit(2)
+    if not target:
+        typer.echo("Error: Missing required argument 'TARGET'.", err=True)
+        raise typer.Exit(2)
+
     state = ensure_state(ctx)

     source_format = cast(
-
+        SourceFormatOption,
         optional_choice(
             source_format,
             FILE_FORMATS,
@@ -597,7 +624,7 @@ def render_cmd(
     table: RenderTableOption = None,
     template: RenderTemplateOption = 'ddl',
     template_path: RenderTemplatePathOption = None,
-    output:
+    output: OutputOption = None,
 ) -> int:
     """
     Render SQL DDL from table schemas defined in YAML/JSON configs.
@@ -616,14 +643,27 @@ def render_cmd(
         Template key (ddl/view) or path to a Jinja template file.
     template_path : RenderTemplatePathOption, optional
         Explicit path to a Jinja template file (overrides template key).
-    output :
-
+    output : OutputOption, optional
+        Path of file to which to write rendered SQL (default: STDOUT).

     Returns
     -------
     int
         Exit code.
+
+    Raises
+    ------
+    typer.Exit
+        When argument order is invalid or required arguments are missing.
     """
+    # Argument order enforcement
+    if not (config or spec):
+        typer.echo(
+            "Error: Missing required option '--config' or '--spec'.",
+            err=True,
+        )
+        raise typer.Exit(2)
+
     state = ensure_state(ctx)
     return int(
         handlers.render_handler(
@@ -674,7 +714,17 @@ def run_cmd(
     -------
     int
         Exit code.
+
+    Raises
+    ------
+    typer.Exit
+        When argument order is invalid or required arguments are missing.
     """
+    # Argument order enforcement
+    if not config:
+        typer.echo("Error: Missing required option '--config'.", err=True)
+        raise typer.Exit(2)
+
     state = ensure_state(ctx)
     return int(
         handlers.run_handler(
@@ -690,12 +740,12 @@
 def transform_cmd(
     ctx: typer.Context,
     operations: OperationsOption = '{}',
-    source:
+    source: SourceArg = '-',
     source_format: SourceFormatOption = None,
-    source_type:
-    target:
+    source_type: SourceTypeOption = None,
+    target: TargetArg = '-',
     target_format: TargetFormatOption = None,
-    target_type:
+    target_type: TargetTypeOption = None,
 ) -> int:
     """
     Transform records using JSON-described operations.
@@ -704,24 +754,27 @@ def transform_cmd(
     ----------
     ctx : typer.Context
         The Typer context.
-    operations : OperationsOption
-        Transformation operations as JSON string.
-    source :
-
+    operations : OperationsOption, optional
+        Transformation operations as JSON string. Default is ``{}``.
+    source : SourceArg, optional
+        Source (JSON payload, file/folder path, URL/URI, or - for STDIN) from
+        which to extract data. Default is ``-``.
     source_format : SourceFormatOption, optional
-
-        Default is
-    source_type : SourceOverrideOption, optional
-        Override the inferred source type (file, database, api). Default is
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
         ``None``.
-
-
+    source_type : SourceTypeOption, optional
+        Data source type. Overrides the inferred type (``api``, ``database``,
+        ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
+    target : TargetArg, optional
+        Target (file/folder path, URL/URI, or - for STDOUT) into which to load
+        data. Default is ``-``.
     target_format : TargetFormatOption, optional
-
-        Default is ``None``.
-    target_type :
-
-        ``None``.
+        Target data format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension. Default is ``None``.
+    target_type : TargetTypeOption, optional
+        Data target type. Overrides the inferred type (``api``, ``database``,
+        ``file``, ``folder``) based on URI/URL schema. Default is ``None``.

     Returns
     -------
@@ -805,10 +858,10 @@ def transform_cmd(
 def validate_cmd(
     ctx: typer.Context,
     rules: RulesOption = '{}',
-    source:
+    source: SourceArg = '-',
     source_format: SourceFormatOption = None,
-    source_type:
-
+    source_type: SourceTypeOption = None,
+    output: OutputOption = '-',
 ) -> int:
     """
     Validate data against JSON-described rules.
@@ -819,16 +872,17 @@
         The Typer context.
     rules : RulesOption
         Validation rules as JSON string.
-    source :
-        Data source to validate (path, JSON payload, or - for
+    source : SourceArg
+        Data source to validate (path, JSON payload, or - for STDIN).
     source_format : SourceFormatOption, optional
-
-        Default is
-    source_type : SourceOverrideOption, optional
-        Override the inferred source type (file, database, api). Default is
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
        ``None``.
-
-
+    source_type : SourceTypeOption, optional
+        Data source type. Overrides the inferred type (``api``, ``database``,
+        ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
+    output : OutputOption, optional
+        Output file for validated output (- for STDOUT). Default is ``None``.

     Returns
     -------
@@ -863,7 +917,7 @@ def validate_cmd(
             source=source,
             rules=_parse_json_option(rules, '--rules'),
             source_format=source_format,
-            target=
+            target=output,
             format_explicit=source_format is not None,
             pretty=state.pretty,
         ),
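
Every subcommand in this release now front-loads the same argument-order guard before dispatching to its handler. Below is a minimal, self-contained sketch of that pattern; the toy `demo` command is hypothetical (not part of etlplus), while the guard bodies are taken from the `extract_cmd` hunk above:

```python
import typer

app = typer.Typer()


@app.command()
def demo(source: str = typer.Argument('-', metavar='SOURCE')) -> None:
    # When options are placed before SOURCE, Typer can capture an
    # option-like token in the positional slot; reject it explicitly.
    if source.startswith('--'):
        typer.echo(
            f"Error: Option '{source}' must follow the 'SOURCE' argument.",
            err=True,
        )
        raise typer.Exit(2)
    if not source:
        typer.echo("Error: Missing required argument 'SOURCE'.", err=True)
        raise typer.Exit(2)
    typer.echo(f'OK: {source}')


if __name__ == '__main__':
    app()
```

Exiting with code 2 matches Click's convention for usage errors, so shell pipelines can distinguish a bad invocation from a failed ETL run.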
etlplus/cli/constants.py
CHANGED
@@ -37,18 +37,24 @@ CLI_DESCRIPTION: Final[str] = '\n'.join(
     [
         'ETLPlus - A Swiss Army knife for simple ETL operations.',
         '',
-        '
+        'Provide a subcommand and options. Examples:',
         '',
         '  etlplus extract in.csv > out.json',
         '  etlplus validate in.json --rules "{"required": ["id"]}"',
         (
-            '  etlplus transform --
-            '--operations "{"select": ["id"]}"
+            '  etlplus transform in.json --source-type file out.json '
+            '--target-type file --operations "{"select": ["id"]}"'
+        ),
+        (
+            '  etlplus extract in.csv | '
+            'etlplus load out.json --target-type file'
+        ),
+        (
+            '  cat data.json | '
+            'etlplus load https://example.com/data --target-type api'
         ),
-        '  etlplus extract in.csv | etlplus load --to file out.json',
-        '  cat data.json | etlplus load --to api https://example.com/data',
         '',
-        '
+        'Override format inference when extensions are misleading:',
         '',
         '  etlplus extract data.txt --source-format csv',
         '  etlplus load payload.bin --target-format json',
@@ -57,7 +63,7 @@ CLI_DESCRIPTION: Final[str] = '\n'.join(
 CLI_EPILOG: Final[str] = '\n'.join(
     [
         'Tip:',
-        '
+        '`--source-format` and `--target-format` override format inference '
         'based on filename extensions when needed.',
     ],
 )
etlplus/cli/handlers.py
CHANGED
@@ -637,18 +637,17 @@ def validate_handler(
     field_rules = cast(Mapping[str, FieldRules], rules_payload)
     result = validate(payload, field_rules)

-
-    if target_path:
+    if target and target != '-':
         validated_data = result.get('data')
         if validated_data is not None:
             cli_io.write_json_output(
                 validated_data,
-
+                target,
                 success_message='Validation result saved to',
             )
         else:
             print(
-                f'Validation failed, no data to save for {
+                f'Validation failed, no data to save for {target}',
                 file=sys.stderr,
             )
     else:
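
The rewritten branch treats `-` as the STDOUT sentinel rather than as a file path. The predicate is small enough to state on its own; a standalone sketch (the helper name is illustrative, not from the package):

```python
def should_write_to_file(target: str | None) -> bool:
    # Write to a file only for a real path; '-', '' or None mean STDOUT.
    return bool(target) and target != '-'


assert should_write_to_file('out.json') is True
assert should_write_to_file('-') is False
assert should_write_to_file('') is False
assert should_write_to_file(None) is False
```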
etlplus/cli/io.py
CHANGED
@@ -1,7 +1,7 @@
 """
 :mod:`etlplus.cli.io` module.

-Shared I/O helpers for CLI handlers (
+Shared I/O helpers for CLI handlers (STDIN/STDOUT, payload hydration).
 """

 from __future__ import annotations
@@ -78,7 +78,7 @@ def emit_or_write(
     data : Any
         The data to serialize.
     output_path : str | None
-        Target file path; when falsy or ``'-'`` data is emitted to
+        Target file path; when falsy or ``'-'`` data is emitted to STDOUT.
     pretty : bool
         Whether to pretty-print JSON emission.
     success_message : str
@@ -138,6 +138,11 @@ def materialize_file_payload(
     JSONData | object
         The materialized payload if a file was read, otherwise the original
         source.
+
+    Raises
+    ------
+    FileNotFoundError
+        When the specified file does not exist.
     """
     if isinstance(source, (dict, list)):
         return cast(JSONData, source)
@@ -164,6 +169,17 @@ def materialize_file_payload(

     if fmt is None:
         return source
+    if not path.exists():
+        if isinstance(source, str):
+            stripped = source.lstrip()
+            hint = (format_hint or '').strip().lower()
+            if (
+                stripped.startswith(('{', '['))
+                or '\n' in source
+                or (hint == 'csv' and ',' in source)
+            ):
+                return parse_text_payload(source, format_hint)
+        raise FileNotFoundError(f'File not found: {path}')
     if fmt == FileFormat.CSV:
         return read_csv_rows(path)
     return File(path, fmt).read()
@@ -246,7 +262,7 @@ def read_csv_rows(


 def read_stdin_text() -> str:
-    """Return entire
+    """Return entire STDIN payload."""
     return sys.stdin.read()


@@ -258,12 +274,12 @@ def resolve_cli_payload(
     hydrate_files: bool = True,
 ) -> JSONData | object:
     """
-    Normalize CLI-provided payloads, honoring
+    Normalize CLI-provided payloads, honoring STDIN and inline data.

     Parameters
     ----------
     source : object
-        The source payload, potentially
+        The source payload, potentially STDIN or a file path.
     format_hint : str | None
         An optional format hint (e.g., 'json', 'csv').
     format_explicit : bool
etlplus/cli/main.py
CHANGED
@@ -34,7 +34,7 @@ def _emit_context_help(
     ctx: click.Context | None,
 ) -> bool:
     """
-    Mirror Click help output for the provided context onto
+    Mirror Click help output for the provided context onto STDERR.

     Parameters
     ----------
@@ -92,6 +92,7 @@ def _is_illegal_option_error(
         exc,
         (
             click.exceptions.BadOptionUsage,
+            click.exceptions.BadParameter,
             click.exceptions.NoSuchOption,
         ),
     )
etlplus/cli/options.py
CHANGED
@@ -43,7 +43,7 @@ def typer_format_option_kwargs(
         'show_default': False,
         'rich_help_panel': rich_help_panel,
         'help': (
-            f'Payload format when the {context} is
+            f'Payload format when the {context} is STDIN/inline or a '
             'non-file connector. File connectors infer from extensions.'
         ),
     }
etlplus/cli/state.py
CHANGED
@@ -130,7 +130,8 @@ def infer_resource_type(
         return 'file'

     raise ValueError(
-        'Could not infer resource type.
+        'Could not infer resource type. '
+        'Use --source-type/--target-type to specify it.',
     )


@@ -304,7 +305,7 @@ def validate_choice(

     Parameters
     ----------
-    value : str
+    value : str | object
         The input value to validate.
     choices : Collection[str]
         The set of valid choices.
etlplus/enums.py
CHANGED
@@ -8,6 +8,7 @@ from __future__ import annotations

 import enum
 import operator as _op
+from pathlib import PurePath
 from statistics import fmean
 from typing import Self

@@ -19,16 +20,21 @@ from .types import StrStrMap


 __all__ = [
+    # Enums
     'AggregateName',
     'CoercibleStrEnum',
+    'CompressionFormat',
     'DataConnectorType',
     'FileFormat',
     'HttpMethod',
     'OperatorName',
     'PipelineStep',
+    # Functions
+    'coerce_compression_format',
     'coerce_data_connector_type',
     'coerce_file_format',
     'coerce_http_method',
+    'infer_file_format_and_compression',
 ]


@@ -172,6 +178,39 @@ class AggregateName(CoercibleStrEnum):
         return lambda xs, n: (fmean(xs) if xs else 0.0)


+class CompressionFormat(CoercibleStrEnum):
+    """Supported compression formats for data files."""
+
+    # -- Constants -- #
+
+    GZ = 'gz'
+    ZIP = 'zip'
+
+    # -- Class Methods -- #
+
+    @classmethod
+    def aliases(cls) -> StrStrMap:
+        """
+        Return a mapping of common aliases for each enum member.
+
+        Returns
+        -------
+        StrStrMap
+            A mapping of alias names to their corresponding enum member names.
+        """
+        return {
+            # File extensions
+            '.gz': 'gz',
+            '.gzip': 'gz',
+            '.zip': 'zip',
+            # MIME types
+            'application/gzip': 'gz',
+            'application/x-gzip': 'gz',
+            'application/zip': 'zip',
+            'application/x-zip-compressed': 'zip',
+        }
+
+
 class DataConnectorType(CoercibleStrEnum):
     """Supported data connector types."""

@@ -208,8 +247,19 @@ class FileFormat(CoercibleStrEnum):

     # -- Constants -- #

+    AVRO = 'avro'
     CSV = 'csv'
+    FEATHER = 'feather'
+    GZ = 'gz'
     JSON = 'json'
+    NDJSON = 'ndjson'
+    ORC = 'orc'
+    PARQUET = 'parquet'
+    TSV = 'tsv'
+    TXT = 'txt'
+    XLS = 'xls'
+    XLSX = 'xlsx'
+    ZIP = 'zip'
     XML = 'xml'
     YAML = 'yaml'

@@ -227,11 +277,61 @@ class FileFormat(CoercibleStrEnum):
         """
         return {
             # Common shorthand
+            'parq': 'parquet',
             'yml': 'yaml',
+            # File extensions
+            '.avro': 'avro',
+            '.csv': 'csv',
+            '.feather': 'feather',
+            '.gz': 'gz',
+            '.json': 'json',
+            '.jsonl': 'ndjson',
+            '.ndjson': 'ndjson',
+            '.orc': 'orc',
+            '.parquet': 'parquet',
+            '.pq': 'parquet',
+            '.tsv': 'tsv',
+            '.txt': 'txt',
+            '.xls': 'xls',
+            '.xlsx': 'xlsx',
+            '.zip': 'zip',
+            '.xml': 'xml',
+            '.yaml': 'yaml',
+            '.yml': 'yaml',
             # MIME types
-            '
+            'application/avro': 'avro',
+            'application/csv': 'csv',
+            'application/feather': 'feather',
+            'application/gzip': 'gz',
             'application/json': 'json',
+            'application/jsonlines': 'ndjson',
+            'application/ndjson': 'ndjson',
+            'application/orc': 'orc',
+            'application/parquet': 'parquet',
+            'application/vnd.apache.avro': 'avro',
+            'application/vnd.apache.parquet': 'parquet',
+            'application/vnd.apache.arrow.file': 'feather',
+            'application/vnd.apache.orc': 'orc',
+            'application/vnd.ms-excel': 'xls',
+            (
+                'application/vnd.openxmlformats-'
+                'officedocument.spreadsheetml.sheet'
+            ): 'xlsx',
+            'application/x-avro': 'avro',
+            'application/x-csv': 'csv',
+            'application/x-feather': 'feather',
+            'application/x-orc': 'orc',
+            'application/x-ndjson': 'ndjson',
+            'application/x-parquet': 'parquet',
+            'application/x-yaml': 'yaml',
             'application/xml': 'xml',
+            'application/zip': 'zip',
+            'text/csv': 'csv',
+            'text/plain': 'txt',
+            'text/tab-separated-values': 'tsv',
+            'text/tsv': 'tsv',
+            'text/xml': 'xml',
+            'text/yaml': 'yaml',
         }


@@ -365,6 +465,13 @@ class PipelineStep(CoercibleStrEnum):
 # SECTION: INTERNAL CONSTANTS ============================================== #


+# Compression formats that are also file formats.
+_COMPRESSION_FILE_FORMATS: set[FileFormat] = {
+    FileFormat.GZ,
+    FileFormat.ZIP,
+}
+
+
 # Precomputed order index for PipelineStep; avoids recomputing on each access.
 _PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
     PipelineStep.FILTER: 0,
@@ -402,6 +509,18 @@ def coerce_file_format(
     return FileFormat.coerce(file_format)


+def coerce_compression_format(
+    compression_format: CompressionFormat | str,
+) -> CompressionFormat:
+    """
+    Normalize textual compression format values to :class:`CompressionFormat`.
+
+    This thin wrapper is kept for backward compatibility; prefer
+    :meth:`CompressionFormat.coerce` going forward.
+    """
+    return CompressionFormat.coerce(compression_format)
+
+
 def coerce_http_method(
     http_method: HttpMethod | str,
 ) -> HttpMethod:
@@ -412,3 +531,78 @@
     :meth:`HttpMethod.coerce` going forward.
     """
     return HttpMethod.coerce(http_method)
+
+
+def infer_file_format_and_compression(
+    value: object,
+    filename: object | None = None,
+) -> tuple[FileFormat | None, CompressionFormat | None]:
+    """
+    Infer data format and compression from a filename, extension, or MIME type.
+
+    Parameters
+    ----------
+    value : object
+        A filename, extension, MIME type, or existing enum member.
+    filename : object | None, optional
+        A filename to consult for extension-based inference (e.g. when
+        ``value`` is ``application/octet-stream``).
+
+    Returns
+    -------
+    tuple[FileFormat | None, CompressionFormat | None]
+        The inferred data format and compression, if any.
+    """
+    if isinstance(value, FileFormat):
+        if value in _COMPRESSION_FILE_FORMATS:
+            return None, CompressionFormat.coerce(value.value)
+        return value, None
+    if isinstance(value, CompressionFormat):
+        return None, value
+
+    text = str(value).strip()
+    if not text:
+        return None, None
+
+    normalized = text.casefold()
+    mime = normalized.split(';', 1)[0].strip()
+
+    is_octet_stream = mime == 'application/octet-stream'
+    compression = CompressionFormat.try_coerce(mime)
+    fmt = None if is_octet_stream else FileFormat.try_coerce(mime)
+
+    is_mime = mime.startswith(
+        (
+            'application/',
+            'text/',
+            'audio/',
+            'image/',
+            'video/',
+            'multipart/',
+        ),
+    )
+    suffix_source: object | None = filename if filename is not None else text
+    if is_mime and filename is None:
+        suffix_source = None
+
+    suffixes = (
+        PurePath(str(suffix_source)).suffixes
+        if suffix_source is not None
+        else []
+    )
+    if suffixes:
+        normalized_suffixes = [suffix.casefold() for suffix in suffixes]
+        compression = (
+            CompressionFormat.try_coerce(normalized_suffixes[-1])
+            or compression
+        )
+        if compression is not None:
+            normalized_suffixes = normalized_suffixes[:-1]
+        if normalized_suffixes:
+            fmt = FileFormat.try_coerce(normalized_suffixes[-1]) or fmt
+
+    if fmt in _COMPRESSION_FILE_FORMATS:
+        compression = compression or CompressionFormat.coerce(fmt.value)
+        fmt = None
+
+    return fmt, compression
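
Tracing `infer_file_format_and_compression` over a few inputs shows the intended split between data format and compression. The expected results below are derived by hand from the implementation above and assume the installed package:

```python
from etlplus.enums import CompressionFormat
from etlplus.enums import FileFormat
from etlplus.enums import infer_file_format_and_compression

# Double suffix: '.gz' is peeled off as compression, '.csv' gives the format.
assert infer_file_format_and_compression('report.csv.gz') == (
    FileFormat.CSV,
    CompressionFormat.GZ,
)

# Bare compression extension: no underlying data format to report.
assert infer_file_format_and_compression('archive.zip') == (
    None,
    CompressionFormat.ZIP,
)

# MIME type: resolved via the alias table, no suffix probing.
assert infer_file_format_and_compression('application/x-parquet') == (
    FileFormat.PARQUET,
    None,
)

# Generic MIME type plus a filename: the filename drives inference.
assert infer_file_format_and_compression(
    'application/octet-stream',
    filename='data.ndjson',
) == (FileFormat.NDJSON, None)
```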
etlplus/file.py
CHANGED
@@ -16,6 +16,7 @@ from typing import Any
 from typing import cast

 from .enums import FileFormat
+from .enums import infer_file_format_and_compression
 from .types import JSONData
 from .types import JSONDict
 from .types import JSONList
@@ -33,15 +34,6 @@ __all__ = ['File']

 _DEFAULT_XML_ROOT = 'root'

-# Map common filename extensions to FileFormat (used for inference)
-_EXT_TO_FORMAT: dict[str, FileFormat] = {
-    'csv': FileFormat.CSV,
-    'json': FileFormat.JSON,
-    'xml': FileFormat.XML,
-    'yaml': FileFormat.YAML,
-    'yml': FileFormat.YAML,
-}
-
 # Optional YAML support (lazy-loaded to avoid hard dependency)
 # Cached access function to avoid global statements.
 _YAML_CACHE: dict[str, Any] = {}
@@ -246,14 +238,17 @@ class File:
         ValueError
             If the extension is unknown or unsupported.
         """
-
-
-        return
-
+        fmt, compression = infer_file_format_and_compression(self.path)
+        if fmt is not None:
+            return fmt
+        if compression is not None:
             raise ValueError(
-                'Cannot infer file format from '
-                f'
-            )
+                'Cannot infer file format from compressed file '
+                f'{self.path!r} with compression {compression.value!r}',
+            )
+        raise ValueError(
+            f'Cannot infer file format from extension {self.path.suffix!r}',
+        )

     # -- Instance Methods (Generic API) -- #

etlplus/load.py
CHANGED
@@ -104,7 +104,7 @@ def load_data(
         return File(source, FileFormat.JSON).read_json()

     if isinstance(source, str):
-        # Special case: '-' means read JSON from
+        # Special case: '-' means read JSON from STDIN (Unix convention).
         if source == '-':
             raw = sys.stdin.read()
             return _parse_json_string(raw)
|
etlplus/utils.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: etlplus
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.10.4
|
|
4
4
|
Summary: A Swiss Army knife for simple ETL operations
|
|
5
5
|
Home-page: https://github.com/Dagitali/ETLPlus
|
|
6
6
|
Author: ETLPlus Team
|
|
@@ -64,6 +64,7 @@ package and command-line interface for data extraction, validation, transformati
|
|
|
64
64
|
- [Quickstart](#quickstart)
|
|
65
65
|
- [Usage](#usage)
|
|
66
66
|
- [Command Line Interface](#command-line-interface)
|
|
67
|
+
- [Argument Order and Required Options](#argument-order-and-required-options)
|
|
67
68
|
- [Check Pipelines](#check-pipelines)
|
|
68
69
|
- [Render SQL DDL](#render-sql-ddl)
|
|
69
70
|
- [Extract Data](#extract-data)
|
|
@@ -151,8 +152,8 @@ etlplus --version
|
|
|
151
152
|
|
|
152
153
|
# One-liner: extract CSV, filter, select, and write JSON
|
|
153
154
|
etlplus extract file examples/data/sample.csv \
|
|
154
|
-
| etlplus transform
|
|
155
|
-
-
|
|
155
|
+
| etlplus transform --operations '{"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}' \
|
|
156
|
+
- temp/sample_output.json
|
|
156
157
|
```
|
|
157
158
|
|
|
158
159
|
[Python API](#python-api):
|
|
@@ -185,6 +186,27 @@ etlplus --version
|
|
|
185
186
|
The CLI is implemented with Typer (Click-based). There is no argparse compatibility layer, so rely
|
|
186
187
|
on the documented commands/flags and run `etlplus <command> --help` for current options.
|
|
187
188
|
|
|
189
|
+
**Example error messages:**
|
|
190
|
+
|
|
191
|
+
- If you omit a required argument: `Error: Missing required argument 'SOURCE'.`
|
|
192
|
+
- If you place an option before its argument: `Error: Option '--source-format' must follow the 'SOURCE' argument.`
|
|
193
|
+
|
|
194
|
+
#### Argument Order and Required Options
|
|
195
|
+
|
|
196
|
+
For each command, positional arguments must precede options. Required options must follow their
|
|
197
|
+
associated argument:
|
|
198
|
+
|
|
199
|
+
- **extract**: `etlplus extract SOURCE [--source-format ...] [--source-type ...]`
|
|
200
|
+
- `SOURCE` is required. `--source-format` and `--source-type` must follow `SOURCE`.
|
|
201
|
+
- **transform**: `etlplus transform [--operations ...] SOURCE [--source-format ...] [--source-type ...] TARGET [--target-format ...] [--target-type ...]`
|
|
202
|
+
- `SOURCE` and `TARGET` are required. Format/type options must follow their respective argument.
|
|
203
|
+
- **load**: `etlplus load TARGET [--target-format ...] [--target-type ...] [--source-format ...]`
|
|
204
|
+
- `TARGET` is required. `--target-format` and `--target-type` must follow `TARGET`.
|
|
205
|
+
- **validate**: `etlplus validate SOURCE [--rules ...] [--source-format ...] [--source-type ...]`
|
|
206
|
+
- `SOURCE` is required. `--rules` and format/type options must follow `SOURCE`.
|
|
207
|
+
|
|
208
|
+
If required arguments or options are missing, or if options are placed before their associated argument, the CLI will display a clear error message.
|
|
209
|
+
|
|
188
210
|
#### Check Pipelines
|
|
189
211
|
|
|
190
212
|
Use `etlplus check` to explore pipeline YAML definitions without running them. The command can print
|
|
@@ -251,7 +273,7 @@ etlplus extract api https://api.example.com/data
|
|
|
251
273
|
|
|
252
274
|
Save extracted data to file:
|
|
253
275
|
```bash
|
|
254
|
-
etlplus extract file examples/data/sample.csv
|
|
276
|
+
etlplus extract file examples/data/sample.csv > temp/sample_output.json
|
|
255
277
|
```
|
|
256
278
|
|
|
257
279
|
#### Validate Data
|
|
@@ -270,59 +292,67 @@ etlplus validate examples/data/sample.json --rules '{"email": {"type": "string",
|
|
|
270
292
|
|
|
271
293
|
When piping data through `etlplus transform`, use `--source-format` whenever the SOURCE argument is
|
|
272
294
|
`-` or a literal payload, mirroring the `etlplus extract` semantics. Use `--target-format` to
|
|
273
|
-
control the emitted format for
|
|
274
|
-
paths continue to infer formats from their extensions. Use `--
|
|
275
|
-
connector type and `--
|
|
276
|
-
extract`/`etlplus load` behavior.
|
|
295
|
+
control the emitted format for STDOUT or other non-file outputs, just like `etlplus load`. File
|
|
296
|
+
paths continue to infer formats from their extensions. Use `--source-type` to override the inferred
|
|
297
|
+
source connector type and `--target-type` to override the inferred target connector type, matching
|
|
298
|
+
the `etlplus extract`/`etlplus load` behavior.
|
|
277
299
|
|
|
278
300
|
Transform file inputs while overriding connector types:
|
|
279
301
|
```bash
|
|
280
|
-
etlplus transform
|
|
302
|
+
etlplus transform \
|
|
281
303
|
--operations '{"select": ["name", "email"]}' \
|
|
282
|
-
--
|
|
304
|
+
examples/data/sample.json --source-type file \
|
|
305
|
+
temp/selected_output.json --target-type file
|
|
283
306
|
```
|
|
284
307
|
|
|
285
308
|
Filter and select fields:
|
|
286
309
|
```bash
|
|
287
|
-
etlplus transform
|
|
288
|
-
--operations '{"filter": {"field": "age", "op": "gt", "value": 26}, "select": ["name"]}'
|
|
310
|
+
etlplus transform \
|
|
311
|
+
--operations '{"filter": {"field": "age", "op": "gt", "value": 26}, "select": ["name"]}' \
|
|
312
|
+
'[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]'
|
|
289
313
|
```
|
|
290
314
|
|
|
291
315
|
Sort data:
|
|
292
316
|
```bash
|
|
293
|
-
etlplus transform
|
|
317
|
+
etlplus transform \
|
|
318
|
+
--operations '{"sort": {"field": "age", "reverse": true}}' \
|
|
319
|
+
examples/data/sample.json
|
|
294
320
|
```
|
|
295
321
|
|
|
296
322
|
Aggregate data:
|
|
297
323
|
```bash
|
|
298
|
-
etlplus transform
|
|
324
|
+
etlplus transform \
|
|
325
|
+
--operations '{"aggregate": {"field": "age", "func": "sum"}}' \
|
|
326
|
+
examples/data/sample.json
|
|
299
327
|
```
|
|
300
328
|
|
|
301
329
|
Map/rename fields:
|
|
302
330
|
```bash
|
|
303
|
-
etlplus transform
|
|
331
|
+
etlplus transform \
|
|
332
|
+
--operations '{"map": {"name": "new_name"}}' \
|
|
333
|
+
examples/data/sample.json
|
|
304
334
|
```
|
|
305
335
|
|
|
306
336
|
#### Load Data
|
|
307
337
|
|
|
308
|
-
`etlplus load` consumes JSON from
|
|
338
|
+
`etlplus load` consumes JSON from STDIN; provide only the target argument plus optional flags.
|
|
309
339
|
|
|
310
340
|
Load to JSON file:
|
|
311
341
|
```bash
|
|
312
342
|
etlplus extract file examples/data/sample.json \
|
|
313
|
-
| etlplus load
|
|
343
|
+
| etlplus load temp/sample_output.json --target-type file
|
|
314
344
|
```
|
|
315
345
|
|
|
316
346
|
Load to CSV file:
|
|
317
347
|
```bash
|
|
318
348
|
etlplus extract file examples/data/sample.csv \
|
|
319
|
-
| etlplus load
|
|
349
|
+
| etlplus load temp/sample_output.csv --target-type file
|
|
320
350
|
```
|
|
321
351
|
|
|
322
352
|
Load to REST API:
|
|
323
353
|
```bash
|
|
324
354
|
cat examples/data/sample.json \
|
|
325
|
-
| etlplus load
|
|
355
|
+
| etlplus load https://api.example.com/endpoint --target-type api
|
|
326
356
|
```
|
|
327
357
|
|
|
328
358
|
### Python API
|
|
@@ -375,20 +405,22 @@ etlplus run --config examples/configs/pipeline.yml --job file_to_file_customers
|
|
|
375
405
|
|
|
376
406
|
```bash
|
|
377
407
|
# 1. Extract from CSV
|
|
378
|
-
etlplus extract file examples/data/sample.csv
|
|
408
|
+
etlplus extract file examples/data/sample.csv > temp/sample_extracted.json
|
|
379
409
|
|
|
380
410
|
# 2. Transform (filter and select fields)
|
|
381
|
-
etlplus transform
|
|
411
|
+
etlplus transform \
|
|
382
412
|
--operations '{"filter": {"field": "age", "op": "gt", "value": 25}, "select": ["name", "email"]}' \
|
|
383
|
-
|
|
413
|
+
temp/sample_extracted.json \
|
|
414
|
+
temp/sample_transformed.json
|
|
384
415
|
|
|
385
416
|
# 3. Validate transformed data
|
|
386
|
-
etlplus validate
|
|
387
|
-
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}'
|
|
417
|
+
etlplus validate \
|
|
418
|
+
--rules '{"name": {"type": "string", "required": true}, "email": {"type": "string", "required": true}}' \
|
|
419
|
+
temo/sample_transformed.json
|
|
388
420
|
|
|
389
421
|
# 4. Load to CSV
|
|
390
422
|
cat temp/sample_transformed.json \
|
|
391
|
-
| etlplus load
|
|
423
|
+
| etlplus load temp/sample_output.csv
|
|
392
424
|
```
|
|
393
425
|
|
|
394
426
|
### Format Overrides
|
|
@@ -401,14 +433,14 @@ Examples (zsh):
|
|
|
401
433
|
|
|
402
434
|
```zsh
|
|
403
435
|
# Force CSV parsing for an extension-less file
|
|
404
|
-
etlplus extract
|
|
436
|
+
etlplus extract data.txt --source-type file --source-format csv
|
|
405
437
|
|
|
406
438
|
# Write CSV to a file without the .csv suffix
|
|
407
|
-
etlplus load
|
|
439
|
+
etlplus load output.bin --target-type file --target-format csv < data.json
|
|
408
440
|
|
|
409
441
|
# Leave the flags off when extensions already match the desired format
|
|
410
|
-
etlplus extract --
|
|
411
|
-
etlplus load
|
|
442
|
+
etlplus extract data.csv --source-type file
|
|
443
|
+
etlplus load data.json --target-type file < data.json
|
|
412
444
|
```
|
|
413
445
|
|
|
414
446
|
## Transformation Operations
|
|
{etlplus-0.8.3.dist-info → etlplus-0.10.4.dist-info}/RECORD
CHANGED

@@ -1,17 +1,17 @@
 etlplus/__init__.py,sha256=M2gScnyir6WOMAh_EuoQIiAzdcTls0_5hbd_Q6of8I0,1021
 etlplus/__main__.py,sha256=btoROneNiigyfBU7BSzPKZ1R9gzBMpxcpsbPwmuHwTM,479
 etlplus/__version__.py,sha256=1E0GMK_yUWCMQFKxXjTvyMwofi0qT2k4CDNiHWiymWE,327
-etlplus/enums.py,sha256=
+etlplus/enums.py,sha256=8hzprOLyeCCzlHaXpG4VfgmxPSEdlZeOnHLFzBneKNs,15969
 etlplus/extract.py,sha256=f44JdHhNTACxgn44USx05paKTwq7LQY-V4wANCW9hVM,6173
-etlplus/file.py,sha256=
-etlplus/load.py,sha256=
+etlplus/file.py,sha256=B-zebTrIFDKaaKzA9Fq5-L0JwDNYa2T--_6veR3N03s,17939
+etlplus/load.py,sha256=R_y0_vtsEo1bwxWVQu2bfhB5ZIJoIoWu2ycCdvY4RnE,8737
 etlplus/mixins.py,sha256=ifGpHwWv7U00yqGf-kN93vJax2IiK4jaGtTsPsO3Oak,1350
 etlplus/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 etlplus/run.py,sha256=X4kp5FQlIWVf1_d9oSrchKau7BFDCE1Zkscvu7WPaWw,12340
 etlplus/run_helpers.py,sha256=bj6MkaeFxjl3CeKG1HoXKx5DwAlXNERVW-GX-z1P_qQ,24373
 etlplus/transform.py,sha256=uAUVDDHYCgx7GpVez9IK3OAZM-CnCuMa9iox3vwGGJA,25296
 etlplus/types.py,sha256=1hsDlnF6r76zAwaUYay-i6pCM-Y0IU5nP7Crj8PLCQ4,6157
-etlplus/utils.py,sha256=
+etlplus/utils.py,sha256=BMLTWAvCJj3zLEcffBgURYnu0UGhhXsfH2WWpAt7fV8,13363
 etlplus/validate.py,sha256=7rJoEI_SIILdPpoBqqh2UJqg9oeReDz34mYSlc3t7Qg,12989
 etlplus/api/README.md,sha256=ZiyjxLz0LfFCzeYKXwtH8yY1OJ4hXCju7t2ICroFoU8,7215
 etlplus/api/__init__.py,sha256=P2JUYFy6Ep4t6xnsBiCBfQCkQLHYYhA-yXPXCobS8Y0,4295
@@ -31,13 +31,13 @@ etlplus/api/rate_limiting/__init__.py,sha256=ZySB1dZettEDnWvI1EHf_TZ9L08M_kKsNR-
 etlplus/api/rate_limiting/config.py,sha256=2b4wIynblN-1EyMqI4aXa71SljzSjXYh5N1Nngr3jOg,9406
 etlplus/api/rate_limiting/rate_limiter.py,sha256=Uxozqd_Ej5Lsj-M-mLT2WexChgWh7x35_YP10yqYPQA,7159
 etlplus/cli/__init__.py,sha256=J97-Rv931IL1_b4AXnB7Fbbd7HKnHBpx18NQfC_kE6c,299
-etlplus/cli/commands.py,sha256
-etlplus/cli/constants.py,sha256=
-etlplus/cli/handlers.py,sha256=
-etlplus/cli/io.py,sha256=
-etlplus/cli/main.py,sha256=
-etlplus/cli/options.py,sha256=
-etlplus/cli/state.py,sha256=
+etlplus/cli/commands.py,sha256=BK2qmFsser6AXOgEvpiadrYMIiwviAzqkSxMlBhRXRw,24670
+etlplus/cli/constants.py,sha256=KIZj7J2tNf5mJbkqAdZmu5FXYW2FQmxwgeOKWc3-3Hg,1944
+etlplus/cli/handlers.py,sha256=K0GazvrPgocJ-63HZqF0xhyJk8TB1Gcj-eIbWltXKRU,17759
+etlplus/cli/io.py,sha256=7sldiZz4-Geomge5IO_XYykXPa6UiORfUWzLCdQePG8,7846
+etlplus/cli/main.py,sha256=IgeqxypixfwLHR-QcpgVMQ7vMZ865bXOh2oO9v-BWeM,5234
+etlplus/cli/options.py,sha256=vfXT3YLh7wG1iC-aTdSg6ItMC8l6n0Lozmy53XjqLbA,1199
+etlplus/cli/state.py,sha256=Pfd8ru0wYIN7eGp1_A0tioqs1LiCDZCuJ6AnjZb6yYQ,8027
 etlplus/cli/types.py,sha256=tclhKVJXDqHzlTQBYKARfqMgDOcuBJ-Zej2pvFy96WM,652
 etlplus/config/__init__.py,sha256=VZWzOg7d2YR9NT6UwKTv44yf2FRUMjTHynkm1Dl5Qzo,1486
 etlplus/config/connector.py,sha256=0-TIwevHbKRHVmucvyGpPd-3tB1dKHB-dj0yJ6kq5eY,9809
@@ -57,9 +57,9 @@ etlplus/templates/ddl.sql.j2,sha256=s8fMWvcb4eaJVXkifuib1aQPljtZ8buuyB_uA-ZdU3Q,
 etlplus/templates/view.sql.j2,sha256=Iy8DHfhq5yyvrUKDxqp_aHIEXY4Tm6j4wT7YDEFWAhk,2180
 etlplus/validation/__init__.py,sha256=Pe5Xg1_EA4uiNZGYu5WTF3j7odjmyxnAJ8rcioaplSQ,1254
 etlplus/validation/utils.py,sha256=Mtqg449VIke0ziy_wd2r6yrwJzQkA1iulZC87FzXMjo,10201
-etlplus-0.
-etlplus-0.
-etlplus-0.
-etlplus-0.
-etlplus-0.
-etlplus-0.
+etlplus-0.10.4.dist-info/licenses/LICENSE,sha256=MuNO63i6kWmgnV2pbP2SLqP54mk1BGmu7CmbtxMmT-U,1069
+etlplus-0.10.4.dist-info/METADATA,sha256=M_lQUZ5o-JaD1KuZk_t0LeHbaOj_SdqqaJQSbDCW-zY,21036
+etlplus-0.10.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+etlplus-0.10.4.dist-info/entry_points.txt,sha256=6w-2-jzuPa55spzK34h-UKh2JTEShh38adFRONNP9QE,45
+etlplus-0.10.4.dist-info/top_level.txt,sha256=aWWF-udn_sLGuHTM6W6MLh99ArS9ROkUWO8Mi8y1_2U,8
+etlplus-0.10.4.dist-info/RECORD,,
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|