etlplus 0.4.1__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/cli/app.py +669 -430
- etlplus/cli/handlers.py +175 -196
- etlplus/cli/main.py +131 -74
- {etlplus-0.4.1.dist-info → etlplus-0.4.6.dist-info}/METADATA +44 -36
- {etlplus-0.4.1.dist-info → etlplus-0.4.6.dist-info}/RECORD +9 -9
- {etlplus-0.4.1.dist-info → etlplus-0.4.6.dist-info}/WHEEL +0 -0
- {etlplus-0.4.1.dist-info → etlplus-0.4.6.dist-info}/entry_points.txt +0 -0
- {etlplus-0.4.1.dist-info → etlplus-0.4.6.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.4.1.dist-info → etlplus-0.4.6.dist-info}/top_level.txt +0 -0
etlplus/cli/app.py
CHANGED
|
@@ -28,12 +28,18 @@ Subcommands
|
|
|
28
28
|
|
|
29
29
|
Notes
|
|
30
30
|
-----
|
|
31
|
-
- Use ``-`` to read from stdin
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
31
|
+
- Use ``-`` to read from stdin or to write to stdout.
|
|
32
|
+
- Commands ``extract`` and ``transform`` support the command-line option
|
|
33
|
+
``--from`` to override inferred resource types.
|
|
34
|
+
- Commands ``transform`` and ``load`` support the command-line option ``--to``
|
|
35
|
+
to override inferred resource types.
|
|
35
36
|
"""
|
|
36
37
|
|
|
38
|
+
# Pylint struggles with large CLI surfaces that legitimately require
|
|
39
|
+
# numerous arguments in a single module.
|
|
40
|
+
# pylint: disable=too-many-lines
|
|
41
|
+
# pylint: disable=too-many-arguments,too-many-positional-arguments
|
|
42
|
+
|
|
37
43
|
from __future__ import annotations
|
|
38
44
|
|
|
39
45
|
import argparse
|
|
@@ -41,6 +47,7 @@ import sys
|
|
|
41
47
|
from collections.abc import Collection
|
|
42
48
|
from dataclasses import dataclass
|
|
43
49
|
from pathlib import Path
|
|
50
|
+
from typing import Annotated
|
|
44
51
|
from typing import Final
|
|
45
52
|
|
|
46
53
|
import typer
|
|
@@ -66,6 +73,19 @@ __all__ = [
|
|
|
66
73
|
]
|
|
67
74
|
|
|
68
75
|
|
|
76
|
+
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
_DB_SCHEMES = (
|
|
80
|
+
'postgres://',
|
|
81
|
+
'postgresql://',
|
|
82
|
+
'mysql://',
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
_SOURCE_CHOICES: Final[frozenset[str]] = frozenset(DataConnectorType.choices())
|
|
86
|
+
_FORMAT_CHOICES: Final[frozenset[str]] = frozenset(FileFormat.choices())
|
|
87
|
+
|
|
88
|
+
|
|
69
89
|
# SECTION: CONSTANTS ======================================================== #
|
|
70
90
|
|
|
71
91
|
|
|
@@ -75,67 +95,168 @@ CLI_DESCRIPTION: Final[str] = '\n'.join(
|
|
|
75
95
|
'',
|
|
76
96
|
' Provide a subcommand and options. Examples:',
|
|
77
97
|
'',
|
|
78
|
-
' etlplus extract
|
|
98
|
+
' etlplus extract in.csv > out.json',
|
|
79
99
|
' etlplus validate in.json --rules \'{"required": ["id"]}\'',
|
|
80
|
-
|
|
81
|
-
|
|
100
|
+
(
|
|
101
|
+
' etlplus transform --from file in.json '
|
|
102
|
+
'--operations \'{"select": ["id"]}\' --to file -o out.json'
|
|
103
|
+
),
|
|
104
|
+
' etlplus extract in.csv | etlplus load --to file out.json',
|
|
105
|
+
' cat data.json | etlplus load --to api https://example.com/data',
|
|
82
106
|
'',
|
|
83
|
-
'
|
|
107
|
+
' Override format inference when extensions are misleading:',
|
|
84
108
|
'',
|
|
85
|
-
' etlplus extract
|
|
86
|
-
' etlplus load
|
|
109
|
+
' etlplus extract data.txt --source-format csv',
|
|
110
|
+
' etlplus load payload.bin --target-format json',
|
|
87
111
|
],
|
|
88
112
|
)
|
|
89
113
|
|
|
90
114
|
CLI_EPILOG: Final[str] = '\n'.join(
|
|
91
115
|
[
|
|
92
|
-
'
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
'--format is provided for files.'
|
|
96
|
-
),
|
|
97
|
-
' Values:',
|
|
98
|
-
' - error|fail|strict: treat as error',
|
|
99
|
-
' - warn (default): print a warning',
|
|
100
|
-
' - ignore|silent: no message',
|
|
101
|
-
'',
|
|
102
|
-
'Note:',
|
|
103
|
-
' --strict-format overrides the environment behavior.',
|
|
116
|
+
'Tip:',
|
|
117
|
+
' --source-format and --target-format override format inference '
|
|
118
|
+
'based on filename extensions when needed.',
|
|
104
119
|
],
|
|
105
120
|
)
|
|
106
121
|
|
|
107
122
|
PROJECT_URL: Final[str] = 'https://github.com/Dagitali/ETLPlus'
|
|
108
123
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
124
|
+
|
|
125
|
+
# SECTION: TYPE ALIASES ==================================================== #
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
SourceInputArg = Annotated[
|
|
129
|
+
str,
|
|
130
|
+
typer.Argument(
|
|
131
|
+
...,
|
|
132
|
+
metavar='SOURCE',
|
|
133
|
+
help=(
|
|
134
|
+
'Extract from SOURCE. Use --from/--source-type to override the '
|
|
135
|
+
'inferred connector when needed.'
|
|
136
|
+
),
|
|
116
137
|
),
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
'
|
|
138
|
+
]
|
|
139
|
+
|
|
140
|
+
StreamingSourceArg = Annotated[
|
|
141
|
+
str,
|
|
142
|
+
typer.Argument(
|
|
143
|
+
...,
|
|
144
|
+
metavar='SOURCE',
|
|
145
|
+
help=(
|
|
146
|
+
'Data source to transform or validate (path, JSON payload, or '
|
|
147
|
+
'- for stdin).'
|
|
148
|
+
),
|
|
124
149
|
),
|
|
125
|
-
|
|
150
|
+
]
|
|
126
151
|
|
|
152
|
+
TargetInputArg = Annotated[
|
|
153
|
+
str,
|
|
154
|
+
typer.Argument(
|
|
155
|
+
...,
|
|
156
|
+
metavar='TARGET',
|
|
157
|
+
help=(
|
|
158
|
+
'Load JSON data from stdin into TARGET. Use --to/--target-type '
|
|
159
|
+
'to override connector inference when needed. Source data must '
|
|
160
|
+
'be piped into stdin.'
|
|
161
|
+
),
|
|
162
|
+
),
|
|
163
|
+
]
|
|
127
164
|
|
|
128
|
-
|
|
165
|
+
SourceOverrideOption = Annotated[
|
|
166
|
+
str | None,
|
|
167
|
+
typer.Option(
|
|
168
|
+
'--source-type',
|
|
169
|
+
metavar='CONNECTOR',
|
|
170
|
+
show_default=False,
|
|
171
|
+
rich_help_panel='I/O overrides',
|
|
172
|
+
help='Override the inferred source type (file, database, api).',
|
|
173
|
+
),
|
|
174
|
+
]
|
|
129
175
|
|
|
176
|
+
TargetOverrideOption = Annotated[
|
|
177
|
+
str | None,
|
|
178
|
+
typer.Option(
|
|
179
|
+
'--target-type',
|
|
180
|
+
metavar='CONNECTOR',
|
|
181
|
+
show_default=False,
|
|
182
|
+
rich_help_panel='I/O overrides',
|
|
183
|
+
help='Override the inferred target type (file, database, api).',
|
|
184
|
+
),
|
|
185
|
+
]
|
|
130
186
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
187
|
+
SourceFormatOption = Annotated[
|
|
188
|
+
str | None,
|
|
189
|
+
typer.Option(
|
|
190
|
+
'--source-format',
|
|
191
|
+
metavar='FORMAT',
|
|
192
|
+
show_default=False,
|
|
193
|
+
rich_help_panel='Format overrides',
|
|
194
|
+
help=(
|
|
195
|
+
'Input payload format when SOURCE is - or an inline payload. '
|
|
196
|
+
'File sources infer format from the extension.'
|
|
197
|
+
),
|
|
198
|
+
),
|
|
199
|
+
]
|
|
136
200
|
|
|
137
|
-
|
|
138
|
-
|
|
201
|
+
StdinFormatOption = Annotated[
|
|
202
|
+
str | None,
|
|
203
|
+
typer.Option(
|
|
204
|
+
'--source-format',
|
|
205
|
+
metavar='FORMAT',
|
|
206
|
+
show_default=False,
|
|
207
|
+
rich_help_panel='Format overrides',
|
|
208
|
+
help='Input payload format when reading from stdin (default: json).',
|
|
209
|
+
),
|
|
210
|
+
]
|
|
211
|
+
|
|
212
|
+
TargetFormatOption = Annotated[
|
|
213
|
+
str | None,
|
|
214
|
+
typer.Option(
|
|
215
|
+
'--target-format',
|
|
216
|
+
metavar='FORMAT',
|
|
217
|
+
show_default=False,
|
|
218
|
+
rich_help_panel='Format overrides',
|
|
219
|
+
help=(
|
|
220
|
+
'Payload format when TARGET is - or a non-file connector. File '
|
|
221
|
+
'targets infer format from the extension.'
|
|
222
|
+
),
|
|
223
|
+
),
|
|
224
|
+
]
|
|
225
|
+
|
|
226
|
+
OperationsJSONOption = Annotated[
|
|
227
|
+
str,
|
|
228
|
+
typer.Option(
|
|
229
|
+
'--operations',
|
|
230
|
+
help='Transformation operations as JSON string.',
|
|
231
|
+
),
|
|
232
|
+
]
|
|
233
|
+
|
|
234
|
+
RulesJSONOption = Annotated[
|
|
235
|
+
str,
|
|
236
|
+
typer.Option(
|
|
237
|
+
'--rules',
|
|
238
|
+
help='Validation rules as JSON string.',
|
|
239
|
+
),
|
|
240
|
+
]
|
|
241
|
+
|
|
242
|
+
TargetPathOption = Annotated[
|
|
243
|
+
str | None,
|
|
244
|
+
typer.Option(
|
|
245
|
+
'--target',
|
|
246
|
+
metavar='PATH',
|
|
247
|
+
help='Target file for transformed or validated output (- for stdout).',
|
|
248
|
+
),
|
|
249
|
+
]
|
|
250
|
+
|
|
251
|
+
PipelineConfigOption = Annotated[
|
|
252
|
+
str,
|
|
253
|
+
typer.Option(
|
|
254
|
+
...,
|
|
255
|
+
'--config',
|
|
256
|
+
metavar='PATH',
|
|
257
|
+
help='Path to pipeline YAML configuration file.',
|
|
258
|
+
),
|
|
259
|
+
]
|
|
139
260
|
|
|
140
261
|
|
|
141
262
|
# SECTION: DATA CLASSES ===================================================== #
|
|
@@ -174,6 +295,32 @@ def _ensure_state(
|
|
|
174
295
|
return ctx.obj
|
|
175
296
|
|
|
176
297
|
|
|
298
|
+
def _format_namespace_kwargs(
|
|
299
|
+
*,
|
|
300
|
+
format_value: str | None,
|
|
301
|
+
default: str,
|
|
302
|
+
) -> dict[str, object]:
|
|
303
|
+
"""
|
|
304
|
+
Return common namespace kwargs for format handling.
|
|
305
|
+
|
|
306
|
+
Parameters
|
|
307
|
+
----------
|
|
308
|
+
format_value : str | None
|
|
309
|
+
User-provided format value from the CLI option.
|
|
310
|
+
default : str
|
|
311
|
+
Default format to use when none is provided.
|
|
312
|
+
|
|
313
|
+
Returns
|
|
314
|
+
-------
|
|
315
|
+
dict[str, object]
|
|
316
|
+
Keyword arguments for format-related namespace attributes.
|
|
317
|
+
"""
|
|
318
|
+
return {
|
|
319
|
+
'format': (format_value or default),
|
|
320
|
+
'_format_explicit': (format_value is not None),
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
|
|
177
324
|
def _infer_resource_type(
|
|
178
325
|
value: str,
|
|
179
326
|
) -> str:
|
|
@@ -198,12 +345,13 @@ def _infer_resource_type(
|
|
|
198
345
|
val = (value or '').strip()
|
|
199
346
|
low = val.lower()
|
|
200
347
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
348
|
+
match (val, low):
|
|
349
|
+
case ('-', _):
|
|
350
|
+
return 'file'
|
|
351
|
+
case (_, inferred) if inferred.startswith(('http://', 'https://')):
|
|
352
|
+
return 'api'
|
|
353
|
+
case (_, inferred) if inferred.startswith(_DB_SCHEMES):
|
|
354
|
+
return 'database'
|
|
207
355
|
|
|
208
356
|
path = Path(val)
|
|
209
357
|
if path.exists() or path.suffix:
|
|
@@ -240,6 +388,59 @@ def _infer_resource_type_or_exit(
|
|
|
240
388
|
raise typer.BadParameter(str(exc)) from exc
|
|
241
389
|
|
|
242
390
|
|
|
391
|
+
def _infer_resource_type_soft(
|
|
392
|
+
value: str | None,
|
|
393
|
+
) -> str | None:
|
|
394
|
+
"""
|
|
395
|
+
Make a best-effort inference that tolerates inline payloads.
|
|
396
|
+
|
|
397
|
+
Parameters
|
|
398
|
+
----------
|
|
399
|
+
value : str | None
|
|
400
|
+
CLI value describing a source/target.
|
|
401
|
+
|
|
402
|
+
Returns
|
|
403
|
+
-------
|
|
404
|
+
str | None
|
|
405
|
+
Inferred resource type, or ``None`` if inference failed.
|
|
406
|
+
"""
|
|
407
|
+
if value is None:
|
|
408
|
+
return None
|
|
409
|
+
try:
|
|
410
|
+
return _infer_resource_type(value)
|
|
411
|
+
except ValueError:
|
|
412
|
+
return None
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _log_inferred_resource(
|
|
416
|
+
state: CliState,
|
|
417
|
+
*,
|
|
418
|
+
role: str,
|
|
419
|
+
value: str,
|
|
420
|
+
resource_type: str | None,
|
|
421
|
+
) -> None:
|
|
422
|
+
"""
|
|
423
|
+
Emit a uniform verbose message for inferred resource types.
|
|
424
|
+
|
|
425
|
+
Parameters
|
|
426
|
+
----------
|
|
427
|
+
state : CliState
|
|
428
|
+
Current CLI state stored on the Typer context.
|
|
429
|
+
role : str
|
|
430
|
+
Friendly label for the resource (e.g., ``source`` or ``target``).
|
|
431
|
+
value : str
|
|
432
|
+
Resource value provided on the CLI.
|
|
433
|
+
resource_type : str | None
|
|
434
|
+
Inferred resource type or ``None`` if not inferred.
|
|
435
|
+
"""
|
|
436
|
+
if not state.verbose or resource_type is None:
|
|
437
|
+
return
|
|
438
|
+
print(
|
|
439
|
+
f'Inferred {role}_type={resource_type} for {role}={value}',
|
|
440
|
+
file=sys.stderr,
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
|
|
243
444
|
def _ns(
|
|
244
445
|
**kwargs: object,
|
|
245
446
|
) -> argparse.Namespace:
|
|
@@ -286,13 +487,65 @@ def _optional_choice(
|
|
|
286
487
|
return _validate_choice(value, choices, label=label)
|
|
287
488
|
|
|
288
489
|
|
|
490
|
+
def _resolve_resource_type(
|
|
491
|
+
*,
|
|
492
|
+
explicit_type: str | None,
|
|
493
|
+
override_type: str | None,
|
|
494
|
+
value: str,
|
|
495
|
+
label: str,
|
|
496
|
+
conflict_error: str | None = None,
|
|
497
|
+
legacy_file_error: str | None = None,
|
|
498
|
+
) -> str:
|
|
499
|
+
"""
|
|
500
|
+
Resolve resource type preference order and validate it.
|
|
501
|
+
|
|
502
|
+
Parameters
|
|
503
|
+
----------
|
|
504
|
+
explicit_type : str | None
|
|
505
|
+
Explicit resource type provided by the user.
|
|
506
|
+
override_type : str | None
|
|
507
|
+
Resource type provided by an overriding option.
|
|
508
|
+
value : str
|
|
509
|
+
Resource value to infer type from if no explicit or override type is
|
|
510
|
+
given.
|
|
511
|
+
label : str
|
|
512
|
+
Friendly label for error messages.
|
|
513
|
+
conflict_error : str | None
|
|
514
|
+
Error message to raise if there is a conflict between explicit and
|
|
515
|
+
override types.
|
|
516
|
+
legacy_file_error : str | None
|
|
517
|
+
Error message to raise if the explicit type is a legacy 'file' type.
|
|
518
|
+
|
|
519
|
+
Returns
|
|
520
|
+
-------
|
|
521
|
+
str
|
|
522
|
+
Resolved and validated resource type.
|
|
523
|
+
|
|
524
|
+
Raises
|
|
525
|
+
------
|
|
526
|
+
typer.BadParameter
|
|
527
|
+
If there is a conflict between explicit and override types, or if the
|
|
528
|
+
explicit type is a legacy 'file' type.
|
|
529
|
+
"""
|
|
530
|
+
if explicit_type is not None:
|
|
531
|
+
if override_type is not None and conflict_error:
|
|
532
|
+
raise typer.BadParameter(conflict_error)
|
|
533
|
+
if legacy_file_error and explicit_type.strip().lower() == 'file':
|
|
534
|
+
raise typer.BadParameter(legacy_file_error)
|
|
535
|
+
candidate = explicit_type
|
|
536
|
+
else:
|
|
537
|
+
candidate = override_type or _infer_resource_type_or_exit(value)
|
|
538
|
+
return _validate_choice(candidate, _SOURCE_CHOICES, label=label)
|
|
539
|
+
|
|
540
|
+
|
|
289
541
|
def _stateful_namespace(
|
|
290
542
|
state: CliState,
|
|
291
543
|
*,
|
|
292
544
|
command: str,
|
|
293
545
|
**kwargs: object,
|
|
294
546
|
) -> argparse.Namespace:
|
|
295
|
-
"""
|
|
547
|
+
"""
|
|
548
|
+
Attach CLI state toggles to a handler namespace.
|
|
296
549
|
|
|
297
550
|
Parameters
|
|
298
551
|
----------
|
|
@@ -432,34 +685,9 @@ def _root(
|
|
|
432
685
|
@app.command('extract')
|
|
433
686
|
def extract_cmd(
|
|
434
687
|
ctx: typer.Context,
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
'--from',
|
|
439
|
-
help='Override the inferred source type (file, database, api).',
|
|
440
|
-
),
|
|
441
|
-
output: str | None = typer.Option(
|
|
442
|
-
None,
|
|
443
|
-
'-o',
|
|
444
|
-
'--output',
|
|
445
|
-
help='Output file to save extracted data (JSON). Use - for stdout.',
|
|
446
|
-
),
|
|
447
|
-
strict_format: bool = typer.Option(
|
|
448
|
-
False,
|
|
449
|
-
'--strict-format',
|
|
450
|
-
help=(
|
|
451
|
-
'Treat providing --format for file sources as an error '
|
|
452
|
-
'(overrides environment behavior)'
|
|
453
|
-
),
|
|
454
|
-
),
|
|
455
|
-
source_format: str | None = typer.Option(
|
|
456
|
-
None,
|
|
457
|
-
'--format',
|
|
458
|
-
help=(
|
|
459
|
-
'Payload format when not a file (or when SOURCE is -). '
|
|
460
|
-
'For normal file paths, format is inferred from extension.'
|
|
461
|
-
),
|
|
462
|
-
),
|
|
688
|
+
source: SourceInputArg,
|
|
689
|
+
source_format: SourceFormatOption | None = None,
|
|
690
|
+
source_type: SourceOverrideOption | None = None,
|
|
463
691
|
) -> int:
|
|
464
692
|
"""
|
|
465
693
|
Extract data from files, databases, or REST APIs.
|
|
@@ -468,267 +696,146 @@ def extract_cmd(
|
|
|
468
696
|
----------
|
|
469
697
|
ctx : typer.Context
|
|
470
698
|
Typer execution context provided to the command.
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
Override the inferred source type.
|
|
475
|
-
output : str | None
|
|
476
|
-
Output file to save extracted data.
|
|
477
|
-
strict_format : bool
|
|
478
|
-
Whether to enforce strict format behavior.
|
|
479
|
-
source_format : str | None
|
|
699
|
+
source : SourceInputArg
|
|
700
|
+
Data source (file path, URL, DSN, or ``-`` for stdin).
|
|
701
|
+
source_format : SourceFormatOption | None, optional
|
|
480
702
|
Payload format when not a file.
|
|
703
|
+
source_type : SourceOverrideOption | None, optional
|
|
704
|
+
Override the inferred source type.
|
|
481
705
|
|
|
482
706
|
Returns
|
|
483
707
|
-------
|
|
484
708
|
int
|
|
485
709
|
Zero on success.
|
|
486
710
|
|
|
487
|
-
Raises
|
|
488
|
-
------
|
|
489
|
-
typer.BadParameter
|
|
490
|
-
If invalid parameters are provided.
|
|
491
|
-
|
|
492
711
|
Examples
|
|
493
712
|
--------
|
|
494
713
|
- Extract from a file (type inferred):
|
|
495
714
|
etlplus extract in.csv
|
|
496
|
-
|
|
497
|
-
- Extract from a file (explicit):
|
|
498
|
-
etlplus extract file in.csv
|
|
715
|
+
- Extract from a file (explicit via flag):
|
|
499
716
|
etlplus extract --from file in.csv
|
|
500
|
-
|
|
501
717
|
- Extract from an API:
|
|
502
718
|
etlplus extract https://example.com/data.json
|
|
503
719
|
etlplus extract --from api https://example.com/data.json
|
|
504
|
-
|
|
505
720
|
- Extract from a database DSN:
|
|
506
721
|
etlplus extract --from database postgresql://user:pass@host/db
|
|
507
|
-
|
|
508
722
|
- Pipe into transform/load:
|
|
509
723
|
etlplus extract in.csv \
|
|
510
724
|
| etlplus transform --operations '{"select":["a"]}'
|
|
725
|
+
|
|
726
|
+
Notes
|
|
727
|
+
-----
|
|
728
|
+
- The ``extract`` command always writes JSON to stdout.
|
|
729
|
+
- CSV output is unsupported for this command.
|
|
730
|
+
- Use shell redirection (``>``) or pipelines to persist the output.
|
|
511
731
|
"""
|
|
512
732
|
state = _ensure_state(ctx)
|
|
513
733
|
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
734
|
+
source_type = _optional_choice(
|
|
735
|
+
source_type,
|
|
736
|
+
_SOURCE_CHOICES,
|
|
737
|
+
label='source_type',
|
|
738
|
+
)
|
|
518
739
|
source_format = _optional_choice(
|
|
519
740
|
source_format,
|
|
520
741
|
_FORMAT_CHOICES,
|
|
521
|
-
label='
|
|
742
|
+
label='source_format',
|
|
522
743
|
)
|
|
523
744
|
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
)
|
|
529
|
-
source_type = _validate_choice(
|
|
530
|
-
args[0],
|
|
531
|
-
_SOURCE_CHOICES,
|
|
532
|
-
label='source_type',
|
|
533
|
-
)
|
|
534
|
-
source = args[1]
|
|
535
|
-
else:
|
|
536
|
-
source = args[0]
|
|
537
|
-
if from_ is not None:
|
|
538
|
-
source_type = from_
|
|
539
|
-
else:
|
|
540
|
-
source_type = _infer_resource_type_or_exit(source)
|
|
541
|
-
|
|
542
|
-
source_type = _validate_choice(
|
|
543
|
-
source_type,
|
|
544
|
-
_SOURCE_CHOICES,
|
|
545
|
-
label='source_type',
|
|
546
|
-
)
|
|
547
|
-
|
|
548
|
-
if state.verbose:
|
|
549
|
-
print(
|
|
550
|
-
f'Inferred source_type={source_type} for source={source}',
|
|
551
|
-
file=sys.stderr,
|
|
552
|
-
)
|
|
745
|
+
resolved_source = source
|
|
746
|
+
resolved_source_type = source_type or _infer_resource_type_or_exit(
|
|
747
|
+
resolved_source,
|
|
748
|
+
)
|
|
553
749
|
|
|
554
|
-
|
|
750
|
+
_log_inferred_resource(
|
|
555
751
|
state,
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
output=output,
|
|
560
|
-
strict_format=strict_format,
|
|
561
|
-
format=(source_format or 'json'),
|
|
562
|
-
_format_explicit=(source_format is not None),
|
|
752
|
+
role='source',
|
|
753
|
+
value=resolved_source,
|
|
754
|
+
resource_type=resolved_source_type,
|
|
563
755
|
)
|
|
564
|
-
return int(cmd_extract(ns))
|
|
565
756
|
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
ctx: typer.Context,
|
|
570
|
-
source: str = typer.Argument(
|
|
571
|
-
'-',
|
|
572
|
-
metavar='SOURCE',
|
|
573
|
-
help=(
|
|
574
|
-
'Data source to validate (file path, JSON string, or - for stdin).'
|
|
575
|
-
),
|
|
576
|
-
),
|
|
577
|
-
rules: str = typer.Option(
|
|
578
|
-
'{}',
|
|
579
|
-
'--rules',
|
|
580
|
-
help='Validation rules as JSON string',
|
|
581
|
-
),
|
|
582
|
-
output: str | None = typer.Option(
|
|
583
|
-
None,
|
|
584
|
-
'-o',
|
|
585
|
-
'--output',
|
|
586
|
-
help='Output file to save validated data (JSON). Use - for stdout.',
|
|
587
|
-
),
|
|
588
|
-
input_format: str | None = typer.Option(
|
|
589
|
-
None,
|
|
590
|
-
'--input-format',
|
|
591
|
-
help='Input payload format for stdin (json or csv).',
|
|
592
|
-
),
|
|
593
|
-
) -> int:
|
|
594
|
-
"""
|
|
595
|
-
Validate data against JSON-described rules.
|
|
596
|
-
|
|
597
|
-
Parameters
|
|
598
|
-
----------
|
|
599
|
-
ctx : typer.Context
|
|
600
|
-
Typer execution context provided to the command.
|
|
601
|
-
source : str
|
|
602
|
-
Data source (file path or ``-`` for stdin).
|
|
603
|
-
rules : str
|
|
604
|
-
Validation rules as a JSON string.
|
|
605
|
-
output : str | None
|
|
606
|
-
Optional output path. Use ``-`` for stdout.
|
|
607
|
-
input_format : str | None
|
|
608
|
-
Optional stdin format hint (json or csv).
|
|
609
|
-
|
|
610
|
-
Returns
|
|
611
|
-
-------
|
|
612
|
-
int
|
|
613
|
-
Zero on success.
|
|
614
|
-
"""
|
|
615
|
-
input_format = _optional_choice(
|
|
616
|
-
input_format,
|
|
617
|
-
_FORMAT_CHOICES,
|
|
618
|
-
label='input_format',
|
|
757
|
+
format_kwargs = _format_namespace_kwargs(
|
|
758
|
+
format_value=source_format,
|
|
759
|
+
default='json',
|
|
619
760
|
)
|
|
620
|
-
|
|
621
|
-
state = _ensure_state(ctx)
|
|
622
|
-
|
|
623
761
|
ns = _stateful_namespace(
|
|
624
762
|
state,
|
|
625
|
-
command='
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
input_format=input_format,
|
|
763
|
+
command='extract',
|
|
764
|
+
source_type=resolved_source_type,
|
|
765
|
+
source=resolved_source,
|
|
766
|
+
**format_kwargs,
|
|
630
767
|
)
|
|
631
|
-
return int(
|
|
768
|
+
return int(cmd_extract(ns))
|
|
632
769
|
|
|
633
770
|
|
|
634
|
-
@app.command('
|
|
635
|
-
def
|
|
771
|
+
@app.command('list')
|
|
772
|
+
def list_cmd(
|
|
636
773
|
ctx: typer.Context,
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
'(file path, JSON string, or - for stdin).'
|
|
643
|
-
),
|
|
644
|
-
),
|
|
645
|
-
operations: str = typer.Option(
|
|
646
|
-
'{}',
|
|
647
|
-
'--operations',
|
|
648
|
-
help='Transformation operations as JSON string',
|
|
774
|
+
config: PipelineConfigOption,
|
|
775
|
+
jobs: bool = typer.Option(
|
|
776
|
+
False,
|
|
777
|
+
'--jobs',
|
|
778
|
+
help='List available job names and exit',
|
|
649
779
|
),
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
'
|
|
653
|
-
'
|
|
654
|
-
help='Output file to save transformed data (JSON). Use - for stdout.',
|
|
780
|
+
pipelines: bool = typer.Option(
|
|
781
|
+
False,
|
|
782
|
+
'--pipelines',
|
|
783
|
+
help='List ETL pipelines',
|
|
655
784
|
),
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
785
|
+
sources: bool = typer.Option(False, '--sources', help='List data sources'),
|
|
786
|
+
targets: bool = typer.Option(False, '--targets', help='List data targets'),
|
|
787
|
+
transforms: bool = typer.Option(
|
|
788
|
+
False,
|
|
789
|
+
'--transforms',
|
|
790
|
+
help='List data transforms',
|
|
660
791
|
),
|
|
661
792
|
) -> int:
|
|
662
793
|
"""
|
|
663
|
-
|
|
794
|
+
Print ETL entities from a pipeline YAML configuration.
|
|
664
795
|
|
|
665
796
|
Parameters
|
|
666
797
|
----------
|
|
667
798
|
ctx : typer.Context
|
|
668
799
|
Typer execution context provided to the command.
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
800
|
+
config : PipelineConfigOption
|
|
801
|
+
Path to pipeline YAML configuration file.
|
|
802
|
+
jobs : bool, optional
|
|
803
|
+
If True, list available job names and exit.
|
|
804
|
+
pipelines : bool, optional
|
|
805
|
+
If True, list ETL pipelines.
|
|
806
|
+
sources : bool, optional
|
|
807
|
+
If True, list data sources.
|
|
808
|
+
targets : bool, optional
|
|
809
|
+
If True, list data targets.
|
|
810
|
+
transforms : bool, optional
|
|
811
|
+
If True, list data transforms.
|
|
677
812
|
|
|
678
813
|
Returns
|
|
679
814
|
-------
|
|
680
815
|
int
|
|
681
816
|
Zero on success.
|
|
682
817
|
"""
|
|
683
|
-
input_format = _optional_choice(
|
|
684
|
-
input_format,
|
|
685
|
-
_FORMAT_CHOICES,
|
|
686
|
-
label='input_format',
|
|
687
|
-
)
|
|
688
|
-
|
|
689
818
|
state = _ensure_state(ctx)
|
|
690
|
-
|
|
691
819
|
ns = _stateful_namespace(
|
|
692
820
|
state,
|
|
693
|
-
command='
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
821
|
+
command='list',
|
|
822
|
+
config=config,
|
|
823
|
+
pipelines=pipelines,
|
|
824
|
+
jobs=jobs,
|
|
825
|
+
sources=sources,
|
|
826
|
+
targets=targets,
|
|
827
|
+
transforms=transforms,
|
|
698
828
|
)
|
|
699
|
-
return int(
|
|
829
|
+
return int(cmd_list(ns))
|
|
700
830
|
|
|
701
831
|
|
|
702
832
|
@app.command('load')
|
|
703
833
|
def load_cmd(
|
|
704
834
|
ctx: typer.Context,
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
help='Override the inferred target type (file, database, api).',
|
|
710
|
-
),
|
|
711
|
-
strict_format: bool = typer.Option(
|
|
712
|
-
False,
|
|
713
|
-
'--strict-format',
|
|
714
|
-
help=(
|
|
715
|
-
'Treat providing --format for file targets as an error '
|
|
716
|
-
'(overrides environment behavior)'
|
|
717
|
-
),
|
|
718
|
-
),
|
|
719
|
-
target_format: str | None = typer.Option(
|
|
720
|
-
None,
|
|
721
|
-
'--format',
|
|
722
|
-
help=(
|
|
723
|
-
'Payload format when not a file (or when TARGET is -). '
|
|
724
|
-
'For normal file targets, format is inferred from extension.'
|
|
725
|
-
),
|
|
726
|
-
),
|
|
727
|
-
input_format: str | None = typer.Option(
|
|
728
|
-
None,
|
|
729
|
-
'--input-format',
|
|
730
|
-
help='Input payload format for stdin (json or csv).',
|
|
731
|
-
),
|
|
835
|
+
target: TargetInputArg,
|
|
836
|
+
source_format: StdinFormatOption | None = None,
|
|
837
|
+
target_format: TargetFormatOption | None = None,
|
|
838
|
+
target_type: TargetOverrideOption | None = None,
|
|
732
839
|
) -> int:
|
|
733
840
|
"""
|
|
734
841
|
Load data into a file, database, or REST API.
|
|
@@ -737,107 +844,89 @@ def load_cmd(
|
|
|
737
844
|
----------
|
|
738
845
|
ctx : typer.Context
|
|
739
846
|
Typer execution context provided to the command.
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
847
|
+
target : TargetInputArg
|
|
848
|
+
Load destination (file path, URL/DSN, or ``-`` for stdout).
|
|
849
|
+
source_format : StdinFormatOption | None, optional
|
|
850
|
+
Hint for parsing stdin payloads (json or csv).
|
|
851
|
+
target_format : TargetFormatOption | None, optional
|
|
852
|
+
Payload format when not a file target (or when TARGET is ``-``).
|
|
853
|
+
target_type : TargetOverrideOption | None, optional
|
|
744
854
|
Override the inferred target type.
|
|
745
|
-
strict_format : bool
|
|
746
|
-
Whether to enforce strict format behavior.
|
|
747
|
-
target_format : str | None
|
|
748
|
-
Payload format when not a file.
|
|
749
|
-
input_format : str | None
|
|
750
|
-
Input payload format for stdin.
|
|
751
855
|
|
|
752
856
|
Returns
|
|
753
857
|
-------
|
|
754
858
|
int
|
|
755
859
|
Zero on success.
|
|
756
860
|
|
|
757
|
-
Raises
|
|
758
|
-
------
|
|
759
|
-
typer.BadParameter
|
|
760
|
-
If the arguments are invalid
|
|
761
|
-
|
|
762
861
|
Examples
|
|
763
862
|
--------
|
|
764
863
|
- Pipe into a file:
|
|
765
864
|
etlplus extract in.csv \
|
|
766
865
|
| etlplus transform --operations '{"select":["a"]}' \
|
|
767
866
|
| etlplus load --to file out.json
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
etlplus load in.json file out.json
|
|
771
|
-
|
|
867
|
+
- Read from stdin and write to a file:
|
|
868
|
+
etlplus load out.json
|
|
772
869
|
- Write to stdout:
|
|
773
|
-
etlplus load
|
|
870
|
+
etlplus load --to file -
|
|
871
|
+
|
|
872
|
+
Notes
|
|
873
|
+
-----
|
|
874
|
+
- The ``load`` command reads JSON from stdin.
|
|
875
|
+
- CSV input is unsupported unless ``--source-format csv`` is provided.
|
|
876
|
+
- Convert upstream before piping into ``load`` when working with other
|
|
877
|
+
formats.
|
|
774
878
|
"""
|
|
775
879
|
state = _ensure_state(ctx)
|
|
776
880
|
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
'Provide TARGET, SOURCE TARGET, or SOURCE TARGET_TYPE TARGET.',
|
|
780
|
-
)
|
|
781
|
-
|
|
782
|
-
to = _optional_choice(to, _SOURCE_CHOICES, label='to')
|
|
783
|
-
target_format = _optional_choice(
|
|
784
|
-
target_format,
|
|
881
|
+
source_format = _optional_choice(
|
|
882
|
+
source_format,
|
|
785
883
|
_FORMAT_CHOICES,
|
|
786
|
-
label='
|
|
884
|
+
label='source_format',
|
|
787
885
|
)
|
|
788
|
-
|
|
789
|
-
input_format,
|
|
790
|
-
_FORMAT_CHOICES,
|
|
791
|
-
label='input_format',
|
|
792
|
-
)
|
|
793
|
-
|
|
794
|
-
# Parse positional args.
|
|
795
|
-
match args:
|
|
796
|
-
case [source, target_type_raw, target] if to is None:
|
|
797
|
-
target_type = _validate_choice(
|
|
798
|
-
target_type_raw,
|
|
799
|
-
_SOURCE_CHOICES,
|
|
800
|
-
label='target_type',
|
|
801
|
-
)
|
|
802
|
-
case [_, _, _]:
|
|
803
|
-
raise typer.BadParameter(
|
|
804
|
-
'Do not combine --to with the legacy SOURCE TARGET_TYPE '
|
|
805
|
-
'TARGET form.',
|
|
806
|
-
)
|
|
807
|
-
case [source, target]:
|
|
808
|
-
target_type = to or _infer_resource_type_or_exit(target)
|
|
809
|
-
case [solo_target]:
|
|
810
|
-
source = '-'
|
|
811
|
-
target = solo_target
|
|
812
|
-
target_type = to or _infer_resource_type_or_exit(target)
|
|
813
|
-
case []:
|
|
814
|
-
raise typer.BadParameter(
|
|
815
|
-
'Provide TARGET, SOURCE TARGET, or legacy SOURCE '
|
|
816
|
-
'TARGET_TYPE TARGET.',
|
|
817
|
-
)
|
|
818
|
-
|
|
819
|
-
target_type = _validate_choice(
|
|
886
|
+
target_type = _optional_choice(
|
|
820
887
|
target_type,
|
|
821
888
|
_SOURCE_CHOICES,
|
|
822
889
|
label='target_type',
|
|
823
890
|
)
|
|
891
|
+
target_format = _optional_choice(
|
|
892
|
+
target_format,
|
|
893
|
+
_FORMAT_CHOICES,
|
|
894
|
+
label='target_format',
|
|
895
|
+
)
|
|
824
896
|
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
897
|
+
resolved_target = target
|
|
898
|
+
resolved_target_type = target_type or _infer_resource_type_or_exit(
|
|
899
|
+
resolved_target,
|
|
900
|
+
)
|
|
901
|
+
|
|
902
|
+
resolved_source_value = '-'
|
|
903
|
+
resolved_source_type = _infer_resource_type_soft(resolved_source_value)
|
|
904
|
+
|
|
905
|
+
_log_inferred_resource(
|
|
906
|
+
state,
|
|
907
|
+
role='source',
|
|
908
|
+
value=resolved_source_value,
|
|
909
|
+
resource_type=resolved_source_type,
|
|
910
|
+
)
|
|
911
|
+
_log_inferred_resource(
|
|
912
|
+
state,
|
|
913
|
+
role='target',
|
|
914
|
+
value=resolved_target,
|
|
915
|
+
resource_type=resolved_target_type,
|
|
916
|
+
)
|
|
830
917
|
|
|
918
|
+
format_kwargs = _format_namespace_kwargs(
|
|
919
|
+
format_value=target_format,
|
|
920
|
+
default='json',
|
|
921
|
+
)
|
|
831
922
|
ns = _stateful_namespace(
|
|
832
923
|
state,
|
|
833
924
|
command='load',
|
|
834
|
-
source=
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
_format_explicit=(target_format is not None),
|
|
840
|
-
input_format=input_format,
|
|
925
|
+
source=resolved_source_value,
|
|
926
|
+
source_format=source_format,
|
|
927
|
+
target_type=resolved_target_type,
|
|
928
|
+
target=resolved_target,
|
|
929
|
+
**format_kwargs,
|
|
841
930
|
)
|
|
842
931
|
return int(cmd_load(ns))
|
|
843
932
|
|
|
@@ -845,21 +934,22 @@ def load_cmd(
|
|
|
845
934
|
@app.command('pipeline')
|
|
846
935
|
def pipeline_cmd(
|
|
847
936
|
ctx: typer.Context,
|
|
848
|
-
config:
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
937
|
+
config: PipelineConfigOption,
|
|
938
|
+
job: str | None = typer.Option(
|
|
939
|
+
None,
|
|
940
|
+
'--job',
|
|
941
|
+
metavar='JOB',
|
|
942
|
+
help='Run a specific job by name',
|
|
852
943
|
),
|
|
853
|
-
|
|
944
|
+
jobs: bool = typer.Option(
|
|
854
945
|
False,
|
|
855
|
-
'--
|
|
946
|
+
'--jobs',
|
|
856
947
|
help='List available job names and exit',
|
|
857
948
|
),
|
|
858
|
-
|
|
949
|
+
pipeline: str | None = typer.Option(
|
|
859
950
|
None,
|
|
860
|
-
'--
|
|
861
|
-
|
|
862
|
-
help='Run a specific job by name',
|
|
951
|
+
'--pipeline',
|
|
952
|
+
help='Run a specific pipeline by name',
|
|
863
953
|
),
|
|
864
954
|
) -> int:
|
|
865
955
|
"""
|
|
@@ -869,12 +959,14 @@ def pipeline_cmd(
|
|
|
869
959
|
----------
|
|
870
960
|
ctx : typer.Context
|
|
871
961
|
Typer execution context provided to the command.
|
|
872
|
-
config :
|
|
962
|
+
config : PipelineConfigOption
|
|
873
963
|
Path to pipeline YAML configuration file.
|
|
874
|
-
|
|
875
|
-
If True, list available job names and exit.
|
|
876
|
-
run_job : str | None
|
|
964
|
+
job : str | None, optional
|
|
877
965
|
Name of a specific job to run.
|
|
966
|
+
jobs : bool, optional
|
|
967
|
+
If True, list available job names and exit.
|
|
968
|
+
pipeline : str | None, optional
|
|
969
|
+
Name of a specific pipeline to run.
|
|
878
970
|
|
|
879
971
|
Returns
|
|
880
972
|
-------
|
|
@@ -882,54 +974,47 @@ def pipeline_cmd(
|
|
|
882
974
|
Zero on success.
|
|
883
975
|
"""
|
|
884
976
|
state = _ensure_state(ctx)
|
|
977
|
+
run_target = job or pipeline
|
|
885
978
|
ns = _stateful_namespace(
|
|
886
979
|
state,
|
|
887
980
|
command='pipeline',
|
|
888
981
|
config=config,
|
|
889
|
-
list=
|
|
890
|
-
run=
|
|
982
|
+
list=jobs,
|
|
983
|
+
run=run_target,
|
|
891
984
|
)
|
|
892
985
|
return int(cmd_pipeline(ns))
|
|
893
986
|
|
|
894
987
|
|
|
895
|
-
@app.command('
|
|
896
|
-
def
|
|
988
|
+
@app.command('run')
|
|
989
|
+
def run_cmd(
|
|
897
990
|
ctx: typer.Context,
|
|
898
|
-
config:
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
False,
|
|
905
|
-
'--pipelines',
|
|
906
|
-
help='List ETL pipelines',
|
|
991
|
+
config: PipelineConfigOption,
|
|
992
|
+
job: str | None = typer.Option(
|
|
993
|
+
None,
|
|
994
|
+
'-j',
|
|
995
|
+
'--job',
|
|
996
|
+
help='Name of the job to run',
|
|
907
997
|
),
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
'
|
|
913
|
-
help='List data transforms',
|
|
998
|
+
pipeline: str | None = typer.Option(
|
|
999
|
+
None,
|
|
1000
|
+
'-p',
|
|
1001
|
+
'--pipeline',
|
|
1002
|
+
help='Name of the pipeline to run',
|
|
914
1003
|
),
|
|
915
1004
|
) -> int:
|
|
916
1005
|
"""
|
|
917
|
-
|
|
1006
|
+
Execute an ETL job or pipeline from a YAML configuration.
|
|
918
1007
|
|
|
919
1008
|
Parameters
|
|
920
1009
|
----------
|
|
921
1010
|
ctx : typer.Context
|
|
922
1011
|
Typer execution context provided to the command.
|
|
923
|
-
config :
|
|
1012
|
+
config : PipelineConfigOption
|
|
924
1013
|
Path to pipeline YAML configuration file.
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
targets : bool
|
|
930
|
-
If True, list data targets.
|
|
931
|
-
transforms : bool
|
|
932
|
-
If True, list data transforms.
|
|
1014
|
+
job : str | None, optional
|
|
1015
|
+
Name of the job to run.
|
|
1016
|
+
pipeline : str | None, optional
|
|
1017
|
+
Name of the pipeline to run.
|
|
933
1018
|
|
|
934
1019
|
Returns
|
|
935
1020
|
-------
|
|
@@ -939,62 +1024,216 @@ def list_cmd(
|
|
|
939
1024
|
state = _ensure_state(ctx)
|
|
940
1025
|
ns = _stateful_namespace(
|
|
941
1026
|
state,
|
|
942
|
-
command='
|
|
1027
|
+
command='run',
|
|
943
1028
|
config=config,
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
targets=targets,
|
|
947
|
-
transforms=transforms,
|
|
1029
|
+
job=job,
|
|
1030
|
+
pipeline=pipeline,
|
|
948
1031
|
)
|
|
949
|
-
return int(
|
|
1032
|
+
return int(cmd_run(ns))
|
|
950
1033
|
|
|
951
1034
|
|
|
952
|
-
@app.command('
|
|
953
|
-
def
|
|
1035
|
+
@app.command('transform')
|
|
1036
|
+
def transform_cmd(
|
|
954
1037
|
ctx: typer.Context,
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
'-j',
|
|
963
|
-
'--job',
|
|
964
|
-
help='Name of the job to run',
|
|
965
|
-
),
|
|
966
|
-
pipeline: str | None = typer.Option(
|
|
967
|
-
None,
|
|
968
|
-
'-p',
|
|
969
|
-
'--pipeline',
|
|
970
|
-
help='Name of the pipeline to run',
|
|
971
|
-
),
|
|
1038
|
+
operations: OperationsJSONOption = '{}',
|
|
1039
|
+
source: StreamingSourceArg = '-',
|
|
1040
|
+
source_format: SourceFormatOption | None = None,
|
|
1041
|
+
source_type: SourceOverrideOption | None = None,
|
|
1042
|
+
target: TargetPathOption | None = None,
|
|
1043
|
+
target_format: TargetFormatOption | None = None,
|
|
1044
|
+
target_type: TargetOverrideOption | None = None,
|
|
972
1045
|
) -> int:
|
|
973
1046
|
"""
|
|
974
|
-
|
|
1047
|
+
Transform records using JSON-described operations.
|
|
975
1048
|
|
|
976
1049
|
Parameters
|
|
977
1050
|
----------
|
|
978
1051
|
ctx : typer.Context
|
|
979
1052
|
Typer execution context provided to the command.
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
1053
|
+
operations : OperationsJSONOption, optional
|
|
1054
|
+
Transformation operations as a JSON string.
|
|
1055
|
+
source : StreamingSourceArg, optional
|
|
1056
|
+
Data source (file path or ``-`` for stdin).
|
|
1057
|
+
source_format : SourceFormatOption | None, optional
|
|
1058
|
+
Input payload format when not a file (or when SOURCE is -).
|
|
1059
|
+
source_type : SourceOverrideOption | None, optional
|
|
1060
|
+
Override the inferred source type.
|
|
1061
|
+
target : TargetPathOption | None, optional
|
|
1062
|
+
Optional output path. Use ``-`` for stdout.
|
|
1063
|
+
target_format : TargetFormatOption | None, optional
|
|
1064
|
+
Output payload format when not a file target (or when OUTPUT is -).
|
|
1065
|
+
Accepts ``--target-format``.
|
|
1066
|
+
target_type : TargetOverrideOption | None, optional
|
|
1067
|
+
Override the inferred target type.
|
|
986
1068
|
|
|
987
1069
|
Returns
|
|
988
1070
|
-------
|
|
989
1071
|
int
|
|
990
1072
|
Zero on success.
|
|
1073
|
+
|
|
1074
|
+
Examples
|
|
1075
|
+
--------
|
|
1076
|
+
- Transform data from a file and write to another file:
|
|
1077
|
+
etlplus transform --from file in.json \
|
|
1078
|
+
--operations '{"select": ["id", "name"]}' \
|
|
1079
|
+
--to file out.json
|
|
1080
|
+
- Transform data from stdin and write to stdout:
|
|
1081
|
+
cat in.json \
|
|
1082
|
+
| etlplus transform \
|
|
1083
|
+
--operations '{"filter": {"field": "age", "gt": 30}}'
|
|
1084
|
+
- Transform data from a file and write to stdout:
|
|
1085
|
+
etlplus transform --from file in.csv \
|
|
1086
|
+
--source-format csv \
|
|
1087
|
+
--operations '{"select": ["id", "email"]}'
|
|
1088
|
+
- Transform data from stdin and write to a file:
|
|
1089
|
+
cat in.json \
|
|
1090
|
+
| etlplus transform --operations '{"sort": ["-created_at"]}' \
|
|
1091
|
+
--to file out.json
|
|
1092
|
+
|
|
1093
|
+
Notes
|
|
1094
|
+
-----
|
|
1095
|
+
- The ``transform`` command reads JSON from stdin when SOURCE is ``-``.
|
|
1096
|
+
- CSV input is unsupported for this command.
|
|
1097
|
+
- Convert upstream before piping into ``transform``.
|
|
991
1098
|
"""
|
|
992
1099
|
state = _ensure_state(ctx)
|
|
1100
|
+
|
|
1101
|
+
source_format = _optional_choice(
|
|
1102
|
+
source_format,
|
|
1103
|
+
_FORMAT_CHOICES,
|
|
1104
|
+
label='source_format',
|
|
1105
|
+
)
|
|
1106
|
+
source_type = _optional_choice(
|
|
1107
|
+
source_type,
|
|
1108
|
+
_SOURCE_CHOICES,
|
|
1109
|
+
label='source_type',
|
|
1110
|
+
)
|
|
1111
|
+
target_format = _optional_choice(
|
|
1112
|
+
target_format,
|
|
1113
|
+
_FORMAT_CHOICES,
|
|
1114
|
+
label='target_format',
|
|
1115
|
+
)
|
|
1116
|
+
target_format_kwargs = _format_namespace_kwargs(
|
|
1117
|
+
format_value=target_format,
|
|
1118
|
+
default='json',
|
|
1119
|
+
)
|
|
1120
|
+
target_type = _optional_choice(
|
|
1121
|
+
target_type,
|
|
1122
|
+
_SOURCE_CHOICES,
|
|
1123
|
+
label='target_type',
|
|
1124
|
+
)
|
|
1125
|
+
|
|
1126
|
+
resolved_source_type = source_type or _infer_resource_type_soft(source)
|
|
1127
|
+
resolved_source_value = source if source is not None else '-'
|
|
1128
|
+
resolved_target_value = target if target is not None else '-'
|
|
1129
|
+
|
|
1130
|
+
if resolved_source_type is not None:
|
|
1131
|
+
resolved_source_type = _validate_choice(
|
|
1132
|
+
resolved_source_type,
|
|
1133
|
+
_SOURCE_CHOICES,
|
|
1134
|
+
label='source_type',
|
|
1135
|
+
)
|
|
1136
|
+
|
|
1137
|
+
resolved_target_type = _resolve_resource_type(
|
|
1138
|
+
explicit_type=None,
|
|
1139
|
+
override_type=target_type,
|
|
1140
|
+
value=resolved_target_value,
|
|
1141
|
+
label='target_type',
|
|
1142
|
+
)
|
|
1143
|
+
|
|
1144
|
+
_log_inferred_resource(
|
|
1145
|
+
state,
|
|
1146
|
+
role='source',
|
|
1147
|
+
value=resolved_source_value,
|
|
1148
|
+
resource_type=resolved_source_type,
|
|
1149
|
+
)
|
|
1150
|
+
_log_inferred_resource(
|
|
1151
|
+
state,
|
|
1152
|
+
role='target',
|
|
1153
|
+
value=resolved_target_value,
|
|
1154
|
+
resource_type=resolved_target_type,
|
|
1155
|
+
)
|
|
1156
|
+
|
|
993
1157
|
ns = _stateful_namespace(
|
|
994
1158
|
state,
|
|
995
|
-
command='
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
1159
|
+
command='transform',
|
|
1160
|
+
source=resolved_source_value,
|
|
1161
|
+
source_type=resolved_source_type,
|
|
1162
|
+
operations=json_type(operations),
|
|
1163
|
+
target=resolved_target_value,
|
|
1164
|
+
source_format=source_format,
|
|
1165
|
+
target_type=resolved_target_type,
|
|
1166
|
+
target_format=target_format_kwargs['format'],
|
|
1167
|
+
**target_format_kwargs,
|
|
999
1168
|
)
|
|
1000
|
-
return int(
|
|
1169
|
+
return int(cmd_transform(ns))
|
|
1170
|
+
|
|
1171
|
+
|
|
1172
|
+
@app.command('validate')
|
|
1173
|
+
def validate_cmd(
|
|
1174
|
+
ctx: typer.Context,
|
|
1175
|
+
rules: RulesJSONOption = '{}',
|
|
1176
|
+
source: StreamingSourceArg = '-',
|
|
1177
|
+
source_format: SourceFormatOption | None = None,
|
|
1178
|
+
source_type: SourceOverrideOption | None = None,
|
|
1179
|
+
target: TargetPathOption | None = None,
|
|
1180
|
+
) -> int:
|
|
1181
|
+
"""
|
|
1182
|
+
Validate data against JSON-described rules.
|
|
1183
|
+
|
|
1184
|
+
Parameters
|
|
1185
|
+
----------
|
|
1186
|
+
ctx : typer.Context
|
|
1187
|
+
Typer execution context provided to the command.
|
|
1188
|
+
rules : RulesJSONOption, optional
|
|
1189
|
+
Validation rules as a JSON string.
|
|
1190
|
+
source : StreamingSourceArg, optional
|
|
1191
|
+
Data source (file path or ``-`` for stdin).
|
|
1192
|
+
source_format : SourceFormatOption | None, optional
|
|
1193
|
+
Optional stdin format hint (JSON or CSV) when SOURCE is ``-``.
|
|
1194
|
+
source_type : SourceOverrideOption | None, optional
|
|
1195
|
+
Override the inferred source type when heuristics fail.
|
|
1196
|
+
target : TargetPathOption | None, optional
|
|
1197
|
+
Optional output path. Use ``-`` for stdout.
|
|
1198
|
+
|
|
1199
|
+
Returns
|
|
1200
|
+
-------
|
|
1201
|
+
int
|
|
1202
|
+
Zero on success.
|
|
1203
|
+
"""
|
|
1204
|
+
source_format = _optional_choice(
|
|
1205
|
+
source_format,
|
|
1206
|
+
_FORMAT_CHOICES,
|
|
1207
|
+
label='source_format',
|
|
1208
|
+
)
|
|
1209
|
+
source_type = _optional_choice(
|
|
1210
|
+
source_type,
|
|
1211
|
+
_SOURCE_CHOICES,
|
|
1212
|
+
label='source_type',
|
|
1213
|
+
)
|
|
1214
|
+
source_format_kwargs = _format_namespace_kwargs(
|
|
1215
|
+
format_value=source_format,
|
|
1216
|
+
default='json',
|
|
1217
|
+
)
|
|
1218
|
+
|
|
1219
|
+
state = _ensure_state(ctx)
|
|
1220
|
+
resolved_source_type = source_type or _infer_resource_type_soft(source)
|
|
1221
|
+
|
|
1222
|
+
_log_inferred_resource(
|
|
1223
|
+
state,
|
|
1224
|
+
role='source',
|
|
1225
|
+
value=source,
|
|
1226
|
+
resource_type=resolved_source_type,
|
|
1227
|
+
)
|
|
1228
|
+
|
|
1229
|
+
ns = _stateful_namespace(
|
|
1230
|
+
state,
|
|
1231
|
+
command='validate',
|
|
1232
|
+
source=source,
|
|
1233
|
+
source_type=resolved_source_type,
|
|
1234
|
+
rules=json_type(rules), # convert CLI string to dict
|
|
1235
|
+
target=target,
|
|
1236
|
+
source_format=source_format,
|
|
1237
|
+
**source_format_kwargs,
|
|
1238
|
+
)
|
|
1239
|
+
return int(cmd_validate(ns))
|