etlplus 0.3.23__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__main__.py +1 -2
- etlplus/cli/__init__.py +15 -0
- etlplus/cli/app.py +1000 -0
- etlplus/cli/handlers.py +686 -0
- etlplus/cli/main.py +404 -0
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/METADATA +2 -1
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/RECORD +11 -8
- etlplus/cli.py +0 -868
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/WHEEL +0 -0
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/entry_points.txt +0 -0
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/top_level.txt +0 -0
etlplus/cli/app.py
ADDED
|
@@ -0,0 +1,1000 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.cli.app` module.
|
|
3
|
+
|
|
4
|
+
Defines the main `Typer` application for the ``etlplus`` command-line
|
|
5
|
+
interface (CLI).
|
|
6
|
+
|
|
7
|
+
Typer-First Interface
|
|
8
|
+
---------------------
|
|
9
|
+
The CLI is implemented using `Typer` (Click) for parsing, help text, and
|
|
10
|
+
subcommand dispatch. The Typer layer focuses on ergonomics (git-style
|
|
11
|
+
subcommands, optional inference of resource types, stdin/stdout piping, and
|
|
12
|
+
quality-of-life flags), while delegating business logic to the existing
|
|
13
|
+
``cmd_*`` handlers.
|
|
14
|
+
|
|
15
|
+
Namespace Adapter
|
|
16
|
+
-----------------
|
|
17
|
+
The command handlers continue to accept an ``argparse.Namespace`` for
|
|
18
|
+
backwards compatibility with existing ``cmd_*`` functions and tests. The
|
|
19
|
+
Typer commands adapt parsed arguments into an ``argparse.Namespace`` and then
|
|
20
|
+
call the corresponding ``cmd_*`` handler.
|
|
21
|
+
|
|
22
|
+
Subcommands
|
|
23
|
+
-----------
|
|
24
|
+
- ``extract``: extract data from files, databases, or REST APIs
|
|
25
|
+
- ``validate``: validate data against rules
|
|
26
|
+
- ``transform``: transform records
|
|
27
|
+
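- ``load``: load data to files, databases, or REST APIs
- ``pipeline``: inspect or run a pipeline YAML configuration
- ``list``: print ETL entities from a pipeline YAML configuration
- ``run``: execute an ETL job or pipeline from a YAML configuration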
|
|
28
|
+
|
|
29
|
+
Notes
|
|
30
|
+
-----
|
|
31
|
+
- Use ``-`` to read from stdin and ``--output -`` (or ``load ... file -``) to
|
|
32
|
+
write to stdout.
|
|
33
|
+
- ``extract`` supports ``--from`` and ``load`` supports ``--to`` to override
|
|
34
|
+
inferred resource types.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import argparse
|
|
40
|
+
import sys
|
|
41
|
+
from collections.abc import Collection
|
|
42
|
+
from dataclasses import dataclass
|
|
43
|
+
from pathlib import Path
|
|
44
|
+
from typing import Final
|
|
45
|
+
|
|
46
|
+
import typer
|
|
47
|
+
|
|
48
|
+
from .. import __version__
|
|
49
|
+
from ..enums import DataConnectorType
|
|
50
|
+
from ..enums import FileFormat
|
|
51
|
+
from ..utils import json_type
|
|
52
|
+
from .handlers import cmd_extract
|
|
53
|
+
from .handlers import cmd_list
|
|
54
|
+
from .handlers import cmd_load
|
|
55
|
+
from .handlers import cmd_pipeline
|
|
56
|
+
from .handlers import cmd_run
|
|
57
|
+
from .handlers import cmd_transform
|
|
58
|
+
from .handlers import cmd_validate
|
|
59
|
+
|
|
60
|
+
# SECTION: EXPORTS ========================================================== #
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Public surface of this module: only the Typer application object is
# exported; everything else here is either a constant or a private helper.
__all__ = [
    # Apps
    'app',
]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# SECTION: CONSTANTS ======================================================== #
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
CLI_DESCRIPTION: Final[str] = '\n'.join(
|
|
73
|
+
[
|
|
74
|
+
'ETLPlus - A Swiss Army knife for simple ETL operations.',
|
|
75
|
+
'',
|
|
76
|
+
' Provide a subcommand and options. Examples:',
|
|
77
|
+
'',
|
|
78
|
+
' etlplus extract file in.csv -o out.json',
|
|
79
|
+
' etlplus validate in.json --rules \'{"required": ["id"]}\'',
|
|
80
|
+
' etlplus transform in.json --operations \'{"select": ["id"]}\'',
|
|
81
|
+
' etlplus load in.json file out.json',
|
|
82
|
+
'',
|
|
83
|
+
' Enforce error if --format is provided for files. Examples:',
|
|
84
|
+
'',
|
|
85
|
+
' etlplus extract file in.csv --format csv --strict-format',
|
|
86
|
+
' etlplus load in.json file out.csv --format csv --strict-format',
|
|
87
|
+
],
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
CLI_EPILOG: Final[str] = '\n'.join(
|
|
91
|
+
[
|
|
92
|
+
'Environment:',
|
|
93
|
+
(
|
|
94
|
+
' ETLPLUS_FORMAT_BEHAVIOR controls behavior when '
|
|
95
|
+
'--format is provided for files.'
|
|
96
|
+
),
|
|
97
|
+
' Values:',
|
|
98
|
+
' - error|fail|strict: treat as error',
|
|
99
|
+
' - warn (default): print a warning',
|
|
100
|
+
' - ignore|silent: no message',
|
|
101
|
+
'',
|
|
102
|
+
'Note:',
|
|
103
|
+
' --strict-format overrides the environment behavior.',
|
|
104
|
+
],
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
# URL of the project's GitHub repository (not referenced elsewhere in this
# module).
PROJECT_URL: Final[str] = 'https://github.com/Dagitali/ETLPlus'

# Positional-argument spec used as the default of ``extract_cmd``'s ``args``
# parameter. ``...`` marks it as required; the command itself decides whether
# it received SOURCE alone or SOURCE_TYPE followed by SOURCE.
EXTRACT_ARGS = typer.Argument(
    ...,
    metavar='[SOURCE_TYPE] SOURCE',
    help=(
        'Extract from a SOURCE. You may provide SOURCE_TYPE explicitly as '
        'the first positional argument, or omit it and use --from or let '
        'etlplus infer it from the SOURCE.'
    ),
)
# Positional-argument spec used as the default of ``load_cmd``'s ``args``
# parameter. Also required; the command accepts one, two, or three values.
LOAD_ARGS = typer.Argument(
    ...,
    metavar='[SOURCE] [TARGET_TYPE] TARGET',
    help=(
        'Load SOURCE into a target. SOURCE defaults to - (stdin). You may '
        'provide legacy positional form: SOURCE TARGET_TYPE TARGET.'
    ),
)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# URL-scheme prefixes that make ``_infer_resource_type`` classify a value as
# a database DSN. The check lower-cases the value first, so matching is
# case-insensitive.
_DB_SCHEMES: Final[tuple[str, ...]] = (
    'postgres://',
    'postgresql://',
    'mysql://',
)

# Whitelists used to validate connector-type values (``--from``, ``--to``,
# explicit positional types) and format values (``--format``,
# ``--input-format``). Built once at import time from the project enums.
_SOURCE_CHOICES: Final[frozenset[str]] = frozenset(DataConnectorType.choices())
_FORMAT_CHOICES: Final[frozenset[str]] = frozenset(FileFormat.choices())
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# SECTION: DATA CLASSES ===================================================== #
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@dataclass(slots=True)
class CliState:
    """Mutable container for runtime CLI toggles."""

    # The defaults below match the defaults of the root callback's
    # ``--pretty/--no-pretty``, ``--quiet`` and ``--verbose`` options.
    pretty: bool = True  # pretty-print JSON output
    quiet: bool = False  # suppress warnings and non-essential output
    verbose: bool = False  # emit extra diagnostics to stderr
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _ensure_state(
    ctx: typer.Context,
) -> CliState:
    """
    Fetch the :class:`CliState` attached to ``ctx``, creating it on demand.

    Parameters
    ----------
    ctx : typer.Context
        Context object handed to the running command.

    Returns
    -------
    CliState
        The state object found on ``ctx.obj``; when ``ctx.obj`` is missing
        or holds anything else, a default-initialized state is stored there
        and returned.
    """
    current = getattr(ctx, 'obj', None)
    if isinstance(current, CliState):
        return current

    # Nothing usable on the context yet: install the defaults.
    ctx.obj = CliState()
    return ctx.obj
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _infer_resource_type(
|
|
178
|
+
value: str,
|
|
179
|
+
) -> str:
|
|
180
|
+
"""
|
|
181
|
+
Infer the resource type from a path, URL, or DSN string.
|
|
182
|
+
|
|
183
|
+
Parameters
|
|
184
|
+
----------
|
|
185
|
+
value : str
|
|
186
|
+
Raw CLI argument that represents a source or target.
|
|
187
|
+
|
|
188
|
+
Returns
|
|
189
|
+
-------
|
|
190
|
+
str
|
|
191
|
+
One of ``file``, ``database``, or ``api`` based on heuristics.
|
|
192
|
+
|
|
193
|
+
Raises
|
|
194
|
+
------
|
|
195
|
+
ValueError
|
|
196
|
+
If the resource type could not be inferred.
|
|
197
|
+
"""
|
|
198
|
+
val = (value or '').strip()
|
|
199
|
+
low = val.lower()
|
|
200
|
+
|
|
201
|
+
if val == '-':
|
|
202
|
+
return 'file'
|
|
203
|
+
if low.startswith(('http://', 'https://')):
|
|
204
|
+
return 'api'
|
|
205
|
+
if low.startswith(_DB_SCHEMES):
|
|
206
|
+
return 'database'
|
|
207
|
+
|
|
208
|
+
path = Path(val)
|
|
209
|
+
if path.exists() or path.suffix:
|
|
210
|
+
return 'file'
|
|
211
|
+
|
|
212
|
+
raise ValueError(
|
|
213
|
+
'Could not infer resource type. Use --from/--to to specify it.',
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _infer_resource_type_or_exit(
    value: str,
) -> str:
    """Infer a resource type, reporting failures as CLI parameter errors.

    Parameters
    ----------
    value : str
        Source or target string taken from the command line.

    Returns
    -------
    str
        Resource type produced by :func:`_infer_resource_type`.

    Raises
    ------
    typer.BadParameter
        If :func:`_infer_resource_type` raises ``ValueError``; the original
        message is reused and the original exception is chained.
    """
    try:
        inferred = _infer_resource_type(value)
    except ValueError as err:  # pragma: no cover - exercised indirectly
        raise typer.BadParameter(str(err)) from err
    return inferred
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _ns(
|
|
244
|
+
**kwargs: object,
|
|
245
|
+
) -> argparse.Namespace:
|
|
246
|
+
"""Build an :class:`argparse.Namespace` for the legacy handlers.
|
|
247
|
+
|
|
248
|
+
Parameters
|
|
249
|
+
----------
|
|
250
|
+
**kwargs : object
|
|
251
|
+
Attributes applied to the resulting namespace.
|
|
252
|
+
|
|
253
|
+
Returns
|
|
254
|
+
-------
|
|
255
|
+
argparse.Namespace
|
|
256
|
+
Namespace compatible with the ``cmd_*`` handler signatures.
|
|
257
|
+
"""
|
|
258
|
+
return argparse.Namespace(**kwargs)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _optional_choice(
|
|
262
|
+
value: str | None,
|
|
263
|
+
choices: Collection[str],
|
|
264
|
+
*,
|
|
265
|
+
label: str,
|
|
266
|
+
) -> str | None:
|
|
267
|
+
"""
|
|
268
|
+
Validate optional CLI choice inputs while preserving ``None``.
|
|
269
|
+
|
|
270
|
+
Parameters
|
|
271
|
+
----------
|
|
272
|
+
value : str | None
|
|
273
|
+
Candidate value provided by the CLI option.
|
|
274
|
+
choices : Collection[str]
|
|
275
|
+
Allowed options for the parameter.
|
|
276
|
+
label : str
|
|
277
|
+
Friendly label rendered in error messages.
|
|
278
|
+
|
|
279
|
+
Returns
|
|
280
|
+
-------
|
|
281
|
+
str | None
|
|
282
|
+
Sanitized choice or ``None`` when the option is omitted.
|
|
283
|
+
"""
|
|
284
|
+
if value is None:
|
|
285
|
+
return None
|
|
286
|
+
return _validate_choice(value, choices, label=label)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _stateful_namespace(
|
|
290
|
+
state: CliState,
|
|
291
|
+
*,
|
|
292
|
+
command: str,
|
|
293
|
+
**kwargs: object,
|
|
294
|
+
) -> argparse.Namespace:
|
|
295
|
+
"""Attach CLI state toggles to a handler namespace.
|
|
296
|
+
|
|
297
|
+
Parameters
|
|
298
|
+
----------
|
|
299
|
+
state : CliState
|
|
300
|
+
Current CLI state stored on the Typer context.
|
|
301
|
+
command : str
|
|
302
|
+
Logical command name (e.g., ``extract``).
|
|
303
|
+
**kwargs : object
|
|
304
|
+
Additional attributes required by the handler.
|
|
305
|
+
|
|
306
|
+
Returns
|
|
307
|
+
-------
|
|
308
|
+
argparse.Namespace
|
|
309
|
+
Namespace compatible with the ``cmd_*`` handler signatures.
|
|
310
|
+
"""
|
|
311
|
+
return _ns(
|
|
312
|
+
command=command,
|
|
313
|
+
pretty=state.pretty,
|
|
314
|
+
quiet=state.quiet,
|
|
315
|
+
verbose=state.verbose,
|
|
316
|
+
**kwargs,
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _validate_choice(
|
|
321
|
+
value: str,
|
|
322
|
+
choices: Collection[str],
|
|
323
|
+
*,
|
|
324
|
+
label: str,
|
|
325
|
+
) -> str:
|
|
326
|
+
"""
|
|
327
|
+
Validate CLI input against a whitelist of choices.
|
|
328
|
+
|
|
329
|
+
Parameters
|
|
330
|
+
----------
|
|
331
|
+
value : str
|
|
332
|
+
Candidate value from the CLI option or argument.
|
|
333
|
+
choices: Collection[str]
|
|
334
|
+
Allowed values for the option.
|
|
335
|
+
label : str
|
|
336
|
+
Friendly label rendered in the validation error message.
|
|
337
|
+
|
|
338
|
+
Returns
|
|
339
|
+
-------
|
|
340
|
+
str
|
|
341
|
+
Sanitized and validated value.
|
|
342
|
+
|
|
343
|
+
Raises
|
|
344
|
+
------
|
|
345
|
+
typer.BadParameter
|
|
346
|
+
If ``value`` is not present in ``choices``.
|
|
347
|
+
"""
|
|
348
|
+
v = (value or '').strip()
|
|
349
|
+
if v in choices:
|
|
350
|
+
return v
|
|
351
|
+
allowed = ', '.join(sorted(choices))
|
|
352
|
+
raise typer.BadParameter(
|
|
353
|
+
f"Invalid {label} '{value}'. Choose from: {allowed}",
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
# SECTION: TYPER APP ======================================================== #
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
# Typer application instance (subcommands are registered below).
app = typer.Typer(
    name='etlplus',
    # Top-level help and epilog come from the module-level constants.
    help=CLI_DESCRIPTION,
    epilog=CLI_EPILOG,
    add_completion=True,
    # Help for a bare ``etlplus`` invocation is printed by the ``_root``
    # callback instead, which exits with status 0.
    no_args_is_help=False,
    rich_markup_mode='markdown',
)
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
@app.callback(invoke_without_command=True)
def _root(
    ctx: typer.Context,
    version: bool = typer.Option(
        False,
        '--version',
        '-V',
        is_eager=True,
        help='Show the version and exit.',
    ),
    pretty: bool = typer.Option(
        True,
        '--pretty/--no-pretty',
        help='Pretty-print JSON output (default: pretty).',
    ),
    quiet: bool = typer.Option(
        False,
        '--quiet',
        '-q',
        help='Suppress warnings and non-essential output.',
    ),
    verbose: bool = typer.Option(
        False,
        '--verbose',
        '-v',
        help='Emit extra diagnostics to stderr.',
    ),
) -> None:
    """
    Record the global flags on the context and serve the root-only options.

    Parameters
    ----------
    ctx : typer.Context
        Context object for the current invocation.
    version : bool
        When set, print ``etlplus <version>`` and exit.
    pretty : bool
        Pretty-print JSON output.
    quiet : bool
        Suppress warnings and non-essential output.
    verbose : bool
        Emit extra diagnostics to stderr.

    Raises
    ------
    typer.Exit
        With status 0 after printing the version, or after printing the
        help text when no subcommand was given.
    """
    # Publish the toggles first so every subcommand can read them from ctx.
    ctx.obj = CliState(pretty=pretty, quiet=quiet, verbose=verbose)

    if version:
        typer.echo(f'etlplus {__version__}')
        raise typer.Exit(0)

    # A subcommand is about to run, or Click is parsing resiliently:
    # nothing left to do at the root level.
    if ctx.invoked_subcommand is not None or ctx.resilient_parsing:
        return

    typer.echo(ctx.command.get_help(ctx))
    raise typer.Exit(0)
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
@app.command('extract')
def extract_cmd(
    ctx: typer.Context,
    args: list[str] = EXTRACT_ARGS,
    from_: str | None = typer.Option(
        None,
        '--from',
        help='Override the inferred source type (file, database, api).',
    ),
    output: str | None = typer.Option(
        None,
        '-o',
        '--output',
        help='Output file to save extracted data (JSON). Use - for stdout.',
    ),
    strict_format: bool = typer.Option(
        False,
        '--strict-format',
        help=(
            'Treat providing --format for file sources as an error '
            '(overrides environment behavior)'
        ),
    ),
    source_format: str | None = typer.Option(
        None,
        '--format',
        help=(
            'Payload format when not a file (or when SOURCE is -). '
            'For normal file paths, format is inferred from extension.'
        ),
    ),
) -> int:
    """
    Extract data from files, databases, or REST APIs.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    args : list[str]
        Positional arguments: either SOURCE, or SOURCE_TYPE SOURCE.
    from_ : str | None
        Override the inferred source type.
    output : str | None
        Output file to save extracted data.
    strict_format : bool
        Whether to enforce strict format behavior.
    source_format : str | None
        Payload format when not a file.

    Returns
    -------
    int
        Zero on success.

    Raises
    ------
    typer.BadParameter
        If invalid parameters are provided.

    Examples
    --------
    - Extract from a file (type inferred):
        etlplus extract in.csv

    - Extract from a file (explicit):
        etlplus extract file in.csv
        etlplus extract --from file in.csv

    - Extract from an API:
        etlplus extract https://example.com/data.json
        etlplus extract --from api https://example.com/data.json

    - Extract from a database DSN:
        etlplus extract --from database postgresql://user:pass@host/db

    - Pipe into transform/load:
        etlplus extract in.csv \
        | etlplus transform --operations '{"select":["a"]}'
    """
    # NOTE: Typer shows the docstring above as this command's help text, so
    # edits to it change user-visible output.
    state = _ensure_state(ctx)

    # Only "[SOURCE_TYPE] SOURCE" is meaningful; EXTRACT_ARGS is declared
    # required, so at least one value is expected here.
    if len(args) > 2:
        raise typer.BadParameter('Provide SOURCE, or SOURCE_TYPE SOURCE.')

    # Validate the optional overrides before resolving the source type so
    # bad values are rejected first.
    from_ = _optional_choice(from_, _SOURCE_CHOICES, label='from')
    source_format = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='format',
    )

    if len(args) == 2:
        # Explicit positional SOURCE_TYPE: --from would name the type twice.
        if from_ is not None:
            raise typer.BadParameter(
                'Do not combine --from with an explicit SOURCE_TYPE.',
            )
        source_type = _validate_choice(
            args[0],
            _SOURCE_CHOICES,
            label='source_type',
        )
        source = args[1]
    else:
        # Single positional: take the type from --from, else infer it.
        source = args[0]
        if from_ is not None:
            source_type = from_
        else:
            source_type = _infer_resource_type_or_exit(source)

    # Final whitelist check; this is what guards an inferred type.
    source_type = _validate_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )

    # NOTE: printed whenever --verbose is set, including when the type was
    # given explicitly rather than inferred.
    if state.verbose:
        print(
            f'Inferred source_type={source_type} for source={source}',
            file=sys.stderr,
        )

    # ``format`` falls back to 'json'; ``_format_explicit`` records whether
    # the user actually passed --format.
    ns = _stateful_namespace(
        state,
        command='extract',
        source_type=source_type,
        source=source,
        output=output,
        strict_format=strict_format,
        format=(source_format or 'json'),
        _format_explicit=(source_format is not None),
    )
    # NOTE(review): Click's standalone mode does not use a command's return
    # value as the exit status; presumably the entry point (not visible
    # here) propagates it - confirm.
    return int(cmd_extract(ns))
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
@app.command('validate')
def validate_cmd(
    ctx: typer.Context,
    source: str = typer.Argument(
        '-',
        metavar='SOURCE',
        help=(
            'Data source to validate (file path, JSON string, or - for stdin).'
        ),
    ),
    rules: str = typer.Option(
        '{}',
        '--rules',
        help='Validation rules as JSON string',
    ),
    output: str | None = typer.Option(
        None,
        '-o',
        '--output',
        help='Output file to save validated data (JSON). Use - for stdout.',
    ),
    input_format: str | None = typer.Option(
        None,
        '--input-format',
        help='Input payload format for stdin (json or csv).',
    ),
) -> int:
    """
    Validate data against JSON-described rules.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    source : str
        Data source (file path or ``-`` for stdin).
    rules : str
        Validation rules as a JSON string.
    output : str | None
        Optional output path. Use ``-`` for stdout.
    input_format : str | None
        Optional stdin format hint (json or csv).

    Returns
    -------
    int
        Zero on success.
    """
    # NOTE: Typer shows the docstring above as this command's help text.
    # The hint is checked against the full FileFormat whitelist, which may
    # be wider than the "json or csv" mentioned in the help string.
    input_format = _optional_choice(
        input_format,
        _FORMAT_CHOICES,
        label='input_format',
    )

    state = _ensure_state(ctx)

    # ``json_type`` (from ``..utils``) converts the raw --rules text before
    # it reaches the handler - presumably by parsing it as JSON; how it
    # reports malformed input is not visible here.
    ns = _stateful_namespace(
        state,
        command='validate',
        source=source,
        rules=json_type(rules),
        output=output,
        input_format=input_format,
    )
    # Delegate to the legacy handler and coerce its result to an int status.
    return int(cmd_validate(ns))
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
@app.command('transform')
def transform_cmd(
    ctx: typer.Context,
    source: str = typer.Argument(
        '-',
        metavar='SOURCE',
        help=(
            'Data source to transform '
            '(file path, JSON string, or - for stdin).'
        ),
    ),
    operations: str = typer.Option(
        '{}',
        '--operations',
        help='Transformation operations as JSON string',
    ),
    output: str | None = typer.Option(
        None,
        '-o',
        '--output',
        help='Output file to save transformed data (JSON). Use - for stdout.',
    ),
    input_format: str | None = typer.Option(
        None,
        '--input-format',
        help='Input payload format for stdin (json or csv).',
    ),
) -> int:
    """
    Transform records using JSON-described operations.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    source : str
        Data source (file path or ``-`` for stdin).
    operations : str
        Transformation operations as a JSON string.
    output : str | None
        Optional output path. Use ``-`` for stdout.
    input_format : str | None
        Optional stdin format hint (json or csv).

    Returns
    -------
    int
        Zero on success.
    """
    # NOTE: Typer shows the docstring above as this command's help text.
    # The hint is checked against the full FileFormat whitelist, which may
    # be wider than the "json or csv" mentioned in the help string.
    input_format = _optional_choice(
        input_format,
        _FORMAT_CHOICES,
        label='input_format',
    )

    state = _ensure_state(ctx)

    # ``json_type`` (from ``..utils``) converts the raw --operations text
    # before it reaches the handler - presumably by parsing it as JSON; how
    # it reports malformed input is not visible here.
    ns = _stateful_namespace(
        state,
        command='transform',
        source=source,
        operations=json_type(operations),
        output=output,
        input_format=input_format,
    )
    # Delegate to the legacy handler and coerce its result to an int status.
    return int(cmd_transform(ns))
|
|
700
|
+
|
|
701
|
+
|
|
702
|
+
@app.command('load')
def load_cmd(
    ctx: typer.Context,
    args: list[str] = LOAD_ARGS,
    to: str | None = typer.Option(
        None,
        '--to',
        help='Override the inferred target type (file, database, api).',
    ),
    strict_format: bool = typer.Option(
        False,
        '--strict-format',
        help=(
            'Treat providing --format for file targets as an error '
            '(overrides environment behavior)'
        ),
    ),
    target_format: str | None = typer.Option(
        None,
        '--format',
        help=(
            'Payload format when not a file (or when TARGET is -). '
            'For normal file targets, format is inferred from extension.'
        ),
    ),
    input_format: str | None = typer.Option(
        None,
        '--input-format',
        help='Input payload format for stdin (json or csv).',
    ),
) -> int:
    """
    Load data into a file, database, or REST API.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    args : list[str]
        Positional arguments: TARGET, SOURCE TARGET, or SOURCE TARGET_TYPE
        TARGET.
    to : str | None
        Override the inferred target type.
    strict_format : bool
        Whether to enforce strict format behavior.
    target_format : str | None
        Payload format when not a file.
    input_format : str | None
        Input payload format for stdin.

    Returns
    -------
    int
        Zero on success.

    Raises
    ------
    typer.BadParameter
        If the arguments are invalid

    Examples
    --------
    - Pipe into a file:
        etlplus extract in.csv \
        | etlplus transform --operations '{"select":["a"]}' \
        | etlplus load --to file out.json

    - Legacy form:
        etlplus load in.json file out.json

    - Write to stdout:
        etlplus load in.json file -
    """
    # NOTE: Typer shows the docstring above as this command's help text, so
    # edits to it change user-visible output.
    state = _ensure_state(ctx)

    if len(args) > 3:
        raise typer.BadParameter(
            'Provide TARGET, SOURCE TARGET, or SOURCE TARGET_TYPE TARGET.',
        )

    # Validate the optional overrides before interpreting the positionals.
    # Target types share the connector-type whitelist used for sources.
    to = _optional_choice(to, _SOURCE_CHOICES, label='to')
    target_format = _optional_choice(
        target_format,
        _FORMAT_CHOICES,
        label='format',
    )
    input_format = _optional_choice(
        input_format,
        _FORMAT_CHOICES,
        label='input_format',
    )

    # Parse positional args.
    # Case order matters: the guarded three-element case must come before
    # the unguarded one, which only catches the --to conflict.
    match args:
        case [source, target_type_raw, target] if to is None:
            # Legacy form: SOURCE TARGET_TYPE TARGET.
            target_type = _validate_choice(
                target_type_raw,
                _SOURCE_CHOICES,
                label='target_type',
            )
        case [_, _, _]:
            # Three positionals plus --to: the target type was given twice.
            raise typer.BadParameter(
                'Do not combine --to with the legacy SOURCE TARGET_TYPE '
                'TARGET form.',
            )
        case [source, target]:
            target_type = to or _infer_resource_type_or_exit(target)
        case [solo_target]:
            # Only TARGET given: the payload is read from stdin.
            source = '-'
            target = solo_target
            target_type = to or _infer_resource_type_or_exit(target)
        case []:
            # Defensive: LOAD_ARGS is declared required, so an empty list
            # should already have been rejected during parsing.
            raise typer.BadParameter(
                'Provide TARGET, SOURCE TARGET, or legacy SOURCE '
                'TARGET_TYPE TARGET.',
            )

    # Final whitelist check; this is what guards an inferred type.
    target_type = _validate_choice(
        target_type,
        _SOURCE_CHOICES,
        label='target_type',
    )

    # NOTE: printed whenever --verbose is set, including when the type was
    # given explicitly rather than inferred.
    if state.verbose:
        print(
            f'Inferred target_type={target_type} for target={target}',
            file=sys.stderr,
        )

    # ``format`` falls back to 'json'; ``_format_explicit`` records whether
    # the user actually passed --format.
    ns = _stateful_namespace(
        state,
        command='load',
        source=source,
        target_type=target_type,
        target=target,
        strict_format=strict_format,
        format=(target_format or 'json'),
        _format_explicit=(target_format is not None),
        input_format=input_format,
    )
    # Delegate to the legacy handler and coerce its result to an int status.
    return int(cmd_load(ns))
|
|
843
|
+
|
|
844
|
+
|
|
845
|
+
@app.command('pipeline')
def pipeline_cmd(
    ctx: typer.Context,
    config: str = typer.Option(
        ...,
        '--config',
        help='Path to pipeline YAML configuration file',
    ),
    list_: bool = typer.Option(
        False,
        '--list',
        help='List available job names and exit',
    ),
    run_job: str | None = typer.Option(
        None,
        '--run',
        metavar='JOB',
        help='Run a specific job by name',
    ),
) -> int:
    """
    Inspect or run a pipeline YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : str
        Path to pipeline YAML configuration file.
    list_ : bool
        If True, list available job names and exit.
    run_job : str | None
        Name of a specific job to run.

    Returns
    -------
    int
        Zero on success.
    """
    # NOTE: Typer shows the docstring above as this command's help text.
    state = _ensure_state(ctx)
    # The handler receives the values under the names ``list`` and ``run``;
    # the parameters are spelled ``list_``/``run_job`` here, which avoids
    # shadowing the ``list`` builtin inside this function.
    ns = _stateful_namespace(
        state,
        command='pipeline',
        config=config,
        list=list_,
        run=run_job,
    )
    # Delegate to the legacy handler and coerce its result to an int status.
    return int(cmd_pipeline(ns))
|
|
893
|
+
|
|
894
|
+
|
|
895
|
+
@app.command('list')
def list_cmd(
    ctx: typer.Context,
    config: str = typer.Option(
        ...,
        '--config',
        help='Path to pipeline YAML configuration file',
    ),
    pipelines: bool = typer.Option(
        False,
        '--pipelines',
        help='List ETL pipelines',
    ),
    sources: bool = typer.Option(False, '--sources', help='List data sources'),
    targets: bool = typer.Option(False, '--targets', help='List data targets'),
    transforms: bool = typer.Option(
        False,
        '--transforms',
        help='List data transforms',
    ),
) -> int:
    """
    Print ETL entities from a pipeline YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : str
        Path to pipeline YAML configuration file.
    pipelines : bool
        If True, list ETL pipelines.
    sources : bool
        If True, list data sources.
    targets : bool
        If True, list data targets.
    transforms : bool
        If True, list data transforms.

    Returns
    -------
    int
        Zero on success.
    """
    # NOTE: Typer shows the docstring above as this command's help text.
    state = _ensure_state(ctx)
    # The four selection flags are forwarded unchanged; how they combine
    # (including the case where none is set) is decided by ``cmd_list``.
    ns = _stateful_namespace(
        state,
        command='list',
        config=config,
        pipelines=pipelines,
        sources=sources,
        targets=targets,
        transforms=transforms,
    )
    # Delegate to the legacy handler and coerce its result to an int status.
    return int(cmd_list(ns))
|
|
950
|
+
|
|
951
|
+
|
|
952
|
+
@app.command('run')
def run_cmd(
    ctx: typer.Context,
    config: str = typer.Option(
        ...,
        '--config',
        help='Path to pipeline YAML configuration file',
    ),
    job: str | None = typer.Option(
        None,
        '-j',
        '--job',
        help='Name of the job to run',
    ),
    pipeline: str | None = typer.Option(
        None,
        '-p',
        '--pipeline',
        help='Name of the pipeline to run',
    ),
) -> int:
    """
    Execute an ETL job or pipeline from a YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : str
        Path to pipeline YAML configuration file.
    job : str | None
        Name of the job to run.
    pipeline : str | None
        Name of the pipeline to run.

    Returns
    -------
    int
        Zero on success.
    """
    # NOTE: Typer shows the docstring above as this command's help text.
    state = _ensure_state(ctx)
    # ``job`` and ``pipeline`` are both optional and are forwarded as given;
    # choosing between them (or handling neither) is left to ``cmd_run``.
    ns = _stateful_namespace(
        state,
        command='run',
        config=config,
        job=job,
        pipeline=pipeline,
    )
    # Delegate to the legacy handler and coerce its result to an int status.
    return int(cmd_run(ns))
|