etlplus 0.5.2__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/api/README.md +24 -26
- etlplus/cli/commands.py +924 -0
- etlplus/cli/constants.py +71 -0
- etlplus/cli/handlers.py +369 -484
- etlplus/cli/io.py +336 -0
- etlplus/cli/main.py +16 -418
- etlplus/cli/options.py +49 -0
- etlplus/cli/state.py +336 -0
- etlplus/cli/types.py +33 -0
- etlplus/database/__init__.py +44 -0
- etlplus/database/ddl.py +319 -0
- etlplus/database/engine.py +151 -0
- etlplus/database/orm.py +354 -0
- etlplus/database/schema.py +274 -0
- etlplus/database/types.py +33 -0
- etlplus/enums.py +51 -1
- etlplus/load.py +1 -1
- etlplus/run.py +2 -4
- etlplus/types.py +5 -0
- etlplus/utils.py +1 -32
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/METADATA +84 -40
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/RECORD +26 -16
- etlplus/cli/app.py +0 -1367
- etlplus/ddl.py +0 -197
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/WHEEL +0 -0
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/entry_points.txt +0 -0
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.5.2.dist-info → etlplus-0.9.1.dist-info}/top_level.txt +0 -0
etlplus/cli/app.py
DELETED
|
@@ -1,1367 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
:mod:`etlplus.cli.app` module.
|
|
3
|
-
|
|
4
|
-
Defines the main `Typer` application for the ``etlplus`` command-line
|
|
5
|
-
interface (CLI).
|
|
6
|
-
|
|
7
|
-
Typer-First Interface
|
|
8
|
-
---------------------
|
|
9
|
-
The CLI is implemented using `Typer` (Click) for parsing, help text, and
|
|
10
|
-
subcommand dispatch. The Typer layer focuses on ergonomics (git-style
|
|
11
|
-
subcommands, optional inference of resource types, stdin/stdout piping, and
|
|
12
|
-
quality-of-life flags), while delegating business logic to the existing
|
|
13
|
-
``cmd_*`` handlers.
|
|
14
|
-
|
|
15
|
-
Namespace Adapter
|
|
16
|
-
-----------------
|
|
17
|
-
The command handlers continue to accept an ``argparse.Namespace`` for
|
|
18
|
-
backwards compatibility with existing ``cmd_*`` functions and tests. The
|
|
19
|
-
Typer commands adapt parsed arguments into an ``argparse.Namespace`` and then
|
|
20
|
-
call the corresponding ``cmd_*`` handler.
|
|
21
|
-
|
|
22
|
-
Subcommands
|
|
23
|
-
-----------
|
|
24
|
-
- ``extract``: extract data from files, databases, or REST APIs
|
|
25
|
-
- ``validate``: validate data against rules
|
|
26
|
-
- ``transform``: transform records
|
|
27
|
-
- ``load``: load data to files, databases, or REST APIs
|
|
28
|
-
- ``render``: render SQL DDL from table schema specs
|
|
29
|
-
|
|
30
|
-
Notes
|
|
31
|
-
-----
|
|
32
|
-
- Use ``-`` to read from stdin or to write to stdout.
|
|
33
|
-
- Commands ``extract`` and ``transform`` support the command-line option
|
|
34
|
-
``--from`` to override inferred resource types.
|
|
35
|
-
- Commands ``transform`` and ``load`` support the command-line option ``--to``
|
|
36
|
-
to override inferred resource types.
|
|
37
|
-
"""
|
|
38
|
-
|
|
39
|
-
# Pylint struggles with large CLI surfaces that legitimately require
|
|
40
|
-
# numerous arguments in a single module.
|
|
41
|
-
# pylint: disable=too-many-lines
|
|
42
|
-
# pylint: disable=too-many-arguments,too-many-positional-arguments
|
|
43
|
-
|
|
44
|
-
from __future__ import annotations
|
|
45
|
-
|
|
46
|
-
import argparse
|
|
47
|
-
import sys
|
|
48
|
-
from collections.abc import Collection
|
|
49
|
-
from dataclasses import dataclass
|
|
50
|
-
from pathlib import Path
|
|
51
|
-
from typing import Annotated
|
|
52
|
-
from typing import Final
|
|
53
|
-
|
|
54
|
-
import typer
|
|
55
|
-
|
|
56
|
-
from .. import __version__
|
|
57
|
-
from ..enums import DataConnectorType
|
|
58
|
-
from ..enums import FileFormat
|
|
59
|
-
from ..utils import json_type
|
|
60
|
-
from .handlers import cmd_extract
|
|
61
|
-
from .handlers import cmd_list
|
|
62
|
-
from .handlers import cmd_load
|
|
63
|
-
from .handlers import cmd_pipeline
|
|
64
|
-
from .handlers import cmd_render
|
|
65
|
-
from .handlers import cmd_run
|
|
66
|
-
from .handlers import cmd_transform
|
|
67
|
-
from .handlers import cmd_validate
|
|
68
|
-
|
|
69
|
-
# SECTION: EXPORTS ========================================================== #
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
__all__ = [
|
|
73
|
-
# Apps
|
|
74
|
-
'app',
|
|
75
|
-
]
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
# URL scheme prefixes that mark a CLI value as a database DSN. The tuple is
# passed straight to ``str.startswith`` during resource-type inference.
_DB_SCHEMES: Final[tuple[str, ...]] = (
    'postgres://',
    'postgresql://',
    'mysql://',
)

# Allowed connector names, built from ``DataConnectorType.choices()``.
_SOURCE_CHOICES: Final[frozenset[str]] = frozenset(DataConnectorType.choices())
# Allowed payload/file format names, built from ``FileFormat.choices()``.
_FORMAT_CHOICES: Final[frozenset[str]] = frozenset(FileFormat.choices())
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
# SECTION: CONSTANTS ======================================================== #
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
# Top-level help text handed to ``typer.Typer(help=...)``; one list item per
# rendered line.
# NOTE(review): the examples below use ``--from``/``--to``/``-o``, while the
# ``extract`` and ``load`` commands defined in this module register
# ``--source-type``/``--target-type`` -- confirm the examples are still valid.
CLI_DESCRIPTION: Final[str] = '\n'.join(
    [
        'ETLPlus - A Swiss Army knife for simple ETL operations.',
        '',
        ' Provide a subcommand and options. Examples:',
        '',
        ' etlplus extract in.csv > out.json',
        ' etlplus validate in.json --rules \'{"required": ["id"]}\'',
        (
            ' etlplus transform --from file in.json '
            '--operations \'{"select": ["id"]}\' --to file -o out.json'
        ),
        ' etlplus extract in.csv | etlplus load --to file out.json',
        ' cat data.json | etlplus load --to api https://example.com/data',
        '',
        ' Override format inference when extensions are misleading:',
        '',
        ' etlplus extract data.txt --source-format csv',
        ' etlplus load payload.bin --target-format json',
    ],
)

# Footer text handed to ``typer.Typer(epilog=...)``.
CLI_EPILOG: Final[str] = '\n'.join(
    [
        'Tip:',
        ' --source-format and --target-format override format inference '
        'based on filename extensions when needed.',
    ],
)

# Project home page.
PROJECT_URL: Final[str] = 'https://github.com/Dagitali/ETLPlus'
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
# SECTION: TYPE ALIASES ==================================================== #
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
# Reusable ``Annotated`` parameter declarations shared by the Typer commands.
# Each alias bundles the Python type with its ``typer.Argument`` /
# ``typer.Option`` metadata (flag names, metavar, help text).

# -- Positional arguments --------------------------------------------------- #

# Required SOURCE positional for ``extract``.
# NOTE(review): the help text mentions ``--from``, but ``SourceOverrideOption``
# below only registers ``--source-type`` -- confirm ``--from`` still exists.
SourceInputArg = Annotated[
    str,
    typer.Argument(
        ...,
        metavar='SOURCE',
        help=(
            'Extract from SOURCE. Use --from/--source-type to override the '
            'inferred connector when needed.'
        ),
    ),
]

# Required SOURCE positional for commands that transform or validate data.
StreamingSourceArg = Annotated[
    str,
    typer.Argument(
        ...,
        metavar='SOURCE',
        help=(
            'Data source to transform or validate (path, JSON payload, or '
            '- for stdin).'
        ),
    ),
]

# Required TARGET positional for ``load``.
# NOTE(review): the help text mentions ``--to``, but ``TargetOverrideOption``
# below only registers ``--target-type`` -- confirm ``--to`` still exists.
TargetInputArg = Annotated[
    str,
    typer.Argument(
        ...,
        metavar='TARGET',
        help=(
            'Load JSON data from stdin into TARGET. Use --to/--target-type '
            'to override connector inference when needed. Source data must '
            'be piped into stdin.'
        ),
    ),
]

# -- Connector (resource type) overrides ------------------------------------ #

SourceOverrideOption = Annotated[
    str | None,
    typer.Option(
        '--source-type',
        metavar='CONNECTOR',
        show_default=False,
        rich_help_panel='I/O overrides',
        help='Override the inferred source type (file, database, api).',
    ),
]

TargetOverrideOption = Annotated[
    str | None,
    typer.Option(
        '--target-type',
        metavar='CONNECTOR',
        show_default=False,
        rich_help_panel='I/O overrides',
        help='Override the inferred target type (file, database, api).',
    ),
]

# -- Format overrides ------------------------------------------------------- #

SourceFormatOption = Annotated[
    str | None,
    typer.Option(
        '--source-format',
        metavar='FORMAT',
        show_default=False,
        rich_help_panel='Format overrides',
        help=(
            'Input payload format when SOURCE is - or an inline payload. '
            'File sources infer format from the extension.'
        ),
    ),
]

# Same ``--source-format`` flag as ``SourceFormatOption``, with help text
# worded for payloads read from stdin.
StdinFormatOption = Annotated[
    str | None,
    typer.Option(
        '--source-format',
        metavar='FORMAT',
        show_default=False,
        rich_help_panel='Format overrides',
        help='Input payload format when reading from stdin (default: json).',
    ),
]

TargetFormatOption = Annotated[
    str | None,
    typer.Option(
        '--target-format',
        metavar='FORMAT',
        show_default=False,
        rich_help_panel='Format overrides',
        help=(
            'Payload format when TARGET is - or a non-file connector. File '
            'targets infer format from the extension.'
        ),
    ),
]

# -- JSON payload options --------------------------------------------------- #

OperationsJSONOption = Annotated[
    str,
    typer.Option(
        '--operations',
        help='Transformation operations as JSON string.',
    ),
]

RulesJSONOption = Annotated[
    str,
    typer.Option(
        '--rules',
        help='Validation rules as JSON string.',
    ),
]

# -- Output / configuration paths ------------------------------------------- #

TargetPathOption = Annotated[
    str | None,
    typer.Option(
        '--target',
        metavar='PATH',
        help='Target file for transformed or validated output (- for stdout).',
    ),
]

# ``...`` as the first argument marks ``--config`` as required.
PipelineConfigOption = Annotated[
    str,
    typer.Option(
        ...,
        '--config',
        metavar='PATH',
        help='Path to pipeline YAML configuration file.',
    ),
]

# -- ``render`` command options --------------------------------------------- #

# Optional variant of ``--config`` (no ``...`` default marker).
RenderConfigOption = Annotated[
    str | None,
    typer.Option(
        '--config',
        metavar='PATH',
        help='Pipeline YAML that includes table_schemas for rendering.',
        show_default=False,
    ),
]

RenderOutputOption = Annotated[
    str | None,
    typer.Option(
        '--output',
        '-o',
        metavar='PATH',
        help='Write rendered SQL to PATH (default: stdout).',
    ),
]

RenderSpecOption = Annotated[
    str | None,
    typer.Option(
        '--spec',
        metavar='PATH',
        help='Standalone table spec file (.yml/.yaml/.json).',
        show_default=False,
    ),
]

RenderTableOption = Annotated[
    str | None,
    typer.Option(
        '--table',
        metavar='NAME',
        help='Filter to a single table name from table_schemas.',
    ),
]

RenderTemplateOption = Annotated[
    str,
    typer.Option(
        '--template',
        '-t',
        metavar='KEY|PATH',
        help='Template key (ddl/view) or path to a Jinja template file.',
        show_default=True,
    ),
]

RenderTemplatePathOption = Annotated[
    str | None,
    typer.Option(
        '--template-path',
        metavar='PATH',
        help=(
            'Explicit path to a Jinja template file (overrides template key).'
        ),
    ),
]
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
# SECTION: DATA CLASSES ===================================================== #
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
@dataclass(slots=True)
class CliState:
    """
    Mutable container for runtime CLI toggles.

    An instance is stored on the Typer context (``ctx.obj``) by the root
    callback and copied onto the handler namespaces built for each command.
    """

    # Mirrors the root ``--pretty/--no-pretty`` option.
    pretty: bool = True
    # Mirrors the root ``--quiet``/``-q`` option.
    quiet: bool = False
    # Mirrors the root ``--verbose``/``-v`` option.
    verbose: bool = False
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
def _ensure_state(
    ctx: typer.Context,
) -> CliState:
    """
    Fetch the :class:`CliState` attached to *ctx*, creating it on demand.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.

    Returns
    -------
    CliState
        The state object held in ``ctx.obj``. When ``ctx.obj`` is missing or
        is not a :class:`CliState`, a default instance is stored there first.
    """
    current = getattr(ctx, 'obj', None)
    if isinstance(current, CliState):
        return current

    # Nothing usable on the context yet: seed it with default toggles.
    ctx.obj = CliState()
    return ctx.obj
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
def _format_namespace_kwargs(
|
|
362
|
-
*,
|
|
363
|
-
format_value: str | None,
|
|
364
|
-
default: str,
|
|
365
|
-
) -> dict[str, object]:
|
|
366
|
-
"""
|
|
367
|
-
Return common namespace kwargs for format handling.
|
|
368
|
-
|
|
369
|
-
Parameters
|
|
370
|
-
----------
|
|
371
|
-
format_value : str | None
|
|
372
|
-
User-provided format value from the CLI option.
|
|
373
|
-
default : str
|
|
374
|
-
Default format to use when none is provided.
|
|
375
|
-
|
|
376
|
-
Returns
|
|
377
|
-
-------
|
|
378
|
-
dict[str, object]
|
|
379
|
-
Keyword arguments for format-related namespace attributes.
|
|
380
|
-
"""
|
|
381
|
-
return {
|
|
382
|
-
'format': (format_value or default),
|
|
383
|
-
'_format_explicit': (format_value is not None),
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
def _infer_resource_type(
|
|
388
|
-
value: str,
|
|
389
|
-
) -> str:
|
|
390
|
-
"""
|
|
391
|
-
Infer the resource type from a path, URL, or DSN string.
|
|
392
|
-
|
|
393
|
-
Parameters
|
|
394
|
-
----------
|
|
395
|
-
value : str
|
|
396
|
-
Raw CLI argument that represents a source or target.
|
|
397
|
-
|
|
398
|
-
Returns
|
|
399
|
-
-------
|
|
400
|
-
str
|
|
401
|
-
One of ``file``, ``database``, or ``api`` based on heuristics.
|
|
402
|
-
|
|
403
|
-
Raises
|
|
404
|
-
------
|
|
405
|
-
ValueError
|
|
406
|
-
If the resource type could not be inferred.
|
|
407
|
-
"""
|
|
408
|
-
val = (value or '').strip()
|
|
409
|
-
low = val.lower()
|
|
410
|
-
|
|
411
|
-
match (val, low):
|
|
412
|
-
case ('-', _):
|
|
413
|
-
return 'file'
|
|
414
|
-
case (_, inferred) if inferred.startswith(('http://', 'https://')):
|
|
415
|
-
return 'api'
|
|
416
|
-
case (_, inferred) if inferred.startswith(_DB_SCHEMES):
|
|
417
|
-
return 'database'
|
|
418
|
-
|
|
419
|
-
path = Path(val)
|
|
420
|
-
if path.exists() or path.suffix:
|
|
421
|
-
return 'file'
|
|
422
|
-
|
|
423
|
-
raise ValueError(
|
|
424
|
-
'Could not infer resource type. Use --from/--to to specify it.',
|
|
425
|
-
)
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
def _infer_resource_type_or_exit(
    value: str,
) -> str:
    """
    Infer a resource type, reporting failures as CLI parameter errors.

    Parameters
    ----------
    value : str
        CLI value describing a source/target.

    Returns
    -------
    str
        Inferred resource type.

    Raises
    ------
    typer.BadParameter
        If :func:`_infer_resource_type` raises ``ValueError``; the original
        message is reused and the original exception is chained.
    """
    try:
        inferred = _infer_resource_type(value)
    except ValueError as exc:  # pragma: no cover - exercised indirectly
        raise typer.BadParameter(str(exc)) from exc
    return inferred
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
def _infer_resource_type_soft(
    value: str | None,
) -> str | None:
    """
    Infer a resource type without ever failing on uninferable input.

    Parameters
    ----------
    value : str | None
        CLI value describing a source/target.

    Returns
    -------
    str | None
        Inferred resource type, or ``None`` when *value* is ``None`` or
        :func:`_infer_resource_type` raises ``ValueError``.
    """
    inferred: str | None = None
    if value is not None:
        try:
            inferred = _infer_resource_type(value)
        except ValueError:
            # Best effort only: uninferable values are reported as ``None``.
            inferred = None
    return inferred
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
def _log_inferred_resource(
|
|
479
|
-
state: CliState,
|
|
480
|
-
*,
|
|
481
|
-
role: str,
|
|
482
|
-
value: str,
|
|
483
|
-
resource_type: str | None,
|
|
484
|
-
) -> None:
|
|
485
|
-
"""
|
|
486
|
-
Emit a uniform verbose message for inferred resource types.
|
|
487
|
-
|
|
488
|
-
Parameters
|
|
489
|
-
----------
|
|
490
|
-
state : CliState
|
|
491
|
-
Current CLI state stored on the Typer context.
|
|
492
|
-
role : str
|
|
493
|
-
Friendly label for the resource (e.g., ``source`` or ``target``).
|
|
494
|
-
value : str
|
|
495
|
-
Resource value provided on the CLI.
|
|
496
|
-
resource_type : str | None
|
|
497
|
-
Inferred resource type or ``None`` if not inferred.
|
|
498
|
-
"""
|
|
499
|
-
if not state.verbose or resource_type is None:
|
|
500
|
-
return
|
|
501
|
-
print(
|
|
502
|
-
f'Inferred {role}_type={resource_type} for {role}={value}',
|
|
503
|
-
file=sys.stderr,
|
|
504
|
-
)
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
def _ns(
|
|
508
|
-
**kwargs: object,
|
|
509
|
-
) -> argparse.Namespace:
|
|
510
|
-
"""Build an :class:`argparse.Namespace` for the legacy handlers.
|
|
511
|
-
|
|
512
|
-
Parameters
|
|
513
|
-
----------
|
|
514
|
-
**kwargs : object
|
|
515
|
-
Attributes applied to the resulting namespace.
|
|
516
|
-
|
|
517
|
-
Returns
|
|
518
|
-
-------
|
|
519
|
-
argparse.Namespace
|
|
520
|
-
Namespace compatible with the ``cmd_*`` handler signatures.
|
|
521
|
-
"""
|
|
522
|
-
return argparse.Namespace(**kwargs)
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
def _optional_choice(
|
|
526
|
-
value: str | None,
|
|
527
|
-
choices: Collection[str],
|
|
528
|
-
*,
|
|
529
|
-
label: str,
|
|
530
|
-
) -> str | None:
|
|
531
|
-
"""
|
|
532
|
-
Validate optional CLI choice inputs while preserving ``None``.
|
|
533
|
-
|
|
534
|
-
Parameters
|
|
535
|
-
----------
|
|
536
|
-
value : str | None
|
|
537
|
-
Candidate value provided by the CLI option.
|
|
538
|
-
choices : Collection[str]
|
|
539
|
-
Allowed options for the parameter.
|
|
540
|
-
label : str
|
|
541
|
-
Friendly label rendered in error messages.
|
|
542
|
-
|
|
543
|
-
Returns
|
|
544
|
-
-------
|
|
545
|
-
str | None
|
|
546
|
-
Sanitized choice or ``None`` when the option is omitted.
|
|
547
|
-
"""
|
|
548
|
-
if value is None:
|
|
549
|
-
return None
|
|
550
|
-
return _validate_choice(value, choices, label=label)
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
def _resolve_resource_type(
    *,
    explicit_type: str | None,
    override_type: str | None,
    value: str,
    label: str,
    conflict_error: str | None = None,
    legacy_file_error: str | None = None,
) -> str:
    """
    Pick the resource type by preference order and validate it.

    The explicit type wins when given; otherwise the override type is used,
    and as a last resort the type is inferred from *value*.

    Parameters
    ----------
    explicit_type : str | None
        Explicit resource type provided by the user.
    override_type : str | None
        Resource type provided by an overriding option.
    value : str
        Resource value to infer the type from when neither an explicit nor
        an override type is given.
    label : str
        Friendly label for error messages.
    conflict_error : str | None
        Message raised when both an explicit and an override type are
        given. When empty or ``None``, the conflict is not reported.
    legacy_file_error : str | None
        Message raised when the explicit type is ``file`` (compared after
        stripping and lowercasing). When empty or ``None``, no check is made.

    Returns
    -------
    str
        Resolved and validated resource type.

    Raises
    ------
    typer.BadParameter
        On an explicit/override conflict, on a legacy ``file`` explicit
        type, when inference fails, or when the resolved type is not one of
        ``_SOURCE_CHOICES``.
    """
    if explicit_type is None:
        candidate = override_type or _infer_resource_type_or_exit(value)
        return _validate_choice(candidate, _SOURCE_CHOICES, label=label)

    # From here on an explicit type was supplied.
    if conflict_error and override_type is not None:
        raise typer.BadParameter(conflict_error)

    if legacy_file_error:
        normalized = explicit_type.strip().lower()
        if normalized == 'file':
            raise typer.BadParameter(legacy_file_error)

    return _validate_choice(explicit_type, _SOURCE_CHOICES, label=label)
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
def _stateful_namespace(
|
|
605
|
-
state: CliState,
|
|
606
|
-
*,
|
|
607
|
-
command: str,
|
|
608
|
-
**kwargs: object,
|
|
609
|
-
) -> argparse.Namespace:
|
|
610
|
-
"""
|
|
611
|
-
Attach CLI state toggles to a handler namespace.
|
|
612
|
-
|
|
613
|
-
Parameters
|
|
614
|
-
----------
|
|
615
|
-
state : CliState
|
|
616
|
-
Current CLI state stored on the Typer context.
|
|
617
|
-
command : str
|
|
618
|
-
Logical command name (e.g., ``extract``).
|
|
619
|
-
**kwargs : object
|
|
620
|
-
Additional attributes required by the handler.
|
|
621
|
-
|
|
622
|
-
Returns
|
|
623
|
-
-------
|
|
624
|
-
argparse.Namespace
|
|
625
|
-
Namespace compatible with the ``cmd_*`` handler signatures.
|
|
626
|
-
"""
|
|
627
|
-
return _ns(
|
|
628
|
-
command=command,
|
|
629
|
-
pretty=state.pretty,
|
|
630
|
-
quiet=state.quiet,
|
|
631
|
-
verbose=state.verbose,
|
|
632
|
-
**kwargs,
|
|
633
|
-
)
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
def _validate_choice(
|
|
637
|
-
value: str,
|
|
638
|
-
choices: Collection[str],
|
|
639
|
-
*,
|
|
640
|
-
label: str,
|
|
641
|
-
) -> str:
|
|
642
|
-
"""
|
|
643
|
-
Validate CLI input against a whitelist of choices.
|
|
644
|
-
|
|
645
|
-
Parameters
|
|
646
|
-
----------
|
|
647
|
-
value : str
|
|
648
|
-
Candidate value from the CLI option or argument.
|
|
649
|
-
choices: Collection[str]
|
|
650
|
-
Allowed values for the option.
|
|
651
|
-
label : str
|
|
652
|
-
Friendly label rendered in the validation error message.
|
|
653
|
-
|
|
654
|
-
Returns
|
|
655
|
-
-------
|
|
656
|
-
str
|
|
657
|
-
Sanitized and validated value.
|
|
658
|
-
|
|
659
|
-
Raises
|
|
660
|
-
------
|
|
661
|
-
typer.BadParameter
|
|
662
|
-
If ``value`` is not present in ``choices``.
|
|
663
|
-
"""
|
|
664
|
-
v = (value or '').strip()
|
|
665
|
-
if v in choices:
|
|
666
|
-
return v
|
|
667
|
-
allowed = ', '.join(sorted(choices))
|
|
668
|
-
raise typer.BadParameter(
|
|
669
|
-
f"Invalid {label} '{value}'. Choose from: {allowed}",
|
|
670
|
-
)
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
# SECTION: TYPER APP ======================================================== #
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
# Typer application instance (subcommands are registered below).
app = typer.Typer(
    name='etlplus',
    help=CLI_DESCRIPTION,
    epilog=CLI_EPILOG,
    add_completion=True,
    # Help for a bare ``etlplus`` invocation is printed by the root callback
    # (``_root``) instead of Typer's built-in no-args handling.
    no_args_is_help=False,
    rich_markup_mode='markdown',
)
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
@app.callback(invoke_without_command=True)
def _root(
    ctx: typer.Context,
    version: bool = typer.Option(
        False,
        '--version',
        '-V',
        is_eager=True,
        help='Show the version and exit.',
    ),
    pretty: bool = typer.Option(
        True,
        '--pretty/--no-pretty',
        help='Pretty-print JSON output (default: pretty).',
    ),
    quiet: bool = typer.Option(
        False,
        '--quiet',
        '-q',
        help='Suppress warnings and non-essential output.',
    ),
    verbose: bool = typer.Option(
        False,
        '--verbose',
        '-v',
        help='Emit extra diagnostics to stderr.',
    ),
) -> None:
    """
    Store the global flags on the context and serve the root-only options.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    version : bool
        If True, print the etlplus version and exit.
    pretty : bool
        Whether to pretty-print JSON output.
    quiet : bool
        Whether to suppress warnings and non-essential output.
    verbose : bool
        Whether to emit extra diagnostics to stderr.

    Raises
    ------
    typer.Exit
        With code 0 after printing the version, or after printing the help
        text when no subcommand was invoked.
    """
    # Always seed the state first so subcommands can read it from ``ctx.obj``.
    ctx.obj = CliState(pretty=pretty, quiet=quiet, verbose=verbose)

    if version:
        typer.echo(f'etlplus {__version__}')
        raise typer.Exit(0)

    # A subcommand will run, or the context is in resilient-parsing mode:
    # nothing more to do at the root.
    if ctx.invoked_subcommand is not None or ctx.resilient_parsing:
        return

    typer.echo(ctx.command.get_help(ctx))
    raise typer.Exit(0)
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
@app.command('extract')
def extract_cmd(
    ctx: typer.Context,
    source: SourceInputArg,
    source_format: SourceFormatOption | None = None,
    source_type: SourceOverrideOption | None = None,
) -> int:
    """
    Extract data from files, databases, or REST APIs.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    source : SourceInputArg
        Data source (file path, URL, DSN, or ``-`` for stdin).
    source_format : SourceFormatOption | None, optional
        Payload format when not a file.
    source_type : SourceOverrideOption | None, optional
        Override the inferred source type.

    Returns
    -------
    int
        Zero on success.

    Examples
    --------
    - Extract from a file (type inferred):
        etlplus extract in.csv
    - Extract from a file (explicit via flag):
        etlplus extract --source-type file in.csv
    - Extract from an API:
        etlplus extract https://example.com/data.json
        etlplus extract --source-type api https://example.com/data.json
    - Extract from a database DSN:
        etlplus extract --source-type database postgresql://user:pass@host/db
    - Pipe into transform/load:
        etlplus extract in.csv \
        | etlplus transform --operations '{"select":["a"]}'

    Notes
    -----
    - The ``extract`` command always writes JSON to stdout.
    - CSV output is unsupported for this command.
    - Use shell redirection (``>``) or pipelines to persist the output.
    """
    # The examples above use ``--source-type`` because that is the only
    # connector-override flag this command registers.
    state = _ensure_state(ctx)

    # Reject unknown connector/format names before doing any work.
    source_type = _optional_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )
    source_format = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )

    # An explicit ``--source-type`` wins; otherwise infer from SOURCE.
    resolved_source_type = source_type or _infer_resource_type_or_exit(source)

    _log_inferred_resource(
        state,
        role='source',
        value=source,
        resource_type=resolved_source_type,
    )

    # Adapt the parsed options to the namespace the legacy handler expects.
    ns = _stateful_namespace(
        state,
        command='extract',
        source_type=resolved_source_type,
        source=source,
        **_format_namespace_kwargs(
            format_value=source_format,
            default='json',
        ),
    )
    return int(cmd_extract(ns))
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
@app.command('list')
def list_cmd(
    ctx: typer.Context,
    config: PipelineConfigOption,
    jobs: bool = typer.Option(
        False,
        '--jobs',
        help='List available job names and exit',
    ),
    pipelines: bool = typer.Option(
        False,
        '--pipelines',
        help='List ETL pipelines',
    ),
    sources: bool = typer.Option(
        False,
        '--sources',
        help='List data sources',
    ),
    summary: bool = typer.Option(
        False,
        '--summary',
        help='Show pipeline summary (name, version, sources, targets, jobs)',
    ),
    targets: bool = typer.Option(
        False,
        '--targets',
        help='List data targets',
    ),
    transforms: bool = typer.Option(
        False,
        '--transforms',
        help='List data transforms',
    ),
) -> int:
    """
    Print ETL entities from a pipeline YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : PipelineConfigOption
        Path to pipeline YAML configuration file.
    jobs : bool, optional
        If True, list available job names and exit.
    pipelines : bool, optional
        If True, list ETL pipelines.
    sources : bool, optional
        If True, list data sources.
    summary : bool, optional
        If True, show pipeline summary (name, version, sources, targets, jobs).
    targets : bool, optional
        If True, list data targets.
    transforms : bool, optional
        If True, list data transforms.

    Returns
    -------
    int
        Zero on success.
    """
    # Handler attributes, kept in the order the namespace has always used.
    selection: dict[str, object] = {
        'config': config,
        'summary': summary,
        'pipelines': pipelines,
        'jobs': jobs,
        'sources': sources,
        'targets': targets,
        'transforms': transforms,
    }
    handler_args = _stateful_namespace(
        _ensure_state(ctx),
        command='list',
        **selection,
    )
    exit_code = cmd_list(handler_args)
    return int(exit_code)
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
@app.command('load')
def load_cmd(
    ctx: typer.Context,
    target: TargetInputArg,
    source_format: StdinFormatOption | None = None,
    target_format: TargetFormatOption | None = None,
    target_type: TargetOverrideOption | None = None,
) -> int:
    """
    Load data into a file, database, or REST API.

    The source is always stdin (``-``); only the destination and the
    format hints are configurable from the command line.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    target : TargetInputArg
        Load destination (file path, URL/DSN, or ``-`` for stdout).
    source_format : StdinFormatOption | None, optional
        Hint for parsing stdin payloads (json or csv).
    target_format : TargetFormatOption | None, optional
        Payload format when not a file target (or when TARGET is ``-``).
        Falls back to ``json`` when omitted.
    target_type : TargetOverrideOption | None, optional
        Override the inferred target type. When omitted, the type is
        inferred from ``target``.

    Returns
    -------
    int
        Exit code returned by ``cmd_load`` coerced to ``int`` (zero on
        success).

    Examples
    --------
    - Pipe into a file:
        etlplus extract in.csv \
        | etlplus transform --operations '{"select":["a"]}' \
        | etlplus load --to file out.json
    - Read from stdin and write to a file:
        etlplus load out.json
    - Write to stdout:
        etlplus load --to file -

    Notes
    -----
    - The ``load`` command reads JSON from stdin.
    - CSV input is unsupported unless ``--source-format csv`` is provided.
    - Convert upstream before piping into ``load`` when working with other
        formats.
    """
    # NOTE(review): the ``--to file`` examples above look out of step with
    # the positional ``target`` / ``target_type`` override in the signature;
    # the option aliases are declared outside this block -- TODO confirm.
    cli_state = _ensure_state(ctx)

    # Checks run in source_format -> target_type -> target_format order.
    stdin_format = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )
    dest_override = _optional_choice(
        target_type,
        _SOURCE_CHOICES,
        label='target_type',
    )
    dest_format = _optional_choice(
        target_format,
        _FORMAT_CHOICES,
        label='target_format',
    )

    # An explicit override wins; otherwise infer the type from the target.
    if dest_override:
        dest_kind = dest_override
    else:
        dest_kind = _infer_resource_type_or_exit(target)

    # ``load`` has no source argument: the payload always comes from stdin.
    stdin_marker = '-'
    stdin_kind = _infer_resource_type_soft(stdin_marker)

    for role, value, kind in (
        ('source', stdin_marker, stdin_kind),
        ('target', target, dest_kind),
    ):
        _log_inferred_resource(
            cli_state,
            role=role,
            value=value,
            resource_type=kind,
        )

    dest_format_kwargs = _format_namespace_kwargs(
        format_value=dest_format,
        default='json',
    )
    namespace = _stateful_namespace(
        cli_state,
        command='load',
        source=stdin_marker,
        source_format=stdin_format,
        target_type=dest_kind,
        target=target,
        **dest_format_kwargs,
    )
    exit_code = cmd_load(namespace)
    return int(exit_code)
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
@app.command('pipeline')
def pipeline_cmd(
    ctx: typer.Context,
    config: PipelineConfigOption,
    job: str | None = typer.Option(
        None,
        '--job',
        metavar='JOB',
        help='Run a specific job by name',
    ),
    jobs: bool = typer.Option(
        False,
        '--jobs',
        help='List available job names and exit',
    ),
    pipeline: str | None = typer.Option(
        None,
        '--pipeline',
        help='Run a specific pipeline by name',
    ),
) -> int:
    """
    Deprecated wrapper to inspect or run a pipeline YAML configuration.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : PipelineConfigOption
        Path to pipeline YAML configuration file.
    job : str | None, optional
        Name of a specific job to run. Takes precedence over ``pipeline``
        when both are given.
    jobs : bool, optional
        If True, list available job names and exit.
    pipeline : str | None, optional
        Name of a specific pipeline to run; used only when ``job`` is not
        supplied.

    Returns
    -------
    int
        Exit code returned by ``cmd_pipeline`` coerced to ``int`` (zero on
        success).
    """
    cli_state = _ensure_state(ctx)

    # A non-empty ``--job`` wins; otherwise fall back to ``--pipeline``.
    selected = job if job else pipeline

    namespace = _stateful_namespace(
        cli_state,
        command='pipeline',
        config=config,
        list=jobs,  # the handler namespace exposes the flag as ``list``
        run=selected,
    )
    exit_code = cmd_pipeline(namespace)
    return int(exit_code)
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
@app.command('render')
def render_cmd(
    ctx: typer.Context,
    config: RenderConfigOption = None,
    spec: RenderSpecOption = None,
    table: RenderTableOption = None,
    template: RenderTemplateOption = 'ddl',
    template_path: RenderTemplatePathOption = None,
    output: RenderOutputOption = None,
) -> int:
    """
    Render SQL DDL from table schemas defined in YAML/JSON configs.

    All options are forwarded unchanged to ``cmd_render``; no validation
    happens in this wrapper.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : RenderConfigOption, optional
        Pipeline YAML containing ``table_schemas`` entries.
    spec : RenderSpecOption, optional
        Standalone table spec file (.yml/.yaml/.json).
    table : RenderTableOption, optional
        Filter to a single table name within the available specs.
    template : RenderTemplateOption, optional
        Built-in template key or template file path. Defaults to ``ddl``.
    template_path : RenderTemplatePathOption, optional
        Explicit template file path to render with.
    output : RenderOutputOption, optional
        Path to write SQL to (stdout when omitted).

    Returns
    -------
    int
        Exit code returned by ``cmd_render`` coerced to ``int`` (zero on
        success).
    """
    cli_state = _ensure_state(ctx)

    # Collect the handler fields first, then build the namespace in one go.
    render_fields = {
        'command': 'render',
        'config': config,
        'spec': spec,
        'table': table,
        'template': template,
        'template_path': template_path,
        'output': output,
    }
    namespace = _stateful_namespace(cli_state, **render_fields)
    return int(cmd_render(namespace))
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
@app.command('run')
def run_cmd(
    ctx: typer.Context,
    config: PipelineConfigOption,
    job: str | None = typer.Option(
        None,
        '-j',
        '--job',
        help='Name of the job to run',
    ),
    pipeline: str | None = typer.Option(
        None,
        '-p',
        '--pipeline',
        help='Name of the pipeline to run',
    ),
) -> int:
    """
    Execute an ETL job or pipeline from a YAML configuration.

    Both ``job`` and ``pipeline`` are handed to ``cmd_run`` as given; this
    wrapper does not choose between them.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    config : PipelineConfigOption
        Path to pipeline YAML configuration file.
    job : str | None, optional
        Name of the job to run.
    pipeline : str | None, optional
        Name of the pipeline to run.

    Returns
    -------
    int
        Exit code returned by ``cmd_run`` coerced to ``int`` (zero on
        success).
    """
    cli_state = _ensure_state(ctx)
    namespace = _stateful_namespace(
        cli_state,
        command='run',
        config=config,
        job=job,
        pipeline=pipeline,
    )
    exit_code = cmd_run(namespace)
    return int(exit_code)
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
@app.command('transform')
def transform_cmd(
    ctx: typer.Context,
    operations: OperationsJSONOption = '{}',
    source: StreamingSourceArg = '-',
    source_format: SourceFormatOption | None = None,
    source_type: SourceOverrideOption | None = None,
    target: TargetPathOption | None = None,
    target_format: TargetFormatOption | None = None,
    target_type: TargetOverrideOption | None = None,
) -> int:
    """
    Transform records using JSON-described operations.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    operations : OperationsJSONOption, optional
        Transformation operations as a JSON string. Defaults to ``{}``.
    source : StreamingSourceArg, optional
        Data source (file path or ``-`` for stdin). Defaults to ``-``.
    source_format : SourceFormatOption | None, optional
        Input payload format when not a file (or when SOURCE is -).
    source_type : SourceOverrideOption | None, optional
        Override the inferred source type.
    target : TargetPathOption | None, optional
        Optional output path. Use ``-`` for stdout, which is also the
        fallback when no target is given.
    target_format : TargetFormatOption | None, optional
        Output payload format when not a file target (or when OUTPUT is -).
        Accepts ``--target-format``. Falls back to ``json`` when omitted.
    target_type : TargetOverrideOption | None, optional
        Override the inferred target type.

    Returns
    -------
    int
        Exit code returned by ``cmd_transform`` coerced to ``int`` (zero on
        success).

    Examples
    --------
    - Transform data from a file and write to another file:
        etlplus transform --from file in.json \
        --operations '{"select": ["id", "name"]}' \
        --to file out.json
    - Transform data from stdin and write to stdout:
        cat in.json \
        | etlplus transform \
        --operations '{"filter": {"field": "age", "gt": 30}}'
    - Transform data from a file and write to stdout:
        etlplus transform --from file in.csv \
        --source-format csv \
        --operations '{"select": ["id", "email"]}'
    - Transform data from stdin and write to a file:
        cat in.json \
        | etlplus transform --operations '{"sort": ["-created_at"]}' \
        --to file out.json

    Notes
    -----
    - The ``transform`` command reads JSON from stdin when SOURCE is ``-``.
    - CSV input is unsupported for this command.
    - Convert upstream before piping into ``transform``.
    """
    # NOTE(review): the "CSV input is unsupported" note conflicts with the
    # ``--source-format csv`` example in the same docstring, and the
    # ``--from`` / ``--to`` flags do not obviously match this signature; the
    # handler and option aliases live outside this block -- TODO confirm.
    cli_state = _ensure_state(ctx)

    # Helper calls below keep the original order: source_format,
    # source_type, target_format, format kwargs, then target_type.
    in_format = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )
    in_override = _optional_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )
    out_format = _optional_choice(
        target_format,
        _FORMAT_CHOICES,
        label='target_format',
    )
    out_format_kwargs = _format_namespace_kwargs(
        format_value=out_format,
        default='json',
    )
    out_override = _optional_choice(
        target_type,
        _SOURCE_CHOICES,
        label='target_type',
    )

    # An explicit source type wins; otherwise try a soft inference, which
    # may yield ``None`` (hence the guard before re-validation below).
    if in_override:
        src_kind = in_override
    else:
        src_kind = _infer_resource_type_soft(source)
    src_value = '-' if source is None else source
    dst_value = '-' if target is None else target

    if src_kind is not None:
        src_kind = _validate_choice(
            src_kind,
            _SOURCE_CHOICES,
            label='source_type',
        )

    dst_kind = _resolve_resource_type(
        explicit_type=None,
        override_type=out_override,
        value=dst_value,
        label='target_type',
    )

    for role, value, kind in (
        ('source', src_value, src_kind),
        ('target', dst_value, dst_kind),
    ):
        _log_inferred_resource(
            cli_state,
            role=role,
            value=value,
            resource_type=kind,
        )

    # Parse the operations JSON before touching the format kwargs, matching
    # the original keyword-argument evaluation order.
    parsed_operations = json_type(operations)
    namespace = _stateful_namespace(
        cli_state,
        command='transform',
        source=src_value,
        source_type=src_kind,
        operations=parsed_operations,
        target=dst_value,
        source_format=in_format,
        target_type=dst_kind,
        target_format=out_format_kwargs['format'],
        **out_format_kwargs,
    )
    exit_code = cmd_transform(namespace)
    return int(exit_code)
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
@app.command('validate')
def validate_cmd(
    ctx: typer.Context,
    rules: RulesJSONOption = '{}',
    source: StreamingSourceArg = '-',
    source_format: SourceFormatOption | None = None,
    source_type: SourceOverrideOption | None = None,
    target: TargetPathOption | None = None,
) -> int:
    """
    Validate data against JSON-described rules.

    Parameters
    ----------
    ctx : typer.Context
        Typer execution context provided to the command.
    rules : RulesJSONOption, optional
        Validation rules as a JSON string. Defaults to ``{}``.
    source : StreamingSourceArg, optional
        Data source (file path or ``-`` for stdin). Defaults to ``-``.
    source_format : SourceFormatOption | None, optional
        Optional stdin format hint (JSON or CSV) when SOURCE is ``-``.
        Falls back to ``json`` when omitted.
    source_type : SourceOverrideOption | None, optional
        Override the inferred source type when heuristics fail.
    target : TargetPathOption | None, optional
        Optional output path. Use ``-`` for stdout. Forwarded as given
        (``None`` when omitted).

    Returns
    -------
    int
        Exit code returned by ``cmd_validate`` coerced to ``int`` (zero on
        success).
    """
    in_format = _optional_choice(
        source_format,
        _FORMAT_CHOICES,
        label='source_format',
    )
    in_override = _optional_choice(
        source_type,
        _SOURCE_CHOICES,
        label='source_type',
    )
    in_format_kwargs = _format_namespace_kwargs(
        format_value=in_format,
        default='json',
    )

    # NOTE(review): unlike the sibling commands, the CLI state is resolved
    # only after the option checks above; the order is kept as-is here.
    cli_state = _ensure_state(ctx)

    # An explicit source type wins; otherwise fall back to soft inference.
    src_kind = in_override if in_override else _infer_resource_type_soft(
        source,
    )

    _log_inferred_resource(
        cli_state,
        role='source',
        value=source,
        resource_type=src_kind,
    )

    # The rules arrive as a CLI string and are converted by ``json_type``.
    parsed_rules = json_type(rules)
    namespace = _stateful_namespace(
        cli_state,
        command='validate',
        source=source,
        source_type=src_kind,
        rules=parsed_rules,
        target=target,
        source_format=in_format,
        **in_format_kwargs,
    )
    exit_code = cmd_validate(namespace)
    return int(exit_code)
|