etlplus 0.3.23__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__main__.py +1 -2
- etlplus/cli/__init__.py +15 -0
- etlplus/cli/app.py +1000 -0
- etlplus/cli/handlers.py +686 -0
- etlplus/cli/main.py +404 -0
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/METADATA +2 -1
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/RECORD +11 -8
- etlplus/cli.py +0 -868
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/WHEEL +0 -0
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/entry_points.txt +0 -0
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.3.23.dist-info → etlplus-0.4.1.dist-info}/top_level.txt +0 -0
etlplus/cli.py
DELETED
|
@@ -1,868 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
:mod:`etlplus.cli` module.
|
|
3
|
-
|
|
4
|
-
Entry point for the ``etlplus`` command-line interface (CLI).
|
|
5
|
-
|
|
6
|
-
This module wires subcommands via ``argparse`` using
|
|
7
|
-
``set_defaults(func=...)`` so dispatch is clean and extensible.
|
|
8
|
-
|
|
9
|
-
Subcommands
|
|
10
|
-
-----------
|
|
11
|
-
- ``extract``: extract data from files, databases, or REST APIs
|
|
12
|
-
- ``validate``: validate data against rules
|
|
13
|
-
- ``transform``: transform records
|
|
14
|
-
- ``load``: load data to files, databases, or REST APIs
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
from __future__ import annotations
|
|
18
|
-
|
|
19
|
-
import argparse
|
|
20
|
-
import csv
|
|
21
|
-
import os
|
|
22
|
-
import sys
|
|
23
|
-
from collections.abc import Sequence
|
|
24
|
-
from pathlib import Path
|
|
25
|
-
from typing import Any
|
|
26
|
-
from typing import Literal
|
|
27
|
-
from typing import cast
|
|
28
|
-
|
|
29
|
-
from . import __version__
|
|
30
|
-
from .config import PipelineConfig
|
|
31
|
-
from .config import load_pipeline_config
|
|
32
|
-
from .enums import DataConnectorType
|
|
33
|
-
from .enums import FileFormat
|
|
34
|
-
from .extract import extract
|
|
35
|
-
from .file import File
|
|
36
|
-
from .load import load
|
|
37
|
-
from .run import run
|
|
38
|
-
from .transform import transform
|
|
39
|
-
from .types import JSONData
|
|
40
|
-
from .utils import json_type
|
|
41
|
-
from .utils import print_json
|
|
42
|
-
from .validate import validate
|
|
43
|
-
|
|
44
|
-
# SECTION: CONSTANTS ======================================================= #
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
CLI_DESCRIPTION = '\n'.join(
|
|
48
|
-
[
|
|
49
|
-
'ETLPlus - A Swiss Army knife for simple ETL operations.',
|
|
50
|
-
'',
|
|
51
|
-
' Provide a subcommand and options. Examples:',
|
|
52
|
-
'',
|
|
53
|
-
' etlplus extract file in.csv -o out.json',
|
|
54
|
-
' etlplus validate in.json --rules \'{"required": ["id"]}\'',
|
|
55
|
-
' etlplus transform in.json --operations \'{"select": ["id"]}\'',
|
|
56
|
-
' etlplus load in.json file out.json',
|
|
57
|
-
'',
|
|
58
|
-
' Enforce error if --format is provided for files. Examples:',
|
|
59
|
-
'',
|
|
60
|
-
' etlplus extract file in.csv --format csv --strict-format',
|
|
61
|
-
' etlplus load in.json file out.csv --format csv --strict-format',
|
|
62
|
-
],
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
CLI_EPILOG = '\n'.join(
|
|
66
|
-
[
|
|
67
|
-
'Environment:',
|
|
68
|
-
(
|
|
69
|
-
' ETLPLUS_FORMAT_BEHAVIOR controls behavior when '
|
|
70
|
-
'--format is provided for files.'
|
|
71
|
-
),
|
|
72
|
-
' Values:',
|
|
73
|
-
' - error|fail|strict: treat as error',
|
|
74
|
-
' - warn (default): print a warning',
|
|
75
|
-
' - ignore|silent: no message',
|
|
76
|
-
'',
|
|
77
|
-
'Note:',
|
|
78
|
-
' --strict-format overrides the environment behavior.',
|
|
79
|
-
],
|
|
80
|
-
)
|
|
81
|
-
|
|
82
|
-
FORMAT_ENV_KEY = 'ETLPLUS_FORMAT_BEHAVIOR'
|
|
83
|
-
|
|
84
|
-
PROJECT_URL = 'https://github.com/Dagitali/ETLPlus'
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
# SECTION: INTERNAL CONSTANTS =============================================== #
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
_FORMAT_ERROR_STATES = {'error', 'fail', 'strict'}
|
|
91
|
-
_FORMAT_SILENT_STATES = {'ignore', 'silent'}
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
# SECTION: TYPE ALIASES ===================================================== #
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
type FormatContext = Literal['source', 'target']
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
# SECTION: INTERNAL CLASSES ================================================= #
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
class _FormatAction(argparse.Action):
|
|
104
|
-
"""Argparse action that records when ``--format`` is provided."""
|
|
105
|
-
|
|
106
|
-
def __call__(
|
|
107
|
-
self,
|
|
108
|
-
parser: argparse.ArgumentParser,
|
|
109
|
-
namespace: argparse.Namespace,
|
|
110
|
-
values: str | Sequence[Any] | None,
|
|
111
|
-
option_string: str | None = None,
|
|
112
|
-
) -> None: # pragma: no cover - argparse wiring
|
|
113
|
-
setattr(namespace, self.dest, values)
|
|
114
|
-
namespace._format_explicit = True
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def _add_format_options(
    parser: argparse.ArgumentParser,
    *,
    context: FormatContext,
) -> None:
    """
    Attach the shared ``--format``/``--strict-format`` options.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Parser that receives the options.
    context : FormatContext
        Whether the options describe a source or a target resource.
    """
    # Until _FormatAction fires, --format counts as "not explicitly given".
    parser.set_defaults(_format_explicit=False)

    strict_help = (
        'Treat providing --format for file '
        f'{context}s as an error (overrides environment behavior)'
    )
    parser.add_argument(
        '--strict-format',
        action='store_true',
        help=strict_help,
    )

    format_help = (
        f'Format of the {context} when not a file. For file {context}s '
        'this option is ignored and the format is inferred from the '
        'filename extension.'
    )
    parser.add_argument(
        '--format',
        choices=list(FileFormat.choices()),
        default='json',
        action=_FormatAction,
        help=format_help,
    )
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
def _emit_behavioral_notice(
    message: str,
    behavior: str,
) -> None:
    """
    Escalate, suppress, or print ``message`` according to ``behavior``.

    Parameters
    ----------
    message : str
        The notice text to emit.
    behavior : str
        The effective format-behavior mode.

    Raises
    ------
    ValueError
        If ``behavior`` is one of the error states.
    """
    if behavior in _FORMAT_ERROR_STATES:
        raise ValueError(message)
    if behavior not in _FORMAT_SILENT_STATES:
        # Default mode: surface a non-fatal warning on stderr.
        print(f'Warning: {message}', file=sys.stderr)
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
def _format_behavior(
|
|
184
|
-
strict: bool,
|
|
185
|
-
) -> str:
|
|
186
|
-
"""
|
|
187
|
-
Return the effective format-behavior mode.
|
|
188
|
-
|
|
189
|
-
Parameters
|
|
190
|
-
----------
|
|
191
|
-
strict : bool
|
|
192
|
-
Whether to enforce strict format behavior.
|
|
193
|
-
|
|
194
|
-
Returns
|
|
195
|
-
-------
|
|
196
|
-
str
|
|
197
|
-
The effective format-behavior mode.
|
|
198
|
-
"""
|
|
199
|
-
if strict:
|
|
200
|
-
return 'error'
|
|
201
|
-
env_value = os.getenv(FORMAT_ENV_KEY, 'warn')
|
|
202
|
-
return (env_value or 'warn').strip().lower()
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
def _handle_format_guard(
|
|
206
|
-
*,
|
|
207
|
-
io_context: Literal['source', 'target'],
|
|
208
|
-
resource_type: str,
|
|
209
|
-
format_explicit: bool,
|
|
210
|
-
strict: bool,
|
|
211
|
-
) -> None:
|
|
212
|
-
"""
|
|
213
|
-
Warn or raise when --format is used alongside file resources.
|
|
214
|
-
|
|
215
|
-
Parameters
|
|
216
|
-
----------
|
|
217
|
-
io_context : Literal['source', 'target']
|
|
218
|
-
Whether this is a source or target resource.
|
|
219
|
-
resource_type : str
|
|
220
|
-
The type of resource being processed.
|
|
221
|
-
format_explicit : bool
|
|
222
|
-
Whether the --format option was explicitly provided.
|
|
223
|
-
strict : bool
|
|
224
|
-
Whether to enforce strict format behavior.
|
|
225
|
-
"""
|
|
226
|
-
if resource_type != 'file' or not format_explicit:
|
|
227
|
-
return
|
|
228
|
-
message = (
|
|
229
|
-
f'--format is ignored for file {io_context}s; '
|
|
230
|
-
'inferred from filename extension.'
|
|
231
|
-
)
|
|
232
|
-
behavior = _format_behavior(strict)
|
|
233
|
-
_emit_behavioral_notice(message, behavior)
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
def _list_sections(
|
|
237
|
-
cfg: PipelineConfig,
|
|
238
|
-
args: argparse.Namespace,
|
|
239
|
-
) -> dict[str, Any]:
|
|
240
|
-
"""
|
|
241
|
-
Build sectioned metadata output for the list command.
|
|
242
|
-
|
|
243
|
-
Parameters
|
|
244
|
-
----------
|
|
245
|
-
cfg : PipelineConfig
|
|
246
|
-
The loaded pipeline configuration.
|
|
247
|
-
args : argparse.Namespace
|
|
248
|
-
Parsed command-line arguments.
|
|
249
|
-
|
|
250
|
-
Returns
|
|
251
|
-
-------
|
|
252
|
-
dict[str, Any]
|
|
253
|
-
Metadata output for the list command.
|
|
254
|
-
"""
|
|
255
|
-
sections: dict[str, Any] = {}
|
|
256
|
-
if getattr(args, 'pipelines', False):
|
|
257
|
-
sections['pipelines'] = [cfg.name]
|
|
258
|
-
if getattr(args, 'sources', False):
|
|
259
|
-
sections['sources'] = [src.name for src in cfg.sources]
|
|
260
|
-
if getattr(args, 'targets', False):
|
|
261
|
-
sections['targets'] = [tgt.name for tgt in cfg.targets]
|
|
262
|
-
if getattr(args, 'transforms', False):
|
|
263
|
-
sections['transforms'] = [
|
|
264
|
-
getattr(trf, 'name', None) for trf in cfg.transforms
|
|
265
|
-
]
|
|
266
|
-
if not sections:
|
|
267
|
-
sections['jobs'] = _pipeline_summary(cfg)['jobs']
|
|
268
|
-
return sections
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
def _materialize_csv_payload(
|
|
272
|
-
source: object,
|
|
273
|
-
) -> JSONData | str:
|
|
274
|
-
"""
|
|
275
|
-
Return parsed CSV rows when ``source`` points at a CSV file.
|
|
276
|
-
|
|
277
|
-
Parameters
|
|
278
|
-
----------
|
|
279
|
-
source : object
|
|
280
|
-
The source of data.
|
|
281
|
-
|
|
282
|
-
Returns
|
|
283
|
-
-------
|
|
284
|
-
JSONData | str
|
|
285
|
-
Parsed CSV rows or the original source if not a CSV file.
|
|
286
|
-
"""
|
|
287
|
-
if not isinstance(source, str):
|
|
288
|
-
return cast(JSONData, source)
|
|
289
|
-
path = Path(source)
|
|
290
|
-
if path.suffix.lower() != '.csv' or not path.is_file():
|
|
291
|
-
return source
|
|
292
|
-
return _read_csv_rows(path)
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
def _pipeline_summary(
|
|
296
|
-
cfg: PipelineConfig,
|
|
297
|
-
) -> dict[str, Any]:
|
|
298
|
-
"""
|
|
299
|
-
Return a human-friendly snapshot of a pipeline config.
|
|
300
|
-
|
|
301
|
-
Parameters
|
|
302
|
-
----------
|
|
303
|
-
cfg : PipelineConfig
|
|
304
|
-
The loaded pipeline configuration.
|
|
305
|
-
|
|
306
|
-
Returns
|
|
307
|
-
-------
|
|
308
|
-
dict[str, Any]
|
|
309
|
-
A human-friendly snapshot of a pipeline config.
|
|
310
|
-
"""
|
|
311
|
-
sources = [src.name for src in cfg.sources]
|
|
312
|
-
targets = [tgt.name for tgt in cfg.targets]
|
|
313
|
-
jobs = [job.name for job in cfg.jobs]
|
|
314
|
-
return {
|
|
315
|
-
'name': cfg.name,
|
|
316
|
-
'version': cfg.version,
|
|
317
|
-
'sources': sources,
|
|
318
|
-
'targets': targets,
|
|
319
|
-
'jobs': jobs,
|
|
320
|
-
}
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
def _read_csv_rows(
|
|
324
|
-
path: Path,
|
|
325
|
-
) -> list[dict[str, str]]:
|
|
326
|
-
"""
|
|
327
|
-
Read CSV rows into dictionaries.
|
|
328
|
-
|
|
329
|
-
Parameters
|
|
330
|
-
----------
|
|
331
|
-
path : Path
|
|
332
|
-
Path to a CSV file.
|
|
333
|
-
|
|
334
|
-
Returns
|
|
335
|
-
-------
|
|
336
|
-
list[dict[str, str]]
|
|
337
|
-
List of dictionaries, each representing a row in the CSV file.
|
|
338
|
-
"""
|
|
339
|
-
with path.open(newline='', encoding='utf-8') as handle:
|
|
340
|
-
reader = csv.DictReader(handle)
|
|
341
|
-
return [dict(row) for row in reader]
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
def _write_json_output(
|
|
345
|
-
data: Any,
|
|
346
|
-
output_path: str | None,
|
|
347
|
-
*,
|
|
348
|
-
success_message: str,
|
|
349
|
-
) -> bool:
|
|
350
|
-
"""
|
|
351
|
-
Optionally persist JSON data to disk.
|
|
352
|
-
|
|
353
|
-
Parameters
|
|
354
|
-
----------
|
|
355
|
-
data : Any
|
|
356
|
-
Data to write.
|
|
357
|
-
output_path : str | None
|
|
358
|
-
Path to write the output to. None to print to stdout.
|
|
359
|
-
success_message : str
|
|
360
|
-
Message to print upon successful write.
|
|
361
|
-
|
|
362
|
-
Returns
|
|
363
|
-
-------
|
|
364
|
-
bool
|
|
365
|
-
True if output was written to a file, False if printed to stdout.
|
|
366
|
-
"""
|
|
367
|
-
if not output_path:
|
|
368
|
-
return False
|
|
369
|
-
File(Path(output_path), FileFormat.JSON).write_json(data)
|
|
370
|
-
print(f'{success_message} {output_path}')
|
|
371
|
-
return True
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
# SECTION: FUNCTIONS ======================================================== #
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
# -- Command Handlers -- #
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
def cmd_extract(
    args: argparse.Namespace,
) -> int:
    """
    Extract data from a source.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    _handle_format_guard(
        io_context='source',
        resource_type=args.source_type,
        format_explicit=getattr(args, '_format_explicit', False),
        strict=getattr(args, 'strict_format', False),
    )

    if args.source_type == 'file':
        # File sources infer their format from the filename extension.
        data = extract(args.source_type, args.source)
    else:
        data = extract(
            args.source_type,
            args.source,
            file_format=getattr(args, 'format', None),
        )

    saved = _write_json_output(
        data,
        getattr(args, 'output', None),
        success_message='Data extracted and saved to',
    )
    if not saved:
        # No --output given: dump the result to stdout instead.
        print_json(data)
    return 0
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
def cmd_validate(
    args: argparse.Namespace,
) -> int:
    """
    Validate data from a source.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    report = validate(_materialize_csv_payload(args.source), args.rules)

    destination = getattr(args, 'output', None)
    if not destination:
        # No --output given: print the full validation report.
        print_json(report)
        return 0

    validated = report.get('data')
    if validated is None:
        # Validation produced nothing persistable; report on stderr.
        print(
            f'Validation failed, no data to save for {destination}',
            file=sys.stderr,
        )
        return 0

    _write_json_output(
        validated,
        destination,
        success_message='Validation result saved to',
    )
    return 0
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
def cmd_transform(
    args: argparse.Namespace,
) -> int:
    """
    Transform data from a source.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    transformed = transform(
        _materialize_csv_payload(args.source),
        args.operations,
    )

    saved = _write_json_output(
        transformed,
        getattr(args, 'output', None),
        success_message='Data transformed and saved to',
    )
    if not saved:
        # No --output given: dump the result to stdout instead.
        print_json(transformed)
    return 0
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
def cmd_load(
    args: argparse.Namespace,
) -> int:
    """
    Load data into a target.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    _handle_format_guard(
        io_context='target',
        resource_type=args.target_type,
        format_explicit=getattr(args, '_format_explicit', False),
        strict=getattr(args, 'strict_format', False),
    )

    if args.target_type == 'file':
        # File targets infer their format from the filename extension.
        outcome = load(args.source, args.target_type, args.target)
    else:
        outcome = load(
            args.source,
            args.target_type,
            args.target,
            file_format=getattr(args, 'format', None),
        )

    saved = _write_json_output(
        outcome,
        getattr(args, 'output', None),
        success_message='Data loaded and saved to',
    )
    if not saved:
        # No --output given: dump the result to stdout instead.
        print_json(outcome)
    return 0
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
def cmd_pipeline(args: argparse.Namespace) -> int:
    """
    Inspect or run a pipeline YAML configuration.

    ``--list`` prints job names; ``--run JOB`` executes a job
    end-to-end; otherwise a summary of the config is printed.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    cfg = load_pipeline_config(args.config, substitute=True)
    job_to_run = getattr(args, 'run', None)

    # --list (without --run) only enumerates job names.
    if getattr(args, 'list', False) and not job_to_run:
        print_json({'jobs': _pipeline_summary(cfg)['jobs']})
    elif job_to_run:
        outcome = run(job=job_to_run, config_path=args.config)
        print_json({'status': 'ok', 'result': outcome})
    else:
        print_json(_pipeline_summary(cfg))
    return 0
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
def cmd_list(args: argparse.Namespace) -> int:
    """
    Print ETL pipeline metadata from a pipeline YAML configuration.

    Which sections are printed is controlled by the ``--pipelines``,
    ``--sources``, ``--targets``, and ``--transforms`` flags.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    config = load_pipeline_config(args.config, substitute=True)
    # Delegate section selection to the shared helper.
    print_json(_list_sections(config, args))
    return 0
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
def cmd_run(args: argparse.Namespace) -> int:
    """
    Execute an ETL job end-to-end from a pipeline YAML configuration.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments.

    Returns
    -------
    int
        Zero on success.
    """
    cfg = load_pipeline_config(args.config, substitute=True)

    # --job takes precedence; --pipeline acts as a fallback alias.
    selected = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
    if not selected:
        # Nothing selected to run: print the config summary instead.
        print_json(_pipeline_summary(cfg))
        return 0

    outcome = run(job=selected, config_path=args.config)
    print_json({'status': 'ok', 'result': outcome})
    return 0
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
# -- Parser -- #
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
def create_parser() -> argparse.ArgumentParser:
    """
    Create the argument parser for the CLI.

    Each subcommand binds its handler via ``set_defaults(func=...)``,
    so ``main`` can dispatch without a command-name switch.

    Returns
    -------
    argparse.ArgumentParser
        Configured parser with subcommands for the CLI.
    """
    # Raw formatting preserves the hand-wrapped description/epilog text.
    parser = argparse.ArgumentParser(
        prog='etlplus',
        description=CLI_DESCRIPTION,
        epilog=CLI_EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s {__version__}',
    )

    subparsers = parser.add_subparsers(
        dest='command',
        help='Available commands',
    )

    # Define "extract" command.
    extract_parser = subparsers.add_parser(
        'extract',
        help=('Extract data from sources (files, databases, REST APIs)'),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    extract_parser.add_argument(
        'source_type',
        choices=list(DataConnectorType.choices()),
        help='Type of source to extract from',
    )
    extract_parser.add_argument(
        'source',
        help=(
            'Source location '
            '(file path, database connection string, or API URL)'
        ),
    )
    extract_parser.add_argument(
        '-o',
        '--output',
        help='Output file to save extracted data (JSON format)',
    )
    # Shared --format/--strict-format handling for source resources.
    _add_format_options(extract_parser, context='source')
    extract_parser.set_defaults(func=cmd_extract)

    # Define "validate" command.
    validate_parser = subparsers.add_parser(
        'validate',
        help='Validate data from sources',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    validate_parser.add_argument(
        'source',
        help='Data source to validate (file path or JSON string)',
    )
    validate_parser.add_argument(
        '--rules',
        type=json_type,
        default={},
        help='Validation rules as JSON string',
    )
    validate_parser.set_defaults(func=cmd_validate)

    # Define "transform" command.
    transform_parser = subparsers.add_parser(
        'transform',
        help='Transform data',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    transform_parser.add_argument(
        'source',
        help='Data source to transform (file path or JSON string)',
    )
    transform_parser.add_argument(
        '--operations',
        type=json_type,
        default={},
        help='Transformation operations as JSON string',
    )
    transform_parser.add_argument(
        '-o',
        '--output',
        help='Output file to save transformed data',
    )
    transform_parser.set_defaults(func=cmd_transform)

    # Define "load" command.
    load_parser = subparsers.add_parser(
        'load',
        help='Load data to targets (files, databases, REST APIs)',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    load_parser.add_argument(
        'source',
        help='Data source to load (file path or JSON string)',
    )
    load_parser.add_argument(
        'target_type',
        choices=list(DataConnectorType.choices()),
        help='Type of target to load to',
    )
    load_parser.add_argument(
        'target',
        help=(
            'Target location '
            '(file path, database connection string, or API URL)'
        ),
    )
    # Shared --format/--strict-format handling for target resources.
    _add_format_options(load_parser, context='target')
    load_parser.set_defaults(func=cmd_load)

    # Define "pipeline" command (reads YAML config).
    pipe_parser = subparsers.add_parser(
        'pipeline',
        help=(
            'Inspect or run pipeline YAML (see '
            f'{PROJECT_URL}/blob/main/docs/pipeline-guide.md)'
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    pipe_parser.add_argument(
        '--config',
        required=True,
        help='Path to pipeline YAML configuration file',
    )
    pipe_parser.add_argument(
        '--list',
        action='store_true',
        help='List available job names and exit',
    )
    pipe_parser.add_argument(
        '--run',
        metavar='JOB',
        help='Run a specific job by name',
    )
    pipe_parser.set_defaults(func=cmd_pipeline)

    # Define "list" command.
    list_parser = subparsers.add_parser(
        'list',
        help='List ETL pipeline metadata',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    list_parser.add_argument(
        '--config',
        required=True,
        help='Path to pipeline YAML configuration file',
    )
    list_parser.add_argument(
        '--pipelines',
        action='store_true',
        help='List ETL pipelines',
    )
    list_parser.add_argument(
        '--sources',
        action='store_true',
        help='List data sources',
    )
    list_parser.add_argument(
        '--targets',
        action='store_true',
        help='List data targets',
    )
    list_parser.add_argument(
        '--transforms',
        action='store_true',
        help='List data transforms',
    )
    list_parser.set_defaults(func=cmd_list)

    # Define "run" command.
    run_parser = subparsers.add_parser(
        'run',
        help=(
            'Run an ETL pipeline '
            f'(see {PROJECT_URL}/blob/main/docs/run-module.md)'
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    run_parser.add_argument(
        '--config',
        required=True,
        help='Path to pipeline YAML configuration file',
    )
    run_parser.add_argument(
        '-j',
        '--job',
        help='Name of the job to run',
    )
    run_parser.add_argument(
        '-p',
        '--pipeline',
        help='Name of the pipeline to run',
    )
    run_parser.set_defaults(func=cmd_run)

    return parser
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
# -- Main -- #
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
def main(
    argv: list[str] | None = None,
) -> int:
    """
    Handle the CLI's main entry point.

    Parameters
    ----------
    argv : list[str] | None, optional
        List of command-line arguments. If ``None``, uses ``sys.argv``.

    Returns
    -------
    int
        Zero on success, non-zero on error.

    Notes
    -----
    This function prints results to stdout and errors to stderr.
    """
    parser = create_parser()
    args = parser.parse_args(argv)

    if not args.command:
        parser.print_help()
        return 0

    try:
        # Prefer argparse's set_defaults(func=...) dispatch.
        handler = getattr(args, 'func', None)
        if not callable(handler):
            # No handler bound for the subcommand: show help instead.
            parser.print_help()
            return 0
        return int(handler(args))

    except KeyboardInterrupt:
        # Conventional exit code for SIGINT.
        return 130

    except (OSError, TypeError, ValueError) as e:
        print(f'Error: {e}', file=sys.stderr)
        return 1
|