etlplus 0.5.2__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/cli/handlers.py CHANGED
@@ -6,44 +6,41 @@ Command handler functions for the ``etlplus`` command-line interface (CLI).
 
  from __future__ import annotations
 
- import argparse
- import csv
- import io
- import json
  import os
  import sys
+ from collections.abc import Mapping
  from pathlib import Path
  from typing import Any
+ from typing import Literal
  from typing import cast
 
  from ..config import PipelineConfig
  from ..config import load_pipeline_config
- from ..ddl import load_table_spec
- from ..ddl import render_tables
- from ..enums import FileFormat
+ from ..database import load_table_spec
+ from ..database import render_tables
  from ..extract import extract
  from ..file import File
  from ..load import load
  from ..run import run
  from ..transform import transform
  from ..types import JSONData
- from ..utils import json_type
- from ..utils import print_json
+ from ..types import TemplateKey
+ from ..validate import FieldRules
  from ..validate import validate
+ from . import io as cli_io
 
  # SECTION: EXPORTS ========================================================== #
 
 
  __all__ = [
      # Functions
-     'cmd_extract',
-     'cmd_list',
-     'cmd_load',
-     'cmd_pipeline',
-     'cmd_render',
-     'cmd_run',
-     'cmd_transform',
-     'cmd_validate',
+     'extract_handler',
+     'check_handler',
+     'load_handler',
+     'render_handler',
+     'run_handler',
+     'transform_handler',
+     'validate_handler',
  ]
 
 
@@ -72,7 +69,7 @@ def _collect_table_specs(
      specs: list[dict[str, Any]] = []
 
      if spec_path:
-         specs.append(load_table_spec(Path(spec_path)))
+         specs.append(dict(load_table_spec(Path(spec_path))))
 
      if config_path:
          cfg = load_pipeline_config(config_path, substitute=True)
@@ -81,102 +78,48 @@ def _collect_table_specs(
      return specs
 
 
- def _emit_json(
-     data: Any,
-     *,
-     pretty: bool,
- ) -> None:
-     """
-     Emit JSON to stdout honoring the pretty/compact preference.
-
-     Parameters
-     ----------
-     data : Any
-         Arbitrary JSON-serializable payload.
-     pretty : bool
-         When ``True`` pretty-print via :func:`print_json`; otherwise emit a
-         compact JSON string.
-     """
-     if pretty:
-         print_json(data)
-         return
-
-     dumped = json.dumps(
-         data,
-         ensure_ascii=False,
-         separators=(',', ':'),
-     )
-     print(dumped)
-
-
- def _explicit_cli_format(
-     args: argparse.Namespace,
- ) -> str | None:
-     """Return the explicit CLI format hint when provided."""
-
-     if not getattr(args, '_format_explicit', False):
-         return None
-     for attr in ('format', 'target_format', 'source_format'):
-         value = getattr(args, attr, None)
-         if value is None:
-             continue
-         normalized = value.strip().lower()
-         if normalized:
-             return normalized
-     return None
-
-
- def _infer_payload_format(
-     text: str,
- ) -> str:
-     """
-     Infer JSON vs CSV from payload text.
-
-     Parameters
-     ----------
-     text : str
-         Incoming payload as plain text.
-
-     Returns
-     -------
-     str
-         ``'json'`` when the text starts with ``{``/``[``, else ``'csv'``.
-     """
-     stripped = text.lstrip()
-     if stripped.startswith('{') or stripped.startswith('['):
-         return 'json'
-     return 'csv'
-
-
- def _list_sections(
+ def _check_sections(
      cfg: PipelineConfig,
-     args: argparse.Namespace,
+     *,
+     jobs: bool,
+     pipelines: bool,
+     sources: bool,
+     targets: bool,
+     transforms: bool,
  ) -> dict[str, Any]:
      """
-     Build sectioned metadata output for the list command.
+     Build sectioned metadata output for the check command.
 
      Parameters
      ----------
      cfg : PipelineConfig
          The loaded pipeline configuration.
-     args : argparse.Namespace
-         Parsed command-line arguments.
+     jobs : bool
+         Whether to include job metadata.
+     pipelines : bool
+         Whether to include pipeline metadata.
+     sources : bool
+         Whether to include source metadata.
+     targets : bool
+         Whether to include target metadata.
+     transforms : bool
+         Whether to include transform metadata.
 
      Returns
      -------
      dict[str, Any]
-         Metadata output for the list command.
+         Metadata output for the check command.
      """
      sections: dict[str, Any] = {}
-     if getattr(args, 'jobs', False):
+     if jobs:
          sections['jobs'] = _pipeline_summary(cfg)['jobs']
-     if getattr(args, 'pipelines', False):
+     if pipelines:
          sections['pipelines'] = [cfg.name]
-     if getattr(args, 'sources', False):
+     if sources:
          sections['sources'] = [src.name for src in cfg.sources]
-     if getattr(args, 'targets', False):
+     if targets:
          sections['targets'] = [tgt.name for tgt in cfg.targets]
-     if getattr(args, 'transforms', False):
+     if transforms:
          sections['transforms'] = [
              getattr(trf, 'name', None) for trf in cfg.transforms
          ]
@@ -185,88 +128,6 @@ def _list_sections(
      return sections
 
 
- def _materialize_file_payload(
-     source: object,
-     *,
-     format_hint: str | None,
-     format_explicit: bool,
- ) -> JSONData | object:
-     """
-     Return structured payloads when ``source`` references a file.
-
-     Parameters
-     ----------
-     source : object
-         Input source of data, possibly a file path.
-     format_hint : str | None
-         Explicit format hint: 'json', 'csv', or None to infer.
-     format_explicit : bool
-         Whether an explicit format hint was provided.
-
-     Returns
-     -------
-     JSONData | object
-         Parsed JSON data when ``source`` is a file; otherwise the original
-         ``source`` object.
-     """
-     if isinstance(source, (dict, list)):
-         return cast(JSONData, source)
-     if not isinstance(source, (str, os.PathLike)):
-         return source
-
-     path = Path(source)
-
-     normalized_hint = (format_hint or '').strip().lower()
-     fmt: FileFormat | None = None
-
-     if format_explicit and normalized_hint:
-         try:
-             fmt = FileFormat(normalized_hint)
-         except ValueError:
-             fmt = None
-     elif not format_explicit:
-         suffix = path.suffix.lower().lstrip('.')
-         if suffix:
-             try:
-                 fmt = FileFormat(suffix)
-             except ValueError:
-                 fmt = None
-
-     if fmt is None:
-         return source
-     if fmt == FileFormat.CSV:
-         return _read_csv_rows(path)
-     return File(path, fmt).read()
-
-
- def _parse_text_payload(
-     text: str,
-     fmt: str | None,
- ) -> JSONData | str:
-     """
-     Parse JSON/CSV text into a Python payload.
-
-     Parameters
-     ----------
-     text : str
-         The input text payload.
-     fmt : str | None
-         Explicit format hint: 'json', 'csv', or None to infer.
-
-     Returns
-     -------
-     JSONData | str
-         The parsed payload as JSON data or raw text.
-     """
-     effective = (fmt or '').strip().lower() or _infer_payload_format(text)
-     if effective == 'json':
-         return cast(JSONData, json_type(text))
-     if effective == 'csv':
-         reader = csv.DictReader(io.StringIO(text))
-         return [dict(row) for row in reader]
-     return text
-
-
  def _pipeline_summary(
      cfg: PipelineConfig,
  ) -> dict[str, Any]:
@@ -295,294 +156,183 @@ def _pipeline_summary(
      }
 
 
- def _presentation_flags(
-     args: argparse.Namespace,
- ) -> tuple[bool, bool]:
-     """
-     Return presentation toggles from the parsed namespace.
-
-     Parameters
-     ----------
-     args : argparse.Namespace
-         Namespace produced by the CLI parser.
-
-     Returns
-     -------
-     tuple[bool, bool]
-         Pair of ``(pretty, quiet)`` flags with safe defaults.
-     """
-     return getattr(args, 'pretty', True), getattr(args, 'quiet', False)
-
-
- def _read_csv_rows(
-     path: Path,
- ) -> list[dict[str, str]]:
-     """
-     Read CSV rows into dictionaries.
-
-     Parameters
-     ----------
-     path : Path
-         Path to a CSV file.
-
-     Returns
-     -------
-     list[dict[str, str]]
-         List of dictionaries, each representing a row in the CSV file.
-     """
-     with path.open(newline='', encoding='utf-8') as handle:
-         reader = csv.DictReader(handle)
-         return [dict(row) for row in reader]
-
-
- def _read_stdin_text() -> str:
-     """
-     Return every character from ``stdin`` as a single string.
-
-     Returns
-     -------
-     str
-         Entire ``stdin`` contents.
-     """
-     return sys.stdin.read()
+ # SECTION: FUNCTIONS ======================================================== #
 
 
- def _resolve_cli_payload(
-     source: object,
+ def check_handler(
      *,
-     format_hint: str | None,
-     format_explicit: bool,
-     hydrate_files: bool = True,
- ) -> JSONData | object:
+     config: str,
+     jobs: bool = False,
+     pipelines: bool = False,
+     sources: bool = False,
+     summary: bool = False,
+     targets: bool = False,
+     transforms: bool = False,
+     substitute: bool = True,
+     pretty: bool = True,
+ ) -> int:
      """
-     Normalize CLI-provided payloads, honoring stdin and inline data.
+     Print requested pipeline sections from a YAML configuration.
 
      Parameters
      ----------
-     source : object
-         Raw CLI value (path, inline payload, or ``'-'`` for stdin).
-     format_hint : str | None
-         Explicit format hint supplied by the CLI option.
-     format_explicit : bool
-         Flag indicating whether the format hint was explicitly provided.
-     hydrate_files : bool, optional
-         When ``True`` (default) materialize file paths into structured data.
-         When ``False``, keep the original path so downstream code can stream
-         from disk directly.
+     config : str
+         Path to the pipeline YAML configuration.
+     jobs : bool, optional
+         Whether to include job metadata. Default is ``False``.
+     pipelines : bool, optional
+         Whether to include pipeline metadata. Default is ``False``.
+     sources : bool, optional
+         Whether to include source metadata. Default is ``False``.
+     summary : bool, optional
+         Whether to print a full summary of the pipeline. Default is ``False``.
+     targets : bool, optional
+         Whether to include target metadata. Default is ``False``.
+     transforms : bool, optional
+         Whether to include transform metadata. Default is ``False``.
+     substitute : bool, optional
+         Whether to perform environment variable substitution. Default is
+         ``True``.
+     pretty : bool, optional
+         Whether to pretty-print output. Default is ``True``.
 
      Returns
      -------
-     JSONData | object
-         Parsed payload or the original source value when hydration is
-         disabled.
-     """
-     if isinstance(source, (os.PathLike, str)) and str(source) == '-':
-         text = _read_stdin_text()
-         return _parse_text_payload(text, format_hint)
+     int
+         Zero on success.
 
-     if not hydrate_files:
-         return source
+     """
+     cfg = load_pipeline_config(config, substitute=substitute)
+     if summary:
+         cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
+         return 0
 
-     return _materialize_file_payload(
-         source,
-         format_hint=format_hint,
-         format_explicit=format_explicit,
+     cli_io.emit_json(
+         _check_sections(
+             cfg,
+             jobs=jobs,
+             pipelines=pipelines,
+             sources=sources,
+             targets=targets,
+             transforms=transforms,
+         ),
+         pretty=pretty,
      )
+     return 0
 
 
- def _write_json_output(
-     data: Any,
-     output_path: str | None,
+ def extract_handler(
      *,
-     success_message: str,
- ) -> bool:
-     """
-     Optionally persist JSON data to disk.
-
-     Parameters
-     ----------
-     data : Any
-         Data to write.
-     output_path : str | None
-         Path to write the output to. None to print to stdout.
-     success_message : str
-         Message to print upon successful write.
-
-     Returns
-     -------
-     bool
-         True if output was written to a file, False if printed to stdout.
-     """
-     if not output_path or output_path == '-':
-         return False
-     File(Path(output_path), FileFormat.JSON).write_json(data)
-     print(f'{success_message} {output_path}')
-     return True
-
-
- # SECTION: FUNCTIONS ======================================================== #
-
-
- def cmd_extract(
-     args: argparse.Namespace,
+     source_type: str,
+     source: str,
+     format_hint: str | None = None,
+     format_explicit: bool = False,
+     target: str | None = None,
+     output: str | None = None,
+     pretty: bool = True,
  ) -> int:
      """
      Extract data from a source.
 
      Parameters
      ----------
-     args : argparse.Namespace
-         Parsed command-line arguments.
+     source_type : str
+         The type of the source (e.g., 'file', 'api', 'database').
+     source : str
+         The source identifier (e.g., path, URL, DSN).
+     format_hint : str | None, optional
+         An optional format hint (e.g., 'json', 'csv'). Default is ``None``.
+     format_explicit : bool, optional
+         Whether the format hint was explicitly provided. Default is ``False``.
+     target : str | None, optional
+         The target destination (e.g., path, database). Default is ``None``.
+     output : str | None, optional
+         Path to write output data. Default is ``None``.
+     pretty : bool, optional
+         Whether to pretty-print output. Default is ``True``.
 
      Returns
      -------
      int
          Zero on success.
+
      """
-     pretty, _ = _presentation_flags(args)
-     explicit_format = _explicit_cli_format(args)
+     explicit_format = format_hint if format_explicit else None
 
-     if args.source == '-':
-         text = _read_stdin_text()
-         payload = _parse_text_payload(text, getattr(args, 'format', None))
-         _emit_json(payload, pretty=pretty)
+     if source == '-':
+         text = cli_io.read_stdin_text()
+         payload = cli_io.parse_text_payload(
+             text,
+             format_hint,
+         )
+         cli_io.emit_json(payload, pretty=pretty)
 
          return 0
 
      result = extract(
-         args.source_type,
-         args.source,
+         source_type,
+         source,
          file_format=explicit_format,
      )
-     output_path = getattr(args, 'target', None)
-     if output_path is None:
-         output_path = getattr(args, 'output', None)
+     output_path = target or output
 
-     if not _write_json_output(
+     cli_io.emit_or_write(
          result,
          output_path,
+         pretty=pretty,
          success_message='Data extracted and saved to',
-     ):
-         _emit_json(result, pretty=pretty)
-
-     return 0
-
-
- def cmd_validate(
-     args: argparse.Namespace,
- ) -> int:
-     """
-     Validate data from a source.
-
-     Parameters
-     ----------
-     args : argparse.Namespace
-         Parsed command-line arguments.
-
-     Returns
-     -------
-     int
-         Zero on success.
-     """
-     pretty, _quiet = _presentation_flags(args)
-     format_explicit: bool = getattr(args, '_format_explicit', False)
-     format_hint: str | None = getattr(args, 'source_format', None)
-     payload = cast(
-         JSONData | str,
-         _resolve_cli_payload(
-             args.source,
-             format_hint=format_hint,
-             format_explicit=format_explicit,
-         ),
      )
-     result = validate(payload, args.rules)
-
-     target_path = getattr(args, 'target', None)
-     if target_path:
-         validated_data = result.get('data')
-         if validated_data is not None:
-             _write_json_output(
-                 validated_data,
-                 target_path,
-                 success_message='Validation result saved to',
-             )
-         else:
-             print(
-                 f'Validation failed, no data to save for {target_path}',
-                 file=sys.stderr,
-             )
-     else:
-         _emit_json(result, pretty=pretty)
 
      return 0
 
 
- def cmd_transform(
-     args: argparse.Namespace,
- ) -> int:
-     """
-     Transform data from a source.
-
-     Parameters
-     ----------
-     args : argparse.Namespace
-         Parsed command-line arguments.
-
-     Returns
-     -------
-     int
-         Zero on success.
-     """
-     pretty, _quiet = _presentation_flags(args)
-     format_hint: str | None = getattr(args, 'source_format', None)
-     format_explicit: bool = format_hint is not None
-
-     payload = cast(
-         JSONData | str,
-         _resolve_cli_payload(
-             args.source,
-             format_hint=format_hint,
-             format_explicit=format_explicit,
-         ),
-     )
-
-     data = transform(payload, args.operations)
-
-     if not _write_json_output(
-         data,
-         getattr(args, 'target', None),
-         success_message='Data transformed and saved to',
-     ):
-         _emit_json(data, pretty=pretty)
-
-     return 0
-
-
- def cmd_load(
-     args: argparse.Namespace,
+ def load_handler(
+     *,
+     source: str,
+     target_type: str,
+     target: str,
+     source_format: str | None = None,
+     target_format: str | None = None,
+     format_explicit: bool = False,
+     output: str | None = None,
+     pretty: bool = True,
  ) -> int:
      """
      Load data into a target.
 
      Parameters
      ----------
-     args : argparse.Namespace
-         Parsed command-line arguments.
+     source : str
+         The source payload (e.g., path, inline data).
+     target_type : str
+         The type of the target (e.g., 'file', 'database').
+     target : str
+         The target destination (e.g., path, DSN).
+     source_format : str | None, optional
+         An optional source format hint (e.g., 'json', 'csv'). Default is
+         ``None``.
+     target_format : str | None, optional
+         An optional target format hint (e.g., 'json', 'csv'). Default is
+         ``None``.
+     format_explicit : bool, optional
+         Whether the format hint was explicitly provided. Default is ``False``.
+     output : str | None, optional
+         Path to write output data. Default is ``None``.
+     pretty : bool, optional
+         Whether to pretty-print output. Default is ``True``.
 
      Returns
      -------
      int
          Zero on success.
      """
-     pretty, _ = _presentation_flags(args)
-     explicit_format = _explicit_cli_format(args)
+     explicit_format = target_format if format_explicit else None
 
      # Allow piping into load.
-     source_format = getattr(args, 'source_format', None)
      source_value = cast(
          str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
-         _resolve_cli_payload(
-             args.source,
+         cli_io.resolve_cli_payload(
+             source,
              format_hint=source_format,
             format_explicit=source_format is not None,
             hydrate_files=False,
@@ -590,94 +340,81 @@ def cmd_load(
      )
 
      # Allow piping out of load for file targets.
-     if args.target_type == 'file' and args.target == '-':
-         payload = _materialize_file_payload(
+     if target_type == 'file' and target == '-':
+         payload = cli_io.materialize_file_payload(
              source_value,
              format_hint=source_format,
              format_explicit=source_format is not None,
          )
-         _emit_json(payload, pretty=pretty)
+         cli_io.emit_json(payload, pretty=pretty)
          return 0
 
      result = load(
          source_value,
-         args.target_type,
-         args.target,
+         target_type,
+         target,
          file_format=explicit_format,
      )
 
-     output_path = getattr(args, 'output', None)
-     if not _write_json_output(
+     output_path = output
+     cli_io.emit_or_write(
          result,
          output_path,
+         pretty=pretty,
          success_message='Load result saved to',
-     ):
-         _emit_json(result, pretty=pretty)
+     )
 
      return 0
 
 
- def cmd_pipeline(
-     args: argparse.Namespace,
+ def render_handler(
+     *,
+     config: str | None = None,
+     spec: str | None = None,
+     table: str | None = None,
+     template: TemplateKey | None = None,
+     template_path: str | None = None,
+     output: str | None = None,
+     pretty: bool = True,
+     quiet: bool = False,
  ) -> int:
      """
-     Inspect or run a pipeline YAML configuration.
+     Render SQL DDL statements from table schema specs.
 
      Parameters
      ----------
-     args : argparse.Namespace
-         Parsed command-line arguments.
+     config : str | None, optional
+         Path to a pipeline YAML configuration. Default is ``None``.
+     spec : str | None, optional
+         Path to a standalone table spec file. Default is ``None``.
+     table : str | None, optional
+         Table name filter. Default is ``None``.
+     template : TemplateKey | None, optional
+         The template key to use for rendering. Default is ``None``.
+     template_path : str | None, optional
+         Path to a custom template file. Default is ``None``.
+     output : str | None, optional
+         Path to write output SQL. Default is ``None``.
+     pretty : bool, optional
+         Whether to pretty-print output. Default is ``True``.
+     quiet : bool, optional
+         Whether to suppress non-error output. Default is ``False``.
 
      Returns
      -------
      int
          Zero on success.
      """
-     print(
-         'DEPRECATED: use "etlplus list --summary|--jobs" or '
-         '"etlplus run --job/--pipeline" instead of "etlplus pipeline".',
-         file=sys.stderr,
-     )
-
-     cfg = load_pipeline_config(args.config, substitute=True)
-
-     list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
-     run_target = (
-         getattr(args, 'run', None)
-         or getattr(args, 'job', None)
-         or getattr(args, 'pipeline', None)
-     )
-
-     if list_flag and not run_target:
-         print_json({'jobs': _pipeline_summary(cfg)['jobs']})
-         return 0
-
-     if run_target:
-         result = run(job=run_target, config_path=args.config)
-         print_json({'status': 'ok', 'result': result})
-         return 0
-
-     print_json(_pipeline_summary(cfg))
-     return 0
-
-
- def cmd_render(
-     args: argparse.Namespace,
- ) -> int:
-     """Render SQL DDL statements from table schema specs."""
-
-     _pretty, quiet = _presentation_flags(args)
-
-     template_value = getattr(args, 'template', 'ddl') or 'ddl'
-     template_path = getattr(args, 'template_path', None)
-     table_filter = getattr(args, 'table', None)
-     spec_path = getattr(args, 'spec', None)
-     config_path = getattr(args, 'config', None)
+     template_value: TemplateKey = template or 'ddl'
+     template_path_override = template_path
+     table_filter = table
+     spec_path = spec
+     config_path = config
 
      # If the provided template points to a file, treat it as a path override.
-     file_override = template_path
-     template_key = template_value
-     if template_path is None:
+     file_override = template_path_override
+     template_key: TemplateKey | None = template_value
+     if template_path_override is None:
          candidate_path = Path(template_value)
          if candidate_path.exists():
              file_override = str(candidate_path)
@@ -710,62 +447,210 @@ def cmd_render(
      sql_text = (
          '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
      )
+     rendered_output = sql_text if pretty else sql_text.rstrip('\n')
 
-     output_path = getattr(args, 'output', None)
+     output_path = output
      if output_path and output_path != '-':
-         Path(output_path).write_text(sql_text, encoding='utf-8')
+         Path(output_path).write_text(rendered_output, encoding='utf-8')
          if not quiet:
              print(f'Rendered {len(specs)} schema(s) to {output_path}')
          return 0
 
-     print(sql_text)
+     print(rendered_output)
      return 0
 
 
- def cmd_list(args: argparse.Namespace) -> int:
+ def run_handler(
+     *,
+     config: str,
+     job: str | None = None,
+     pipeline: str | None = None,
+     pretty: bool = True,
+ ) -> int:
      """
-     Print requested pipeline sections from a YAML configuration.
+     Execute an ETL job end-to-end from a pipeline YAML configuration.
 
      Parameters
      ----------
-     args : argparse.Namespace
-         Parsed command-line arguments.
+     config : str
+         Path to the pipeline YAML configuration.
+     job : str | None, optional
+         Name of the job to run. If not provided, runs the entire pipeline.
+         Default is ``None``.
+     pipeline : str | None, optional
+         Alias for ``job``. Default is ``None``.
+     pretty : bool, optional
+         Whether to pretty-print output. Default is ``True``.
 
      Returns
      -------
      int
          Zero on success.
      """
-     cfg = load_pipeline_config(args.config, substitute=True)
-     if getattr(args, 'summary', False):
-         print_json(_pipeline_summary(cfg))
+     cfg = load_pipeline_config(config, substitute=True)
+
+     job_name = job or pipeline
+     if job_name:
+         result = run(job=job_name, config_path=config)
+         cli_io.emit_json({'status': 'ok', 'result': result}, pretty=pretty)
          return 0
 
-     print_json(_list_sections(cfg, args))
+     cli_io.emit_json(_pipeline_summary(cfg), pretty=pretty)
      return 0
 
 
- def cmd_run(args: argparse.Namespace) -> int:
+ TransformOperations = Mapping[
+     Literal['filter', 'map', 'select', 'sort', 'aggregate'],
+     Any,
+ ]
+
+
+ def transform_handler(
+     *,
+     source: str,
+     operations: JSONData | str,
+     target: str | None = None,
+     source_format: str | None = None,
+     target_format: str | None = None,
+     pretty: bool = True,
+     format_explicit: bool = False,
+ ) -> int:
      """
-     Execute an ETL job end-to-end from a pipeline YAML configuration.
+     Transform data from a source.
 
      Parameters
      ----------
-     args : argparse.Namespace
-         Parsed command-line arguments.
+     source : str
+         The source payload (e.g., path, inline data).
+     operations : JSONData | str
+         The transformation operations (inline JSON or path).
+     target : str | None, optional
+         The target destination (e.g., path). Default is ``None``.
+     source_format : str | None, optional
+         An optional source format hint (e.g., 'json', 'csv'). Default is
+         ``None``.
+     target_format : str | None, optional
+         An optional target format hint (e.g., 'json', 'csv'). Default is
+         ``None``.
+     pretty : bool, optional
+         Whether to pretty-print output. Default is ``True``.
+     format_explicit : bool, optional
+         Whether the format hint was explicitly provided. Default is ``False``.
 
      Returns
      -------
      int
          Zero on success.
+
+     Raises
+     ------
+     ValueError
+         If the operations payload is not a mapping.
      """
-     cfg = load_pipeline_config(args.config, substitute=True)
+     format_hint: str | None = source_format
+     format_explicit = format_hint is not None or format_explicit
 
-     job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
-     if job_name:
-         result = run(job=job_name, config_path=args.config)
-         print_json({'status': 'ok', 'result': result})
+     payload = cast(
+         JSONData | str,
+         cli_io.resolve_cli_payload(
+             source,
+             format_hint=format_hint,
+             format_explicit=format_explicit,
+         ),
+     )
+
+     operations_payload = cli_io.resolve_cli_payload(
+         operations,
+         format_hint=None,
+         format_explicit=format_explicit,
+     )
+     if not isinstance(operations_payload, dict):
+         raise ValueError('operations must resolve to a mapping of transforms')
+
+     data = transform(payload, cast(TransformOperations, operations_payload))
+
+     if target and target != '-':
+         File.write_file(target, data, file_format=target_format)
+         print(f'Data transformed and saved to {target}')
          return 0
 
-     print_json(_pipeline_summary(cfg))
+     cli_io.emit_json(data, pretty=pretty)
+     return 0
+
+
+ def validate_handler(
+     *,
+     source: str,
+     rules: JSONData | str,
+     source_format: str | None = None,
+     target: str | None = None,
+     format_explicit: bool = False,
+     pretty: bool = True,
+ ) -> int:
+     """
+     Validate data from a source.
+
+     Parameters
+     ----------
+     source : str
+         The source payload (e.g., path, inline data).
+     rules : JSONData | str
+         The validation rules (inline JSON or path).
+     source_format : str | None, optional
+         An optional source format hint (e.g., 'json', 'csv'). Default is
+         ``None``.
+     target : str | None, optional
+         The target destination (e.g., path). Default is ``None``.
+     format_explicit : bool, optional
+         Whether the format hint was explicitly provided. Default is ``False``.
+     pretty : bool, optional
+         Whether to pretty-print output. Default is ``True``.
+
+     Returns
+     -------
+     int
+         Zero on success.
+
+     Raises
+     ------
+     ValueError
+         If the rules payload is not a mapping.
+     """
+     format_hint: str | None = source_format
+     payload = cast(
+         JSONData | str,
+         cli_io.resolve_cli_payload(
+             source,
+             format_hint=format_hint,
+             format_explicit=format_explicit,
+         ),
+     )
+
+     rules_payload = cli_io.resolve_cli_payload(
+         rules,
+         format_hint=None,
+         format_explicit=format_explicit,
+     )
+     if not isinstance(rules_payload, dict):
+         raise ValueError('rules must resolve to a mapping of field rules')
+
+     field_rules = cast(Mapping[str, FieldRules], rules_payload)
+     result = validate(payload, field_rules)
+
+     if target and target != '-':
+         validated_data = result.get('data')
+         if validated_data is not None:
+             cli_io.write_json_output(
+                 validated_data,
+                 target,
+                 success_message='Validation result saved to',
+             )
+         else:
+             print(
+                 f'Validation failed, no data to save for {target}',
+                 file=sys.stderr,
+             )
+     else:
+         cli_io.emit_json(result, pretty=pretty)
+
      return 0
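
Note: the 0.9.x handlers are plain keyword-only functions rather than consumers of an argparse.Namespace, so they can be called directly from Python as well as through the CLI. The sketch below is illustrative only and is not part of the diff; the import path follows the file shown above (etlplus/cli/handlers.py), while the file names and the rules path are hypothetical placeholders.

# Minimal usage sketch (assumed calling pattern, not shipped with etlplus).
from etlplus.cli.handlers import extract_handler, validate_handler

# Old style: cmd_extract(argparse.Namespace(...)); new style: keyword-only.
exit_code = extract_handler(
    source_type='file',
    source='input.json',        # hypothetical input file
    format_hint='json',
    format_explicit=True,
    output='extracted.json',    # write the result instead of printing it
)

# rules may be inline JSON data or a path; here a hypothetical rules file.
exit_code = validate_handler(
    source='extracted.json',
    rules='rules.json',
    pretty=True,
)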