etlplus 0.7.0__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/cli/handlers.py CHANGED
@@ -6,30 +6,28 @@ Command handler functions for the ``etlplus`` command-line interface (CLI).
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
- import argparse
10
- import csv
11
- import io
12
- import json
13
9
  import os
14
10
  import sys
11
+ from collections.abc import Mapping
15
12
  from pathlib import Path
16
13
  from typing import Any
14
+ from typing import Literal
17
15
  from typing import cast
18
16
 
19
17
  from ..config import PipelineConfig
20
18
  from ..config import load_pipeline_config
21
19
  from ..database import load_table_spec
22
20
  from ..database import render_tables
23
- from ..enums import FileFormat
24
21
  from ..extract import extract
25
22
  from ..file import File
26
23
  from ..load import load
27
24
  from ..run import run
28
25
  from ..transform import transform
29
26
  from ..types import JSONData
30
- from ..utils import json_type
31
- from ..utils import print_json
27
+ from ..types import TemplateKey
28
+ from ..validate import FieldRules
32
29
  from ..validate import validate
30
+ from . import io as cli_io
33
31
 
34
32
  # SECTION: EXPORTS ========================================================== #
35
33
 
@@ -39,7 +37,6 @@ __all__ = [
39
37
  'extract_handler',
40
38
  'check_handler',
41
39
  'load_handler',
42
- 'pipeline_handler',
43
40
  'render_handler',
44
41
  'run_handler',
45
42
  'transform_handler',
@@ -72,7 +69,7 @@ def _collect_table_specs(
72
69
  specs: list[dict[str, Any]] = []
73
70
 
74
71
  if spec_path:
75
- specs.append(load_table_spec(Path(spec_path)))
72
+ specs.append(dict(load_table_spec(Path(spec_path))))
76
73
 
77
74
  if config_path:
78
75
  cfg = load_pipeline_config(config_path, substitute=True)
@@ -81,76 +78,14 @@ def _collect_table_specs(
81
78
  return specs
82
79
 
83
80
 
84
- def _emit_json(
85
- data: Any,
86
- *,
87
- pretty: bool,
88
- ) -> None:
89
- """
90
- Emit JSON to stdout honoring the pretty/compact preference.
91
-
92
- Parameters
93
- ----------
94
- data : Any
95
- Arbitrary JSON-serializable payload.
96
- pretty : bool
97
- When ``True`` pretty-print via :func:`print_json`; otherwise emit a
98
- compact JSON string.
99
- """
100
- if pretty:
101
- print_json(data)
102
- return
103
-
104
- dumped = json.dumps(
105
- data,
106
- ensure_ascii=False,
107
- separators=(',', ':'),
108
- )
109
- print(dumped)
110
-
111
-
112
- def _explicit_cli_format(
113
- args: argparse.Namespace,
114
- ) -> str | None:
115
- """Return the explicit CLI format hint when provided."""
116
-
117
- if not getattr(args, '_format_explicit', False):
118
- return None
119
- for attr in ('format', 'target_format', 'source_format'):
120
- value = getattr(args, attr, None)
121
- if value is None:
122
- continue
123
- normalized = value.strip().lower()
124
- if normalized:
125
- return normalized
126
- return None
127
-
128
-
129
- def _infer_payload_format(
130
- text: str,
131
- ) -> str:
132
- """
133
- Infer JSON vs CSV from payload text.
134
-
135
- Parameters
136
- ----------
137
- text : str
138
- Incoming payload as plain text.
139
-
140
- Returns
141
- -------
142
- str
143
- ``'json'`` when the text starts with ``{``/``[``, else ``'csv'``.
144
- """
145
- stripped = text.lstrip()
146
- if stripped.startswith('{') or stripped.startswith('['):
147
- return 'json'
148
- return 'csv'
149
-
150
-
151
81
  def _check_sections(
152
82
  cfg: PipelineConfig,
153
- args: argparse.Namespace,
83
+ *,
84
+ jobs: bool,
85
+ pipelines: bool,
86
+ sources: bool,
87
+ targets: bool,
88
+ transforms: bool,
154
89
  ) -> dict[str, Any]:
155
90
  """
156
91
  Build sectioned metadata output for the check command.
@@ -159,8 +94,16 @@ def _check_sections(
159
94
  ----------
160
95
  cfg : PipelineConfig
161
96
  The loaded pipeline configuration.
162
- args : argparse.Namespace
163
- Parsed command-line arguments.
97
+ jobs : bool
98
+ Whether to include job metadata.
99
+ pipelines : bool
100
+ Whether to include pipeline metadata.
101
+ sources : bool
102
+ Whether to include source metadata.
103
+ targets : bool
104
+ Whether to include target metadata.
105
+ transforms : bool
106
+ Whether to include transform metadata.
164
107
 
165
108
  Returns
166
109
  -------
@@ -168,15 +111,15 @@ def _check_sections(
168
111
  Metadata output for the check command.
169
112
  """
170
113
  sections: dict[str, Any] = {}
171
- if getattr(args, 'jobs', False):
114
+ if jobs:
172
115
  sections['jobs'] = _pipeline_summary(cfg)['jobs']
173
- if getattr(args, 'pipelines', False):
116
+ if pipelines:
174
117
  sections['pipelines'] = [cfg.name]
175
- if getattr(args, 'sources', False):
118
+ if sources:
176
119
  sections['sources'] = [src.name for src in cfg.sources]
177
- if getattr(args, 'targets', False):
120
+ if targets:
178
121
  sections['targets'] = [tgt.name for tgt in cfg.targets]
179
- if getattr(args, 'transforms', False):
122
+ if transforms:
180
123
  sections['transforms'] = [
181
124
  getattr(trf, 'name', None) for trf in cfg.transforms
182
125
  ]
@@ -185,88 +128,6 @@ def _check_sections(
185
128
  return sections
186
129
 
187
130
 
188
- def _materialize_file_payload(
189
- source: object,
190
- *,
191
- format_hint: str | None,
192
- format_explicit: bool,
193
- ) -> JSONData | object:
194
- """
195
- Return structured payloads when ``source`` references a file.
196
-
197
- Parameters
198
- ----------
199
- source : object
200
- Input source of data, possibly a file path.
201
- format_hint : str | None
202
- Explicit format hint: 'json', 'csv', or None to infer.
203
- format_explicit : bool
204
- Whether an explicit format hint was provided.
205
-
206
- Returns
207
- -------
208
- JSONData | object
209
- Parsed JSON data when ``source`` is a file; otherwise the original
210
- ``source`` object.
211
- """
212
- if isinstance(source, (dict, list)):
213
- return cast(JSONData, source)
214
- if not isinstance(source, (str, os.PathLike)):
215
- return source
216
-
217
- path = Path(source)
218
-
219
- normalized_hint = (format_hint or '').strip().lower()
220
- fmt: FileFormat | None = None
221
-
222
- if format_explicit and normalized_hint:
223
- try:
224
- fmt = FileFormat(normalized_hint)
225
- except ValueError:
226
- fmt = None
227
- elif not format_explicit:
228
- suffix = path.suffix.lower().lstrip('.')
229
- if suffix:
230
- try:
231
- fmt = FileFormat(suffix)
232
- except ValueError:
233
- fmt = None
234
-
235
- if fmt is None:
236
- return source
237
- if fmt == FileFormat.CSV:
238
- return _read_csv_rows(path)
239
- return File(path, fmt).read()
240
-
241
-
242
- def _parse_text_payload(
243
- text: str,
244
- fmt: str | None,
245
- ) -> JSONData | str:
246
- """
247
- Parse JSON/CSV text into a Python payload.
248
-
249
- Parameters
250
- ----------
251
- text : str
252
- The input text payload.
253
- fmt : str | None
254
- Explicit format hint: 'json', 'csv', or None to infer.
255
-
256
- Returns
257
- -------
258
- JSONData | str
259
- The parsed payload as JSON data or raw text.
260
- """
261
- effective = (fmt or '').strip().lower() or _infer_payload_format(text)
262
- if effective == 'json':
263
- return cast(JSONData, json_type(text))
264
- if effective == 'csv':
265
- reader = csv.DictReader(io.StringIO(text))
266
- return [dict(row) for row in reader]
267
- return text
268
-
269
-
270
131
  def _pipeline_summary(
271
132
  cfg: PipelineConfig,
272
133
  ) -> dict[str, Any]:
@@ -295,229 +156,183 @@ def _pipeline_summary(
295
156
  }
296
157
 
297
158
 
298
- def _presentation_flags(
299
- args: argparse.Namespace,
300
- ) -> tuple[bool, bool]:
301
- """
302
- Return presentation toggles from the parsed namespace.
303
-
304
- Parameters
305
- ----------
306
- args : argparse.Namespace
307
- Namespace produced by the CLI parser.
308
-
309
- Returns
310
- -------
311
- tuple[bool, bool]
312
- Pair of ``(pretty, quiet)`` flags with safe defaults.
313
- """
314
- return getattr(args, 'pretty', True), getattr(args, 'quiet', False)
315
-
316
-
317
- def _read_csv_rows(
318
- path: Path,
319
- ) -> list[dict[str, str]]:
320
- """
321
- Read CSV rows into dictionaries.
322
-
323
- Parameters
324
- ----------
325
- path : Path
326
- Path to a CSV file.
327
-
328
- Returns
329
- -------
330
- list[dict[str, str]]
331
- List of dictionaries, each representing a row in the CSV file.
332
- """
333
- with path.open(newline='', encoding='utf-8') as handle:
334
- reader = csv.DictReader(handle)
335
- return [dict(row) for row in reader]
336
-
337
-
338
- def _read_stdin_text() -> str:
339
- """
340
- Return every character from ``stdin`` as a single string.
341
-
342
- Returns
343
- -------
344
- str
345
- Entire ``stdin`` contents.
346
- """
347
- return sys.stdin.read()
348
-
349
-
350
- def _resolve_cli_payload(
351
- source: object,
352
- *,
353
- format_hint: str | None,
354
- format_explicit: bool,
355
- hydrate_files: bool = True,
356
- ) -> JSONData | object:
357
- """
358
- Normalize CLI-provided payloads, honoring stdin and inline data.
359
-
360
- Parameters
361
- ----------
362
- source : object
363
- Raw CLI value (path, inline payload, or ``'-'`` for stdin).
364
- format_hint : str | None
365
- Explicit format hint supplied by the CLI option.
366
- format_explicit : bool
367
- Flag indicating whether the format hint was explicitly provided.
368
- hydrate_files : bool, optional
369
- When ``True`` (default) materialize file paths into structured data.
370
- When ``False``, keep the original path so downstream code can stream
371
- from disk directly.
372
-
373
- Returns
374
- -------
375
- JSONData | object
376
- Parsed payload or the original source value when hydration is
377
- disabled.
378
- """
379
- if isinstance(source, (os.PathLike, str)) and str(source) == '-':
380
- text = _read_stdin_text()
381
- return _parse_text_payload(text, format_hint)
382
-
383
- if not hydrate_files:
384
- return source
385
-
386
- return _materialize_file_payload(
387
- source,
388
- format_hint=format_hint,
389
- format_explicit=format_explicit,
390
- )
391
-
392
-
393
- def _write_json_output(
394
- data: Any,
395
- output_path: str | None,
396
- *,
397
- success_message: str,
398
- ) -> bool:
399
- """
400
- Optionally persist JSON data to disk.
401
-
402
- Parameters
403
- ----------
404
- data : Any
405
- Data to write.
406
- output_path : str | None
407
- Path to write the output to. None to print to stdout.
408
- success_message : str
409
- Message to print upon successful write.
410
-
411
- Returns
412
- -------
413
- bool
414
- True if output was written to a file, False if printed to stdout.
415
- """
416
- if not output_path or output_path == '-':
417
- return False
418
- File(Path(output_path), FileFormat.JSON).write_json(data)
419
- print(f'{success_message} {output_path}')
420
- return True
421
-
422
-
423
159
  # SECTION: FUNCTIONS ======================================================== #
424
160
 
425
161
 
426
162
  def check_handler(
427
- args: argparse.Namespace,
163
+ *,
164
+ config: str,
165
+ jobs: bool = False,
166
+ pipelines: bool = False,
167
+ sources: bool = False,
168
+ summary: bool = False,
169
+ targets: bool = False,
170
+ transforms: bool = False,
171
+ substitute: bool = True,
172
+ pretty: bool = True,
428
173
  ) -> int:
429
174
  """
430
175
  Print requested pipeline sections from a YAML configuration.
431
176
 
432
177
  Parameters
433
178
  ----------
434
- args : argparse.Namespace
435
- Parsed command-line arguments.
179
+ config : str
180
+ Path to the pipeline YAML configuration.
181
+ jobs : bool, optional
182
+ Whether to include job metadata. Default is ``False``.
183
+ pipelines : bool, optional
184
+ Whether to include pipeline metadata. Default is ``False``.
185
+ sources : bool, optional
186
+ Whether to include source metadata. Default is ``False``.
187
+ summary : bool, optional
188
+ Whether to print a full summary of the pipeline. Default is ``False``.
189
+ targets : bool, optional
190
+ Whether to include target metadata. Default is ``False``.
191
+ transforms : bool, optional
192
+ Whether to include transform metadata. Default is ``False``.
193
+ substitute : bool, optional
194
+ Whether to perform environment variable substitution. Default is
195
+ ``True``.
196
+ pretty : bool, optional
197
+ Whether to pretty-print output. Default is ``True``.
436
198
 
437
199
  Returns
438
200
  -------
439
201
  int
440
202
  Zero on success.
203
+
441
204
  """
442
- cfg = load_pipeline_config(args.config, substitute=True)
443
- if getattr(args, 'summary', False):
444
- print_json(_pipeline_summary(cfg))
205
+ cfg = load_pipeline_config(config, substitute=substitute)
206
+ if summary:
207
+ cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
445
208
  return 0
446
209
 
447
- print_json(_check_sections(cfg, args))
210
+ cli_io.emit_json(
211
+ _check_sections(
212
+ cfg,
213
+ jobs=jobs,
214
+ pipelines=pipelines,
215
+ sources=sources,
216
+ targets=targets,
217
+ transforms=transforms,
218
+ ),
219
+ pretty=pretty,
220
+ )
448
221
  return 0
449
222
 
450
223
 
451
224
  def extract_handler(
452
- args: argparse.Namespace,
225
+ *,
226
+ source_type: str,
227
+ source: str,
228
+ format_hint: str | None = None,
229
+ format_explicit: bool = False,
230
+ target: str | None = None,
231
+ output: str | None = None,
232
+ pretty: bool = True,
453
233
  ) -> int:
454
234
  """
455
235
  Extract data from a source.
456
236
 
457
237
  Parameters
458
238
  ----------
459
- args : argparse.Namespace
460
- Parsed command-line arguments.
239
+ source_type : str
240
+ The type of the source (e.g., 'file', 'api', 'database').
241
+ source : str
242
+ The source identifier (e.g., path, URL, DSN).
243
+ format_hint : str | None, optional
244
+ An optional format hint (e.g., 'json', 'csv'). Default is ``None``.
245
+ format_explicit : bool, optional
246
+ Whether the format hint was explicitly provided. Default is ``False``.
247
+ target : str | None, optional
248
+ The target destination (e.g., path, database). Default is ``None``.
249
+ output : str | None, optional
250
+ Path to write output data. Default is ``None``.
251
+ pretty : bool, optional
252
+ Whether to pretty-print output. Default is ``True``.
461
253
 
462
254
  Returns
463
255
  -------
464
256
  int
465
257
  Zero on success.
258
+
466
259
  """
467
- pretty, _ = _presentation_flags(args)
468
- explicit_format = _explicit_cli_format(args)
260
+ explicit_format = format_hint if format_explicit else None
469
261
 
470
- if args.source == '-':
471
- text = _read_stdin_text()
472
- payload = _parse_text_payload(text, getattr(args, 'format', None))
473
- _emit_json(payload, pretty=pretty)
262
+ if source == '-':
263
+ text = cli_io.read_stdin_text()
264
+ payload = cli_io.parse_text_payload(
265
+ text,
266
+ format_hint,
267
+ )
268
+ cli_io.emit_json(payload, pretty=pretty)
474
269
 
475
270
  return 0
476
271
 
477
272
  result = extract(
478
- args.source_type,
479
- args.source,
273
+ source_type,
274
+ source,
480
275
  file_format=explicit_format,
481
276
  )
482
- output_path = getattr(args, 'target', None)
483
- if output_path is None:
484
- output_path = getattr(args, 'output', None)
277
+ output_path = target or output
485
278
 
486
- if not _write_json_output(
279
+ cli_io.emit_or_write(
487
280
  result,
488
281
  output_path,
282
+ pretty=pretty,
489
283
  success_message='Data extracted and saved to',
490
- ):
491
- _emit_json(result, pretty=pretty)
284
+ )
492
285
 
493
286
  return 0
494
287
 
495
288
 
496
289
  def load_handler(
497
- args: argparse.Namespace,
290
+ *,
291
+ source: str,
292
+ target_type: str,
293
+ target: str,
294
+ source_format: str | None = None,
295
+ target_format: str | None = None,
296
+ format_explicit: bool = False,
297
+ output: str | None = None,
298
+ pretty: bool = True,
498
299
  ) -> int:
499
300
  """
500
301
  Load data into a target.
501
302
 
502
303
  Parameters
503
304
  ----------
504
- args : argparse.Namespace
505
- Parsed command-line arguments.
305
+ source : str
306
+ The source payload (e.g., path, inline data).
307
+ target_type : str
308
+ The type of the target (e.g., 'file', 'database').
309
+ target : str
310
+ The target destination (e.g., path, DSN).
311
+ source_format : str | None, optional
312
+ An optional source format hint (e.g., 'json', 'csv'). Default is
313
+ ``None``.
314
+ target_format : str | None, optional
315
+ An optional target format hint (e.g., 'json', 'csv'). Default is
316
+ ``None``.
317
+ format_explicit : bool, optional
318
+ Whether the format hint was explicitly provided. Default is ``False``.
319
+ output : str | None, optional
320
+ Path to write output data. Default is ``None``.
321
+ pretty : bool, optional
322
+ Whether to pretty-print output. Default is ``True``.
506
323
 
507
324
  Returns
508
325
  -------
509
326
  int
510
327
  Zero on success.
511
328
  """
512
- pretty, _ = _presentation_flags(args)
513
- explicit_format = _explicit_cli_format(args)
329
+ explicit_format = target_format if format_explicit else None
514
330
 
515
331
  # Allow piping into load.
516
- source_format = getattr(args, 'source_format', None)
517
332
  source_value = cast(
518
333
  str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
519
- _resolve_cli_payload(
520
- args.source,
334
+ cli_io.resolve_cli_payload(
335
+ source,
521
336
  format_hint=source_format,
522
337
  format_explicit=source_format is not None,
523
338
  hydrate_files=False,
@@ -525,93 +340,81 @@ def load_handler(
525
340
  )
526
341
 
527
342
  # Allow piping out of load for file targets.
528
- if args.target_type == 'file' and args.target == '-':
529
- payload = _materialize_file_payload(
343
+ if target_type == 'file' and target == '-':
344
+ payload = cli_io.materialize_file_payload(
530
345
  source_value,
531
346
  format_hint=source_format,
532
347
  format_explicit=source_format is not None,
533
348
  )
534
- _emit_json(payload, pretty=pretty)
349
+ cli_io.emit_json(payload, pretty=pretty)
535
350
  return 0
536
351
 
537
352
  result = load(
538
353
  source_value,
539
- args.target_type,
540
- args.target,
354
+ target_type,
355
+ target,
541
356
  file_format=explicit_format,
542
357
  )
543
358
 
544
- output_path = getattr(args, 'output', None)
545
- if not _write_json_output(
359
+ output_path = output
360
+ cli_io.emit_or_write(
546
361
  result,
547
362
  output_path,
363
+ pretty=pretty,
548
364
  success_message='Load result saved to',
549
- ):
550
- _emit_json(result, pretty=pretty)
365
+ )
551
366
 
552
367
  return 0
553
368
 
554
369
 
555
- def pipeline_handler(
556
- args: argparse.Namespace,
370
+ def render_handler(
371
+ *,
372
+ config: str | None = None,
373
+ spec: str | None = None,
374
+ table: str | None = None,
375
+ template: TemplateKey | None = None,
376
+ template_path: str | None = None,
377
+ output: str | None = None,
378
+ pretty: bool = True,
379
+ quiet: bool = False,
557
380
  ) -> int:
558
381
  """
559
- Inspect or run a pipeline YAML configuration.
382
+ Render SQL DDL statements from table schema specs.
560
383
 
561
384
  Parameters
562
385
  ----------
563
- args : argparse.Namespace
564
- Parsed command-line arguments.
386
+ config : str | None, optional
387
+ Path to a pipeline YAML configuration. Default is ``None``.
388
+ spec : str | None, optional
389
+ Path to a standalone table spec file. Default is ``None``.
390
+ table : str | None, optional
391
+ Table name filter. Default is ``None``.
392
+ template : TemplateKey | None, optional
393
+ The template key to use for rendering. Default is ``None``.
394
+ template_path : str | None, optional
395
+ Path to a custom template file. Default is ``None``.
396
+ output : str | None, optional
397
+ Path to write output SQL. Default is ``None``.
398
+ pretty : bool, optional
399
+ Whether to pretty-print output. Default is ``True``.
400
+ quiet : bool, optional
401
+ Whether to suppress non-error output. Default is ``False``.
565
402
 
566
403
  Returns
567
404
  -------
568
405
  int
569
406
  Zero on success.
570
407
  """
571
- print(
572
- 'DEPRECATED: use "etlplus check --summary|--jobs" or '
573
- '"etlplus run --job/--pipeline" instead of "etlplus pipeline".',
574
- file=sys.stderr,
575
- )
576
-
577
- cfg = load_pipeline_config(args.config, substitute=True)
578
-
579
- list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
580
- run_target = (
581
- getattr(args, 'run', None)
582
- or getattr(args, 'job', None)
583
- or getattr(args, 'pipeline', None)
584
- )
585
-
586
- if list_flag and not run_target:
587
- print_json({'jobs': _pipeline_summary(cfg)['jobs']})
588
- return 0
589
-
590
- if run_target:
591
- result = run(job=run_target, config_path=args.config)
592
- print_json({'status': 'ok', 'result': result})
593
- return 0
594
-
595
- print_json(_pipeline_summary(cfg))
596
- return 0
597
-
598
-
599
- def render_handler(
600
- args: argparse.Namespace,
601
- ) -> int:
602
- """Render SQL DDL statements from table schema specs."""
603
- _, quiet = _presentation_flags(args)
604
-
605
- template_value = getattr(args, 'template', 'ddl') or 'ddl'
606
- template_path = getattr(args, 'template_path', None)
607
- table_filter = getattr(args, 'table', None)
608
- spec_path = getattr(args, 'spec', None)
609
- config_path = getattr(args, 'config', None)
408
+ template_value: TemplateKey = template or 'ddl'
409
+ template_path_override = template_path
410
+ table_filter = table
411
+ spec_path = spec
412
+ config_path = config
610
413
 
611
414
  # If the provided template points to a file, treat it as a path override.
612
- file_override = template_path
613
- template_key = template_value
614
- if template_path is None:
415
+ file_override = template_path_override
416
+ template_key: TemplateKey | None = template_value
417
+ if template_path_override is None:
615
418
  candidate_path = Path(template_value)
616
419
  if candidate_path.exists():
617
420
  file_override = str(candidate_path)
@@ -644,121 +447,201 @@ def render_handler(
644
447
  sql_text = (
645
448
  '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
646
449
  )
450
+ rendered_output = sql_text if pretty else sql_text.rstrip('\n')
647
451
 
648
- output_path = getattr(args, 'output', None)
452
+ output_path = output
649
453
  if output_path and output_path != '-':
650
- Path(output_path).write_text(sql_text, encoding='utf-8')
454
+ Path(output_path).write_text(rendered_output, encoding='utf-8')
651
455
  if not quiet:
652
456
  print(f'Rendered {len(specs)} schema(s) to {output_path}')
653
457
  return 0
654
458
 
655
- print(sql_text)
459
+ print(rendered_output)
656
460
  return 0
657
461
 
658
462
 
659
463
  def run_handler(
660
- args: argparse.Namespace,
464
+ *,
465
+ config: str,
466
+ job: str | None = None,
467
+ pipeline: str | None = None,
468
+ pretty: bool = True,
661
469
  ) -> int:
662
470
  """
663
471
  Execute an ETL job end-to-end from a pipeline YAML configuration.
664
472
 
665
473
  Parameters
666
474
  ----------
667
- args : argparse.Namespace
668
- Parsed command-line arguments.
475
+ config : str
476
+ Path to the pipeline YAML configuration.
477
+ job : str | None, optional
478
+ Name of the job to run. If not provided, runs the entire pipeline.
479
+ Default is ``None``.
480
+ pipeline : str | None, optional
481
+ Alias for ``job``. Default is ``None``.
482
+ pretty : bool, optional
483
+ Whether to pretty-print output. Default is ``True``.
669
484
 
670
485
  Returns
671
486
  -------
672
487
  int
673
488
  Zero on success.
674
489
  """
675
- cfg = load_pipeline_config(args.config, substitute=True)
490
+ cfg = load_pipeline_config(config, substitute=True)
676
491
 
677
- job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
492
+ job_name = job or pipeline
678
493
  if job_name:
679
- result = run(job=job_name, config_path=args.config)
680
- print_json({'status': 'ok', 'result': result})
494
+ result = run(job=job_name, config_path=config)
495
+ cli_io.emit_json({'status': 'ok', 'result': result}, pretty=pretty)
681
496
  return 0
682
497
 
683
- print_json(_pipeline_summary(cfg))
498
+ cli_io.emit_json(_pipeline_summary(cfg), pretty=pretty)
684
499
  return 0
685
500
 
686
501
 
502
+ TransformOperations = Mapping[
503
+ Literal['filter', 'map', 'select', 'sort', 'aggregate'],
504
+ Any,
505
+ ]
506
+
507
+
687
508
  def transform_handler(
688
- args: argparse.Namespace,
509
+ *,
510
+ source: str,
511
+ operations: JSONData | str,
512
+ target: str | None = None,
513
+ source_format: str | None = None,
514
+ target_format: str | None = None,
515
+ pretty: bool = True,
516
+ format_explicit: bool = False,
689
517
  ) -> int:
690
518
  """
691
519
  Transform data from a source.
692
520
 
693
521
  Parameters
694
522
  ----------
695
- args : argparse.Namespace
696
- Parsed command-line arguments.
523
+ source : str
524
+ The source payload (e.g., path, inline data).
525
+ operations : JSONData | str
526
+ The transformation operations (inline JSON or path).
527
+ target : str | None, optional
528
+ The target destination (e.g., path). Default is ``None``.
529
+ source_format : str | None, optional
530
+ An optional source format hint (e.g., 'json', 'csv'). Default is
531
+ ``None``.
532
+ target_format : str | None, optional
533
+ An optional target format hint (e.g., 'json', 'csv'). Default is
534
+ ``None``.
535
+ pretty : bool, optional
536
+ Whether to pretty-print output. Default is ``True``.
537
+ format_explicit : bool, optional
538
+ Whether the format hint was explicitly provided. Default is ``False``.
697
539
 
698
540
  Returns
699
541
  -------
700
542
  int
701
543
  Zero on success.
544
+
545
+ Raises
546
+ ------
547
+ ValueError
548
+ If the operations payload is not a mapping.
702
549
  """
703
- pretty, _ = _presentation_flags(args)
704
- format_hint: str | None = getattr(args, 'source_format', None)
705
- format_explicit: bool = format_hint is not None
550
+ format_hint: str | None = source_format
551
+ format_explicit = format_hint is not None or format_explicit
706
552
 
707
553
  payload = cast(
708
554
  JSONData | str,
709
- _resolve_cli_payload(
710
- args.source,
555
+ cli_io.resolve_cli_payload(
556
+ source,
711
557
  format_hint=format_hint,
712
558
  format_explicit=format_explicit,
713
559
  ),
714
560
  )
715
561
 
716
- data = transform(payload, args.operations)
562
+ operations_payload = cli_io.resolve_cli_payload(
563
+ operations,
564
+ format_hint=None,
565
+ format_explicit=format_explicit,
566
+ )
567
+ if not isinstance(operations_payload, dict):
568
+ raise ValueError('operations must resolve to a mapping of transforms')
717
569
 
718
- if not _write_json_output(
719
- data,
720
- getattr(args, 'target', None),
721
- success_message='Data transformed and saved to',
722
- ):
723
- _emit_json(data, pretty=pretty)
570
+ data = transform(payload, cast(TransformOperations, operations_payload))
724
571
 
572
+ if target and target != '-':
573
+ File.write_file(target, data, file_format=target_format)
574
+ print(f'Data transformed and saved to {target}')
575
+ return 0
576
+
577
+ cli_io.emit_json(data, pretty=pretty)
725
578
  return 0
726
579
 
727
580
 
728
581
  def validate_handler(
729
- args: argparse.Namespace,
582
+ *,
583
+ source: str,
584
+ rules: JSONData | str,
585
+ source_format: str | None = None,
586
+ target: str | None = None,
587
+ format_explicit: bool = False,
588
+ pretty: bool = True,
730
589
  ) -> int:
731
590
  """
732
591
  Validate data from a source.
733
592
 
734
593
  Parameters
735
594
  ----------
736
- args : argparse.Namespace
737
- Parsed command-line arguments.
595
+ source : str
596
+ The source payload (e.g., path, inline data).
597
+ rules : JSONData | str
598
+ The validation rules (inline JSON or path).
599
+ source_format : str | None, optional
600
+ An optional source format hint (e.g., 'json', 'csv'). Default is
601
+ ``None``.
602
+ target : str | None, optional
603
+ The target destination (e.g., path). Default is ``None``.
604
+ format_explicit : bool, optional
605
+ Whether the format hint was explicitly provided. Default is ``False``.
606
+ pretty : bool, optional
607
+ Whether to pretty-print output. Default is ``True``.
738
608
 
739
609
  Returns
740
610
  -------
741
611
  int
742
612
  Zero on success.
613
+
614
+ Raises
615
+ ------
616
+ ValueError
617
+ If the rules payload is not a mapping.
743
618
  """
744
- pretty, _ = _presentation_flags(args)
745
- format_explicit: bool = getattr(args, '_format_explicit', False)
746
- format_hint: str | None = getattr(args, 'source_format', None)
619
+ format_hint: str | None = source_format
747
620
  payload = cast(
748
621
  JSONData | str,
749
- _resolve_cli_payload(
750
- args.source,
622
+ cli_io.resolve_cli_payload(
623
+ source,
751
624
  format_hint=format_hint,
752
625
  format_explicit=format_explicit,
753
626
  ),
754
627
  )
755
- result = validate(payload, args.rules)
756
628
 
757
- target_path = getattr(args, 'target', None)
629
+ rules_payload = cli_io.resolve_cli_payload(
630
+ rules,
631
+ format_hint=None,
632
+ format_explicit=format_explicit,
633
+ )
634
+ if not isinstance(rules_payload, dict):
635
+ raise ValueError('rules must resolve to a mapping of field rules')
636
+
637
+ field_rules = cast(Mapping[str, FieldRules], rules_payload)
638
+ result = validate(payload, field_rules)
639
+
640
+ target_path = target
758
641
  if target_path:
759
642
  validated_data = result.get('data')
760
643
  if validated_data is not None:
761
- _write_json_output(
644
+ cli_io.write_json_output(
762
645
  validated_data,
763
646
  target_path,
764
647
  success_message='Validation result saved to',
@@ -769,6 +652,6 @@ def validate_handler(
769
652
  file=sys.stderr,
770
653
  )
771
654
  else:
772
- _emit_json(result, pretty=pretty)
655
+ cli_io.emit_json(result, pretty=pretty)
773
656
 
774
657
  return 0