etlplus 0.4.7__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/cli/handlers.py CHANGED
@@ -6,124 +6,120 @@ Command handler functions for the ``etlplus`` command-line interface (CLI).
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
- import argparse
10
- import csv
11
- import io
12
- import json
13
9
  import os
14
10
  import sys
11
+ from collections.abc import Mapping
15
12
  from pathlib import Path
16
13
  from typing import Any
14
+ from typing import Literal
17
15
  from typing import cast
18
16
 
19
17
  from ..config import PipelineConfig
20
18
  from ..config import load_pipeline_config
21
- from ..enums import FileFormat
19
+ from ..database import load_table_spec
20
+ from ..database import render_tables
22
21
  from ..extract import extract
23
22
  from ..file import File
24
23
  from ..load import load
25
24
  from ..run import run
26
25
  from ..transform import transform
27
26
  from ..types import JSONData
28
- from ..utils import json_type
29
- from ..utils import print_json
27
+ from ..types import TemplateKey
28
+ from ..validate import FieldRules
30
29
  from ..validate import validate
30
+ from . import io as cli_io
31
31
 
32
32
  # SECTION: EXPORTS ========================================================== #
33
33
 
34
34
 
35
35
  __all__ = [
36
36
  # Functions
37
- 'cmd_extract',
38
- 'cmd_list',
39
- 'cmd_load',
40
- 'cmd_pipeline',
41
- 'cmd_run',
42
- 'cmd_transform',
43
- 'cmd_validate',
37
+ 'extract_handler',
38
+ 'check_handler',
39
+ 'load_handler',
40
+ 'render_handler',
41
+ 'run_handler',
42
+ 'transform_handler',
43
+ 'validate_handler',
44
44
  ]
45
45
 
46
46
 
47
47
  # SECTION: INTERNAL FUNCTIONS =============================================== #
48
48
 
49
49
 
50
- def _emit_json(
51
- data: Any,
52
- *,
53
- pretty: bool,
54
- ) -> None:
50
+ def _collect_table_specs(
51
+ config_path: str | None,
52
+ spec_path: str | None,
53
+ ) -> list[dict[str, Any]]:
55
54
  """
56
- Emit JSON to stdout honoring the pretty/compact preference.
55
+ Load table schemas from a pipeline config and/or standalone spec.
57
56
 
58
57
  Parameters
59
58
  ----------
60
- data : Any
61
- Arbitrary JSON-serializable payload.
62
- pretty : bool
63
- When ``True`` pretty-print via :func:`print_json`; otherwise emit a
64
- compact JSON string.
65
- """
66
- if pretty:
67
- print_json(data)
68
- return
69
-
70
- dumped = json.dumps(
71
- data,
72
- ensure_ascii=False,
73
- separators=(',', ':'),
74
- )
75
- print(dumped)
76
-
77
-
78
- def _infer_payload_format(
79
- text: str,
80
- ) -> str:
81
- """
82
- Infer JSON vs CSV from payload text.
83
-
84
- Parameters
85
- ----------
86
- text : str
87
- Incoming payload as plain text.
59
+ config_path : str | None
60
+ Path to a pipeline YAML config file.
61
+ spec_path : str | None
62
+ Path to a standalone table spec file.
88
63
 
89
64
  Returns
90
65
  -------
91
- str
92
- ``'json'`` when the text starts with ``{``/``[``, else ``'csv'``.
66
+ list[dict[str, Any]]
67
+ Collected table specification mappings.
93
68
  """
94
- stripped = text.lstrip()
95
- if stripped.startswith('{') or stripped.startswith('['):
96
- return 'json'
97
- return 'csv'
69
+ specs: list[dict[str, Any]] = []
70
+
71
+ if spec_path:
72
+ specs.append(dict(load_table_spec(Path(spec_path))))
73
+
74
+ if config_path:
75
+ cfg = load_pipeline_config(config_path, substitute=True)
76
+ specs.extend(getattr(cfg, 'table_schemas', []))
77
+
78
+ return specs
98
79
 
99
80
 
100
- def _list_sections(
81
+ def _check_sections(
101
82
  cfg: PipelineConfig,
102
- args: argparse.Namespace,
83
+ *,
84
+ jobs: bool,
85
+ pipelines: bool,
86
+ sources: bool,
87
+ targets: bool,
88
+ transforms: bool,
103
89
  ) -> dict[str, Any]:
104
90
  """
105
- Build sectioned metadata output for the list command.
91
+ Build sectioned metadata output for the check command.
106
92
 
107
93
  Parameters
108
94
  ----------
109
95
  cfg : PipelineConfig
110
96
  The loaded pipeline configuration.
111
- args : argparse.Namespace
112
- Parsed command-line arguments.
97
+ jobs : bool
98
+ Whether to include job metadata.
99
+ pipelines : bool
100
+ Whether to include pipeline metadata.
101
+ sources : bool
102
+ Whether to include source metadata.
103
+ targets : bool
104
+ Whether to include target metadata.
105
+ transforms : bool
106
+ Whether to include transform metadata.
113
107
 
114
108
  Returns
115
109
  -------
116
110
  dict[str, Any]
117
- Metadata output for the list command.
111
+ Metadata output for the check command.
118
112
  """
119
113
  sections: dict[str, Any] = {}
120
- if getattr(args, 'pipelines', False):
114
+ if jobs:
115
+ sections['jobs'] = _pipeline_summary(cfg)['jobs']
116
+ if pipelines:
121
117
  sections['pipelines'] = [cfg.name]
122
- if getattr(args, 'sources', False):
118
+ if sources:
123
119
  sections['sources'] = [src.name for src in cfg.sources]
124
- if getattr(args, 'targets', False):
120
+ if targets:
125
121
  sections['targets'] = [tgt.name for tgt in cfg.targets]
126
- if getattr(args, 'transforms', False):
122
+ if transforms:
127
123
  sections['transforms'] = [
128
124
  getattr(trf, 'name', None) for trf in cfg.transforms
129
125
  ]
@@ -132,106 +128,6 @@ def _list_sections(
132
128
  return sections
133
129
 
134
130
 
135
- def _explicit_cli_format(
136
- args: argparse.Namespace,
137
- ) -> str | None:
138
- """Return the explicit CLI format hint when provided."""
139
-
140
- if not getattr(args, '_format_explicit', False):
141
- return None
142
- for attr in ('format', 'target_format', 'source_format'):
143
- value = getattr(args, attr, None)
144
- if value is None:
145
- continue
146
- normalized = value.strip().lower()
147
- if normalized:
148
- return normalized
149
- return None
150
-
151
-
152
- def _materialize_file_payload(
153
- source: object,
154
- *,
155
- format_hint: str | None,
156
- format_explicit: bool,
157
- ) -> JSONData | object:
158
- """
159
- Return structured payloads when ``source`` references a file.
160
-
161
- Parameters
162
- ----------
163
- source : object
164
- Input source of data, possibly a file path.
165
- format_hint : str | None
166
- Explicit format hint: 'json', 'csv', or None to infer.
167
- format_explicit : bool
168
- Whether an explicit format hint was provided.
169
-
170
- Returns
171
- -------
172
- JSONData | object
173
- Parsed JSON data when ``source`` is a file; otherwise the original
174
- ``source`` object.
175
- """
176
- if isinstance(source, (dict, list)):
177
- return cast(JSONData, source)
178
- if not isinstance(source, (str, os.PathLike)):
179
- return source
180
-
181
- path = Path(source)
182
-
183
- normalized_hint = (format_hint or '').strip().lower()
184
- fmt: FileFormat | None = None
185
-
186
- if format_explicit and normalized_hint:
187
- try:
188
- fmt = FileFormat(normalized_hint)
189
- except ValueError:
190
- fmt = None
191
- elif not format_explicit:
192
- suffix = path.suffix.lower().lstrip('.')
193
- if suffix:
194
- try:
195
- fmt = FileFormat(suffix)
196
- except ValueError:
197
- fmt = None
198
-
199
- if fmt is None:
200
- return source
201
- if fmt == FileFormat.CSV:
202
- return _read_csv_rows(path)
203
- return File(path, fmt).read()
204
-
205
-
206
- def _parse_text_payload(
207
- text: str,
208
- fmt: str | None,
209
- ) -> JSONData | str:
210
- """
211
- Parse JSON/CSV text into a Python payload.
212
-
213
- Parameters
214
- ----------
215
- text : str
216
- The input text payload.
217
- fmt : str | None
218
- Explicit format hint: 'json', 'csv', or None to infer.
219
-
220
- Returns
221
- -------
222
- JSONData | str
223
- The parsed payload as JSON data or raw text.
224
- """
225
-
226
- effective = (fmt or '').strip().lower() or _infer_payload_format(text)
227
- if effective == 'json':
228
- return cast(JSONData, json_type(text))
229
- if effective == 'csv':
230
- reader = csv.DictReader(io.StringIO(text))
231
- return [dict(row) for row in reader]
232
- return text
233
-
234
-
235
131
  def _pipeline_summary(
236
132
  cfg: PipelineConfig,
237
133
  ) -> dict[str, Any]:
@@ -260,406 +156,502 @@ def _pipeline_summary(
260
156
  }
261
157
 
262
158
 
263
- def _presentation_flags(
264
- args: argparse.Namespace,
265
- ) -> tuple[bool, bool]:
266
- """Return presentation toggles from the parsed namespace.
267
-
268
- Parameters
269
- ----------
270
- args : argparse.Namespace
271
- Namespace produced by the CLI parser.
272
-
273
- Returns
274
- -------
275
- tuple[bool, bool]
276
- Pair of ``(pretty, quiet)`` flags with safe defaults.
277
- """
278
- return getattr(args, 'pretty', True), getattr(args, 'quiet', False)
279
-
280
-
281
- def _read_csv_rows(
282
- path: Path,
283
- ) -> list[dict[str, str]]:
284
- """
285
- Read CSV rows into dictionaries.
286
-
287
- Parameters
288
- ----------
289
- path : Path
290
- Path to a CSV file.
291
-
292
- Returns
293
- -------
294
- list[dict[str, str]]
295
- List of dictionaries, each representing a row in the CSV file.
296
- """
297
- with path.open(newline='', encoding='utf-8') as handle:
298
- reader = csv.DictReader(handle)
299
- return [dict(row) for row in reader]
300
-
301
-
302
- def _read_stdin_text() -> str:
303
- """
304
- Return every character from ``stdin`` as a single string.
305
-
306
- Returns
307
- -------
308
- str
309
- Entire ``stdin`` contents.
310
- """
311
- return sys.stdin.read()
159
+ # SECTION: FUNCTIONS ======================================================== #
312
160
 
313
161
 
314
- def _resolve_cli_payload(
315
- source: object,
162
+ def check_handler(
316
163
  *,
317
- format_hint: str | None,
318
- format_explicit: bool,
319
- hydrate_files: bool = True,
320
- ) -> JSONData | object:
164
+ config: str,
165
+ jobs: bool = False,
166
+ pipelines: bool = False,
167
+ sources: bool = False,
168
+ summary: bool = False,
169
+ targets: bool = False,
170
+ transforms: bool = False,
171
+ substitute: bool = True,
172
+ pretty: bool = True,
173
+ ) -> int:
321
174
  """
322
- Normalize CLI-provided payloads, honoring stdin and inline data.
175
+ Print requested pipeline sections from a YAML configuration.
323
176
 
324
177
  Parameters
325
178
  ----------
326
- source : object
327
- Raw CLI value (path, inline payload, or ``'-'`` for stdin).
328
- format_hint : str | None
329
- Explicit format hint supplied by the CLI option.
330
- format_explicit : bool
331
- Flag indicating whether the format hint was explicitly provided.
332
- hydrate_files : bool, optional
333
- When ``True`` (default) materialize file paths into structured data.
334
- When ``False``, keep the original path so downstream code can stream
335
- from disk directly.
179
+ config : str
180
+ Path to the pipeline YAML configuration.
181
+ jobs : bool, optional
182
+ Whether to include job metadata. Default is ``False``.
183
+ pipelines : bool, optional
184
+ Whether to include pipeline metadata. Default is ``False``.
185
+ sources : bool, optional
186
+ Whether to include source metadata. Default is ``False``.
187
+ summary : bool, optional
188
+ Whether to print a full summary of the pipeline. Default is ``False``.
189
+ targets : bool, optional
190
+ Whether to include target metadata. Default is ``False``.
191
+ transforms : bool, optional
192
+ Whether to include transform metadata. Default is ``False``.
193
+ substitute : bool, optional
194
+ Whether to perform environment variable substitution. Default is
195
+ ``True``.
196
+ pretty : bool, optional
197
+ Whether to pretty-print output. Default is ``True``.
336
198
 
337
199
  Returns
338
200
  -------
339
- JSONData | object
340
- Parsed payload or the original source value when hydration is
341
- disabled.
342
- """
343
-
344
- if isinstance(source, (os.PathLike, str)) and str(source) == '-':
345
- text = _read_stdin_text()
346
- return _parse_text_payload(text, format_hint)
201
+ int
202
+ Zero on success.
347
203
 
348
- if not hydrate_files:
349
- return source
204
+ """
205
+ cfg = load_pipeline_config(config, substitute=substitute)
206
+ if summary:
207
+ cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
208
+ return 0
350
209
 
351
- return _materialize_file_payload(
352
- source,
353
- format_hint=format_hint,
354
- format_explicit=format_explicit,
210
+ cli_io.emit_json(
211
+ _check_sections(
212
+ cfg,
213
+ jobs=jobs,
214
+ pipelines=pipelines,
215
+ sources=sources,
216
+ targets=targets,
217
+ transforms=transforms,
218
+ ),
219
+ pretty=pretty,
355
220
  )
221
+ return 0
356
222
 
357
223
 
358
- def _write_json_output(
359
- data: Any,
360
- output_path: str | None,
224
+ def extract_handler(
361
225
  *,
362
- success_message: str,
363
- ) -> bool:
364
- """
365
- Optionally persist JSON data to disk.
366
-
367
- Parameters
368
- ----------
369
- data : Any
370
- Data to write.
371
- output_path : str | None
372
- Path to write the output to. None to print to stdout.
373
- success_message : str
374
- Message to print upon successful write.
375
-
376
- Returns
377
- -------
378
- bool
379
- True if output was written to a file, False if printed to stdout.
380
- """
381
- if not output_path or output_path == '-':
382
- return False
383
- File(Path(output_path), FileFormat.JSON).write_json(data)
384
- print(f'{success_message} {output_path}')
385
- return True
386
-
387
-
388
- # SECTION: FUNCTIONS ======================================================== #
389
-
390
-
391
- def cmd_extract(
392
- args: argparse.Namespace,
226
+ source_type: str,
227
+ source: str,
228
+ format_hint: str | None = None,
229
+ format_explicit: bool = False,
230
+ target: str | None = None,
231
+ output: str | None = None,
232
+ pretty: bool = True,
393
233
  ) -> int:
394
234
  """
395
235
  Extract data from a source.
396
236
 
397
237
  Parameters
398
238
  ----------
399
- args : argparse.Namespace
400
- Parsed command-line arguments.
239
+ source_type : str
240
+ The type of the source (e.g., 'file', 'api', 'database').
241
+ source : str
242
+ The source identifier (e.g., path, URL, DSN).
243
+ format_hint : str | None, optional
244
+ An optional format hint (e.g., 'json', 'csv'). Default is ``None``.
245
+ format_explicit : bool, optional
246
+ Whether the format hint was explicitly provided. Default is ``False``.
247
+ target : str | None, optional
248
+ The target destination (e.g., path, database). Default is ``None``.
249
+ output : str | None, optional
250
+ Path to write output data. Default is ``None``.
251
+ pretty : bool, optional
252
+ Whether to pretty-print output. Default is ``True``.
401
253
 
402
254
  Returns
403
255
  -------
404
256
  int
405
257
  Zero on success.
258
+
406
259
  """
407
- pretty, _ = _presentation_flags(args)
408
- explicit_format = _explicit_cli_format(args)
260
+ explicit_format = format_hint if format_explicit else None
409
261
 
410
- if args.source == '-':
411
- text = _read_stdin_text()
412
- payload = _parse_text_payload(text, getattr(args, 'format', None))
413
- _emit_json(payload, pretty=pretty)
262
+ if source == '-':
263
+ text = cli_io.read_stdin_text()
264
+ payload = cli_io.parse_text_payload(
265
+ text,
266
+ format_hint,
267
+ )
268
+ cli_io.emit_json(payload, pretty=pretty)
414
269
 
415
270
  return 0
416
271
 
417
272
  result = extract(
418
- args.source_type,
419
- args.source,
273
+ source_type,
274
+ source,
420
275
  file_format=explicit_format,
421
276
  )
422
- output_path = getattr(args, 'target', None)
423
- if output_path is None:
424
- output_path = getattr(args, 'output', None)
277
+ output_path = target or output
425
278
 
426
- if not _write_json_output(
279
+ cli_io.emit_or_write(
427
280
  result,
428
281
  output_path,
282
+ pretty=pretty,
429
283
  success_message='Data extracted and saved to',
430
- ):
431
- _emit_json(result, pretty=pretty)
284
+ )
432
285
 
433
286
  return 0
434
287
 
435
288
 
436
- def cmd_validate(
437
- args: argparse.Namespace,
289
+ def load_handler(
290
+ *,
291
+ source: str,
292
+ target_type: str,
293
+ target: str,
294
+ source_format: str | None = None,
295
+ target_format: str | None = None,
296
+ format_explicit: bool = False,
297
+ output: str | None = None,
298
+ pretty: bool = True,
438
299
  ) -> int:
439
300
  """
440
- Validate data from a source.
301
+ Load data into a target.
441
302
 
442
303
  Parameters
443
304
  ----------
444
- args : argparse.Namespace
445
- Parsed command-line arguments.
305
+ source : str
306
+ The source payload (e.g., path, inline data).
307
+ target_type : str
308
+ The type of the target (e.g., 'file', 'database').
309
+ target : str
310
+ The target destination (e.g., path, DSN).
311
+ source_format : str | None, optional
312
+ An optional source format hint (e.g., 'json', 'csv'). Default is
313
+ ``None``.
314
+ target_format : str | None, optional
315
+ An optional target format hint (e.g., 'json', 'csv'). Default is
316
+ ``None``.
317
+ format_explicit : bool, optional
318
+ Whether the format hint was explicitly provided. Default is ``False``.
319
+ output : str | None, optional
320
+ Path to write output data. Default is ``None``.
321
+ pretty : bool, optional
322
+ Whether to pretty-print output. Default is ``True``.
446
323
 
447
324
  Returns
448
325
  -------
449
326
  int
450
327
  Zero on success.
451
328
  """
452
- pretty, _quiet = _presentation_flags(args)
453
- format_explicit: bool = getattr(args, '_format_explicit', False)
454
- format_hint: str | None = getattr(args, 'source_format', None)
455
- payload = cast(
456
- JSONData | str,
457
- _resolve_cli_payload(
458
- args.source,
459
- format_hint=format_hint,
460
- format_explicit=format_explicit,
329
+ explicit_format = target_format if format_explicit else None
330
+
331
+ # Allow piping into load.
332
+ source_value = cast(
333
+ str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
334
+ cli_io.resolve_cli_payload(
335
+ source,
336
+ format_hint=source_format,
337
+ format_explicit=source_format is not None,
338
+ hydrate_files=False,
461
339
  ),
462
340
  )
463
- result = validate(payload, args.rules)
464
341
 
465
- target_path = getattr(args, 'target', None)
466
- if target_path:
467
- validated_data = result.get('data')
468
- if validated_data is not None:
469
- _write_json_output(
470
- validated_data,
471
- target_path,
472
- success_message='Validation result saved to',
473
- )
474
- else:
475
- print(
476
- f'Validation failed, no data to save for {target_path}',
477
- file=sys.stderr,
478
- )
479
- else:
480
- _emit_json(result, pretty=pretty)
342
+ # Allow piping out of load for file targets.
343
+ if target_type == 'file' and target == '-':
344
+ payload = cli_io.materialize_file_payload(
345
+ source_value,
346
+ format_hint=source_format,
347
+ format_explicit=source_format is not None,
348
+ )
349
+ cli_io.emit_json(payload, pretty=pretty)
350
+ return 0
351
+
352
+ result = load(
353
+ source_value,
354
+ target_type,
355
+ target,
356
+ file_format=explicit_format,
357
+ )
358
+
359
+ output_path = output
360
+ cli_io.emit_or_write(
361
+ result,
362
+ output_path,
363
+ pretty=pretty,
364
+ success_message='Load result saved to',
365
+ )
481
366
 
482
367
  return 0
483
368
 
484
369
 
485
- def cmd_transform(
486
- args: argparse.Namespace,
370
+ def render_handler(
371
+ *,
372
+ config: str | None = None,
373
+ spec: str | None = None,
374
+ table: str | None = None,
375
+ template: TemplateKey | None = None,
376
+ template_path: str | None = None,
377
+ output: str | None = None,
378
+ pretty: bool = True,
379
+ quiet: bool = False,
487
380
  ) -> int:
488
381
  """
489
- Transform data from a source.
382
+ Render SQL DDL statements from table schema specs.
490
383
 
491
384
  Parameters
492
385
  ----------
493
- args : argparse.Namespace
494
- Parsed command-line arguments.
386
+ config : str | None, optional
387
+ Path to a pipeline YAML configuration. Default is ``None``.
388
+ spec : str | None, optional
389
+ Path to a standalone table spec file. Default is ``None``.
390
+ table : str | None, optional
391
+ Table name filter. Default is ``None``.
392
+ template : TemplateKey | None, optional
393
+ The template key to use for rendering. Default is ``None``.
394
+ template_path : str | None, optional
395
+ Path to a custom template file. Default is ``None``.
396
+ output : str | None, optional
397
+ Path to write output SQL. Default is ``None``.
398
+ pretty : bool, optional
399
+ Whether to pretty-print output. Default is ``True``.
400
+ quiet : bool, optional
401
+ Whether to suppress non-error output. Default is ``False``.
495
402
 
496
403
  Returns
497
404
  -------
498
405
  int
499
406
  Zero on success.
500
407
  """
501
- pretty, _quiet = _presentation_flags(args)
502
- format_hint: str | None = getattr(args, 'source_format', None)
503
- format_explicit: bool = format_hint is not None
408
+ template_value: TemplateKey = template or 'ddl'
409
+ template_path_override = template_path
410
+ table_filter = table
411
+ spec_path = spec
412
+ config_path = config
413
+
414
+ # If the provided template points to a file, treat it as a path override.
415
+ file_override = template_path_override
416
+ template_key: TemplateKey | None = template_value
417
+ if template_path_override is None:
418
+ candidate_path = Path(template_value)
419
+ if candidate_path.exists():
420
+ file_override = str(candidate_path)
421
+ template_key = None
422
+
423
+ specs = _collect_table_specs(config_path, spec_path)
424
+ if table_filter:
425
+ specs = [
426
+ spec
427
+ for spec in specs
428
+ if str(spec.get('table')) == table_filter
429
+ or str(spec.get('name', '')) == table_filter
430
+ ]
504
431
 
505
- payload = cast(
506
- JSONData | str,
507
- _resolve_cli_payload(
508
- args.source,
509
- format_hint=format_hint,
510
- format_explicit=format_explicit,
511
- ),
512
- )
432
+ if not specs:
433
+ target_desc = table_filter or 'table_schemas'
434
+ print(
435
+ 'No table schemas found for '
436
+ f'{target_desc}. Provide --spec or a pipeline --config with '
437
+ 'table_schemas.',
438
+ file=sys.stderr,
439
+ )
440
+ return 1
513
441
 
514
- data = transform(payload, args.operations)
442
+ rendered_chunks = render_tables(
443
+ specs,
444
+ template=template_key,
445
+ template_path=file_override,
446
+ )
447
+ sql_text = (
448
+ '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
449
+ )
450
+ rendered_output = sql_text if pretty else sql_text.rstrip('\n')
515
451
 
516
- if not _write_json_output(
517
- data,
518
- getattr(args, 'target', None),
519
- success_message='Data transformed and saved to',
520
- ):
521
- _emit_json(data, pretty=pretty)
452
+ output_path = output
453
+ if output_path and output_path != '-':
454
+ Path(output_path).write_text(rendered_output, encoding='utf-8')
455
+ if not quiet:
456
+ print(f'Rendered {len(specs)} schema(s) to {output_path}')
457
+ return 0
522
458
 
459
+ print(rendered_output)
523
460
  return 0
524
461
 
525
462
 
526
- def cmd_load(
527
- args: argparse.Namespace,
463
+ def run_handler(
464
+ *,
465
+ config: str,
466
+ job: str | None = None,
467
+ pipeline: str | None = None,
468
+ pretty: bool = True,
528
469
  ) -> int:
529
470
  """
530
- Load data into a target.
471
+ Execute an ETL job end-to-end from a pipeline YAML configuration.
531
472
 
532
473
  Parameters
533
474
  ----------
534
- args : argparse.Namespace
535
- Parsed command-line arguments.
475
+ config : str
476
+ Path to the pipeline YAML configuration.
477
+ job : str | None, optional
478
+ Name of the job to run. If not provided, runs the entire pipeline.
479
+ Default is ``None``.
480
+ pipeline : str | None, optional
481
+ Alias for ``job``. Default is ``None``.
482
+ pretty : bool, optional
483
+ Whether to pretty-print output. Default is ``True``.
536
484
 
537
485
  Returns
538
486
  -------
539
487
  int
540
488
  Zero on success.
541
489
  """
542
- pretty, _ = _presentation_flags(args)
543
- explicit_format = _explicit_cli_format(args)
544
-
545
- # Allow piping into load.
546
- source_format = getattr(args, 'source_format', None)
547
- source_value = cast(
548
- str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
549
- _resolve_cli_payload(
550
- args.source,
551
- format_hint=source_format,
552
- format_explicit=source_format is not None,
553
- hydrate_files=False,
554
- ),
555
- )
490
+ cfg = load_pipeline_config(config, substitute=True)
556
491
 
557
- # Allow piping out of load for file targets.
558
- if args.target_type == 'file' and args.target == '-':
559
- payload = _materialize_file_payload(
560
- source_value,
561
- format_hint=source_format,
562
- format_explicit=source_format is not None,
563
- )
564
- _emit_json(payload, pretty=pretty)
492
+ job_name = job or pipeline
493
+ if job_name:
494
+ result = run(job=job_name, config_path=config)
495
+ cli_io.emit_json({'status': 'ok', 'result': result}, pretty=pretty)
565
496
  return 0
566
497
 
567
- result = load(
568
- source_value,
569
- args.target_type,
570
- args.target,
571
- file_format=explicit_format,
572
- )
498
+ cli_io.emit_json(_pipeline_summary(cfg), pretty=pretty)
499
+ return 0
573
500
 
574
- output_path = getattr(args, 'output', None)
575
- if not _write_json_output(
576
- result,
577
- output_path,
578
- success_message='Load result saved to',
579
- ):
580
- _emit_json(result, pretty=pretty)
581
501
 
582
- return 0
502
+ TransformOperations = Mapping[
503
+ Literal['filter', 'map', 'select', 'sort', 'aggregate'],
504
+ Any,
505
+ ]
583
506
 
584
507
 
585
- def cmd_pipeline(
586
- args: argparse.Namespace,
508
+ def transform_handler(
509
+ *,
510
+ source: str,
511
+ operations: JSONData | str,
512
+ target: str | None = None,
513
+ source_format: str | None = None,
514
+ target_format: str | None = None,
515
+ pretty: bool = True,
516
+ format_explicit: bool = False,
587
517
  ) -> int:
588
518
  """
589
- Inspect or run a pipeline YAML configuration.
519
+ Transform data from a source.
590
520
 
591
521
  Parameters
592
522
  ----------
593
- args : argparse.Namespace
594
- Parsed command-line arguments.
523
+ source : str
524
+ The source payload (e.g., path, inline data).
525
+ operations : JSONData | str
526
+ The transformation operations (inline JSON or path).
527
+ target : str | None, optional
528
+ The target destination (e.g., path). Default is ``None``.
529
+ source_format : str | None, optional
530
+ An optional source format hint (e.g., 'json', 'csv'). Default is
531
+ ``None``.
532
+ target_format : str | None, optional
533
+ An optional target format hint (e.g., 'json', 'csv'). Default is
534
+ ``None``.
535
+ pretty : bool, optional
536
+ Whether to pretty-print output. Default is ``True``.
537
+ format_explicit : bool, optional
538
+ Whether the format hint was explicitly provided. Default is ``False``.
595
539
 
596
540
  Returns
597
541
  -------
598
542
  int
599
543
  Zero on success.
544
+
545
+ Raises
546
+ ------
547
+ ValueError
548
+ If the operations payload is not a mapping.
600
549
  """
601
- cfg = load_pipeline_config(args.config, substitute=True)
550
+ format_hint: str | None = source_format
551
+ format_explicit = format_hint is not None or format_explicit
552
+
553
+ payload = cast(
554
+ JSONData | str,
555
+ cli_io.resolve_cli_payload(
556
+ source,
557
+ format_hint=format_hint,
558
+ format_explicit=format_explicit,
559
+ ),
560
+ )
602
561
 
603
- list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
604
- run_target = (
605
- getattr(args, 'run', None)
606
- or getattr(args, 'job', None)
607
- or getattr(args, 'pipeline', None)
562
+ operations_payload = cli_io.resolve_cli_payload(
563
+ operations,
564
+ format_hint=None,
565
+ format_explicit=format_explicit,
608
566
  )
567
+ if not isinstance(operations_payload, dict):
568
+ raise ValueError('operations must resolve to a mapping of transforms')
609
569
 
610
- if list_flag and not run_target:
611
- print_json({'jobs': _pipeline_summary(cfg)['jobs']})
612
- return 0
570
+ data = transform(payload, cast(TransformOperations, operations_payload))
613
571
 
614
- if run_target:
615
- result = run(job=run_target, config_path=args.config)
616
- print_json({'status': 'ok', 'result': result})
572
+ if target and target != '-':
573
+ File.write_file(target, data, file_format=target_format)
574
+ print(f'Data transformed and saved to {target}')
617
575
  return 0
618
576
 
619
- print_json(_pipeline_summary(cfg))
577
+ cli_io.emit_json(data, pretty=pretty)
620
578
  return 0
621
579
 
622
580
 
623
- def cmd_list(args: argparse.Namespace) -> int:
581
+ def validate_handler(
582
+ *,
583
+ source: str,
584
+ rules: JSONData | str,
585
+ source_format: str | None = None,
586
+ target: str | None = None,
587
+ format_explicit: bool = False,
588
+ pretty: bool = True,
589
+ ) -> int:
624
590
  """
625
- Print requested pipeline sections from a YAML configuration.
591
+ Validate data from a source.
626
592
 
627
593
  Parameters
628
594
  ----------
629
- args : argparse.Namespace
630
- Parsed command-line arguments.
595
+ source : str
596
+ The source payload (e.g., path, inline data).
597
+ rules : JSONData | str
598
+ The validation rules (inline JSON or path).
599
+ source_format : str | None, optional
600
+ An optional source format hint (e.g., 'json', 'csv'). Default is
601
+ ``None``.
602
+ target : str | None, optional
603
+ The target destination (e.g., path). Default is ``None``.
604
+ format_explicit : bool, optional
605
+ Whether the format hint was explicitly provided. Default is ``False``.
606
+ pretty : bool, optional
607
+ Whether to pretty-print output. Default is ``True``.
631
608
 
632
609
  Returns
633
610
  -------
634
611
  int
635
612
  Zero on success.
636
- """
637
- cfg = load_pipeline_config(args.config, substitute=True)
638
- print_json(_list_sections(cfg, args))
639
- return 0
640
-
641
613
 
642
- def cmd_run(args: argparse.Namespace) -> int:
614
+ Raises
615
+ ------
616
+ ValueError
617
+ If the rules payload is not a mapping.
643
618
  """
644
- Execute an ETL job end-to-end from a pipeline YAML configuration.
619
+ format_hint: str | None = source_format
620
+ payload = cast(
621
+ JSONData | str,
622
+ cli_io.resolve_cli_payload(
623
+ source,
624
+ format_hint=format_hint,
625
+ format_explicit=format_explicit,
626
+ ),
627
+ )
645
628
 
646
- Parameters
647
- ----------
648
- args : argparse.Namespace
649
- Parsed command-line arguments.
629
+ rules_payload = cli_io.resolve_cli_payload(
630
+ rules,
631
+ format_hint=None,
632
+ format_explicit=format_explicit,
633
+ )
634
+ if not isinstance(rules_payload, dict):
635
+ raise ValueError('rules must resolve to a mapping of field rules')
650
636
 
651
- Returns
652
- -------
653
- int
654
- Zero on success.
655
- """
656
- cfg = load_pipeline_config(args.config, substitute=True)
637
+ field_rules = cast(Mapping[str, FieldRules], rules_payload)
638
+ result = validate(payload, field_rules)
657
639
 
658
- job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
659
- if job_name:
660
- result = run(job=job_name, config_path=args.config)
661
- print_json({'status': 'ok', 'result': result})
662
- return 0
640
+ target_path = target
641
+ if target_path:
642
+ validated_data = result.get('data')
643
+ if validated_data is not None:
644
+ cli_io.write_json_output(
645
+ validated_data,
646
+ target_path,
647
+ success_message='Validation result saved to',
648
+ )
649
+ else:
650
+ print(
651
+ f'Validation failed, no data to save for {target_path}',
652
+ file=sys.stderr,
653
+ )
654
+ else:
655
+ cli_io.emit_json(result, pretty=pretty)
663
656
 
664
- print_json(_pipeline_summary(cfg))
665
657
  return 0