etlplus 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registries.
etlplus/cli/handlers.py CHANGED
@@ -6,11 +6,12 @@ Command handler functions for the ``etlplus`` command-line interface (CLI).
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
- import argparse
10
9
  import os
11
10
  import sys
11
+ from collections.abc import Mapping
12
12
  from pathlib import Path
13
13
  from typing import Any
14
+ from typing import Literal
14
15
  from typing import cast
15
16
 
16
17
  from ..config import PipelineConfig
@@ -18,11 +19,13 @@ from ..config import load_pipeline_config
18
19
  from ..database import load_table_spec
19
20
  from ..database import render_tables
20
21
  from ..extract import extract
22
+ from ..file import File
21
23
  from ..load import load
22
24
  from ..run import run
23
25
  from ..transform import transform
24
26
  from ..types import JSONData
25
27
  from ..types import TemplateKey
28
+ from ..validate import FieldRules
26
29
  from ..validate import validate
27
30
  from . import io as cli_io
28
31
 
@@ -77,7 +80,12 @@ def _collect_table_specs(
77
80
 
78
81
  def _check_sections(
79
82
  cfg: PipelineConfig,
80
- args: argparse.Namespace,
83
+ *,
84
+ jobs: bool,
85
+ pipelines: bool,
86
+ sources: bool,
87
+ targets: bool,
88
+ transforms: bool,
81
89
  ) -> dict[str, Any]:
82
90
  """
83
91
  Build sectioned metadata output for the check command.
@@ -86,8 +94,16 @@ def _check_sections(
86
94
  ----------
87
95
  cfg : PipelineConfig
88
96
  The loaded pipeline configuration.
89
- args : argparse.Namespace
90
- Parsed command-line arguments.
97
+ jobs : bool
98
+ Whether to include job metadata.
99
+ pipelines : bool
100
+ Whether to include pipeline metadata.
101
+ sources : bool
102
+ Whether to include source metadata.
103
+ targets : bool
104
+ Whether to include target metadata.
105
+ transforms : bool
106
+ Whether to include transform metadata.
91
107
 
92
108
  Returns
93
109
  -------
@@ -95,15 +111,15 @@ def _check_sections(
95
111
  Metadata output for the check command.
96
112
  """
97
113
  sections: dict[str, Any] = {}
98
- if getattr(args, 'jobs', False):
114
+ if jobs:
99
115
  sections['jobs'] = _pipeline_summary(cfg)['jobs']
100
- if getattr(args, 'pipelines', False):
116
+ if pipelines:
101
117
  sections['pipelines'] = [cfg.name]
102
- if getattr(args, 'sources', False):
118
+ if sources:
103
119
  sections['sources'] = [src.name for src in cfg.sources]
104
- if getattr(args, 'targets', False):
120
+ if targets:
105
121
  sections['targets'] = [tgt.name for tgt in cfg.targets]
106
- if getattr(args, 'transforms', False):
122
+ if transforms:
107
123
  sections['transforms'] = [
108
124
  getattr(trf, 'name', None) for trf in cfg.transforms
109
125
  ]
@@ -144,67 +160,121 @@ def _pipeline_summary(
144
160
 
145
161
 
146
162
  def check_handler(
147
- args: argparse.Namespace,
163
+ *,
164
+ config: str,
165
+ jobs: bool = False,
166
+ pipelines: bool = False,
167
+ sources: bool = False,
168
+ summary: bool = False,
169
+ targets: bool = False,
170
+ transforms: bool = False,
171
+ substitute: bool = True,
172
+ pretty: bool = True,
148
173
  ) -> int:
149
174
  """
150
175
  Print requested pipeline sections from a YAML configuration.
151
176
 
152
177
  Parameters
153
178
  ----------
154
- args : argparse.Namespace
155
- Parsed command-line arguments.
179
+ config : str
180
+ Path to the pipeline YAML configuration.
181
+ jobs : bool, optional
182
+ Whether to include job metadata. Default is ``False``.
183
+ pipelines : bool, optional
184
+ Whether to include pipeline metadata. Default is ``False``.
185
+ sources : bool, optional
186
+ Whether to include source metadata. Default is ``False``.
187
+ summary : bool, optional
188
+ Whether to print a full summary of the pipeline. Default is ``False``.
189
+ targets : bool, optional
190
+ Whether to include target metadata. Default is ``False``.
191
+ transforms : bool, optional
192
+ Whether to include transform metadata. Default is ``False``.
193
+ substitute : bool, optional
194
+ Whether to perform environment variable substitution. Default is
195
+ ``True``.
196
+ pretty : bool, optional
197
+ Whether to pretty-print output. Default is ``True``.
156
198
 
157
199
  Returns
158
200
  -------
159
201
  int
160
202
  Zero on success.
203
+
161
204
  """
162
- cfg = load_pipeline_config(args.config, substitute=True)
163
- if getattr(args, 'summary', False):
205
+ cfg = load_pipeline_config(config, substitute=substitute)
206
+ if summary:
164
207
  cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
165
208
  return 0
166
209
 
167
- cli_io.emit_json(_check_sections(cfg, args), pretty=True)
210
+ cli_io.emit_json(
211
+ _check_sections(
212
+ cfg,
213
+ jobs=jobs,
214
+ pipelines=pipelines,
215
+ sources=sources,
216
+ targets=targets,
217
+ transforms=transforms,
218
+ ),
219
+ pretty=pretty,
220
+ )
168
221
  return 0
169
222
 
170
223
 
171
224
  def extract_handler(
172
- args: argparse.Namespace,
225
+ *,
226
+ source_type: str,
227
+ source: str,
228
+ format_hint: str | None = None,
229
+ format_explicit: bool = False,
230
+ target: str | None = None,
231
+ output: str | None = None,
232
+ pretty: bool = True,
173
233
  ) -> int:
174
234
  """
175
235
  Extract data from a source.
176
236
 
177
237
  Parameters
178
238
  ----------
179
- args : argparse.Namespace
180
- Parsed command-line arguments.
239
+ source_type : str
240
+ The type of the source (e.g., 'file', 'api', 'database').
241
+ source : str
242
+ The source identifier (e.g., path, URL, DSN).
243
+ format_hint : str | None, optional
244
+ An optional format hint (e.g., 'json', 'csv'). Default is ``None``.
245
+ format_explicit : bool, optional
246
+ Whether the format hint was explicitly provided. Default is ``False``.
247
+ target : str | None, optional
248
+ The target destination (e.g., path, database). Default is ``None``.
249
+ output : str | None, optional
250
+ Path to write output data. Default is ``None``.
251
+ pretty : bool, optional
252
+ Whether to pretty-print output. Default is ``True``.
181
253
 
182
254
  Returns
183
255
  -------
184
256
  int
185
257
  Zero on success.
258
+
186
259
  """
187
- pretty, _ = cli_io.presentation_flags(args)
188
- explicit_format = cli_io.explicit_cli_format(args)
260
+ explicit_format = format_hint if format_explicit else None
189
261
 
190
- if args.source == '-':
262
+ if source == '-':
191
263
  text = cli_io.read_stdin_text()
192
264
  payload = cli_io.parse_text_payload(
193
265
  text,
194
- getattr(args, 'format', None),
266
+ format_hint,
195
267
  )
196
268
  cli_io.emit_json(payload, pretty=pretty)
197
269
 
198
270
  return 0
199
271
 
200
272
  result = extract(
201
- args.source_type,
202
- args.source,
273
+ source_type,
274
+ source,
203
275
  file_format=explicit_format,
204
276
  )
205
- output_path = getattr(args, 'target', None)
206
- if output_path is None:
207
- output_path = getattr(args, 'output', None)
277
+ output_path = target or output
208
278
 
209
279
  cli_io.emit_or_write(
210
280
  result,
@@ -217,30 +287,52 @@ def extract_handler(
217
287
 
218
288
 
219
289
  def load_handler(
220
- args: argparse.Namespace,
290
+ *,
291
+ source: str,
292
+ target_type: str,
293
+ target: str,
294
+ source_format: str | None = None,
295
+ target_format: str | None = None,
296
+ format_explicit: bool = False,
297
+ output: str | None = None,
298
+ pretty: bool = True,
221
299
  ) -> int:
222
300
  """
223
301
  Load data into a target.
224
302
 
225
303
  Parameters
226
304
  ----------
227
- args : argparse.Namespace
228
- Parsed command-line arguments.
305
+ source : str
306
+ The source payload (e.g., path, inline data).
307
+ target_type : str
308
+ The type of the target (e.g., 'file', 'database').
309
+ target : str
310
+ The target destination (e.g., path, DSN).
311
+ source_format : str | None, optional
312
+ An optional source format hint (e.g., 'json', 'csv'). Default is
313
+ ``None``.
314
+ target_format : str | None, optional
315
+ An optional target format hint (e.g., 'json', 'csv'). Default is
316
+ ``None``.
317
+ format_explicit : bool, optional
318
+ Whether the format hint was explicitly provided. Default is ``False``.
319
+ output : str | None, optional
320
+ Path to write output data. Default is ``None``.
321
+ pretty : bool, optional
322
+ Whether to pretty-print output. Default is ``True``.
229
323
 
230
324
  Returns
231
325
  -------
232
326
  int
233
327
  Zero on success.
234
328
  """
235
- pretty, _ = cli_io.presentation_flags(args)
236
- explicit_format = cli_io.explicit_cli_format(args)
329
+ explicit_format = target_format if format_explicit else None
237
330
 
238
331
  # Allow piping into load.
239
- source_format = getattr(args, 'source_format', None)
240
332
  source_value = cast(
241
333
  str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
242
334
  cli_io.resolve_cli_payload(
243
- args.source,
335
+ source,
244
336
  format_hint=source_format,
245
337
  format_explicit=source_format is not None,
246
338
  hydrate_files=False,
@@ -248,7 +340,7 @@ def load_handler(
248
340
  )
249
341
 
250
342
  # Allow piping out of load for file targets.
251
- if args.target_type == 'file' and args.target == '-':
343
+ if target_type == 'file' and target == '-':
252
344
  payload = cli_io.materialize_file_payload(
253
345
  source_value,
254
346
  format_hint=source_format,
@@ -259,12 +351,12 @@ def load_handler(
259
351
 
260
352
  result = load(
261
353
  source_value,
262
- args.target_type,
263
- args.target,
354
+ target_type,
355
+ target,
264
356
  file_format=explicit_format,
265
357
  )
266
358
 
267
- output_path = getattr(args, 'output', None)
359
+ output_path = output
268
360
  cli_io.emit_or_write(
269
361
  result,
270
362
  output_path,
@@ -276,21 +368,53 @@ def load_handler(
276
368
 
277
369
 
278
370
  def render_handler(
279
- args: argparse.Namespace,
371
+ *,
372
+ config: str | None = None,
373
+ spec: str | None = None,
374
+ table: str | None = None,
375
+ template: TemplateKey | None = None,
376
+ template_path: str | None = None,
377
+ output: str | None = None,
378
+ pretty: bool = True,
379
+ quiet: bool = False,
280
380
  ) -> int:
281
- """Render SQL DDL statements from table schema specs."""
282
- _, quiet = cli_io.presentation_flags(args)
381
+ """
382
+ Render SQL DDL statements from table schema specs.
383
+
384
+ Parameters
385
+ ----------
386
+ config : str | None, optional
387
+ Path to a pipeline YAML configuration. Default is ``None``.
388
+ spec : str | None, optional
389
+ Path to a standalone table spec file. Default is ``None``.
390
+ table : str | None, optional
391
+ Table name filter. Default is ``None``.
392
+ template : TemplateKey | None, optional
393
+ The template key to use for rendering. Default is ``None``.
394
+ template_path : str | None, optional
395
+ Path to a custom template file. Default is ``None``.
396
+ output : str | None, optional
397
+ Path to write output SQL. Default is ``None``.
398
+ pretty : bool, optional
399
+ Whether to pretty-print output. Default is ``True``.
400
+ quiet : bool, optional
401
+ Whether to suppress non-error output. Default is ``False``.
283
402
 
284
- template_value: TemplateKey = getattr(args, 'template', 'ddl') or 'ddl'
285
- template_path = getattr(args, 'template_path', None)
286
- table_filter = getattr(args, 'table', None)
287
- spec_path = getattr(args, 'spec', None)
288
- config_path = getattr(args, 'config', None)
403
+ Returns
404
+ -------
405
+ int
406
+ Zero on success.
407
+ """
408
+ template_value: TemplateKey = template or 'ddl'
409
+ template_path_override = template_path
410
+ table_filter = table
411
+ spec_path = spec
412
+ config_path = config
289
413
 
290
414
  # If the provided template points to a file, treat it as a path override.
291
- file_override = template_path
415
+ file_override = template_path_override
292
416
  template_key: TemplateKey | None = template_value
293
- if template_path is None:
417
+ if template_path_override is None:
294
418
  candidate_path = Path(template_value)
295
419
  if candidate_path.exists():
296
420
  file_override = str(candidate_path)
@@ -323,117 +447,197 @@ def render_handler(
323
447
  sql_text = (
324
448
  '\n'.join(chunk.rstrip() for chunk in rendered_chunks).rstrip() + '\n'
325
449
  )
450
+ rendered_output = sql_text if pretty else sql_text.rstrip('\n')
326
451
 
327
- output_path = getattr(args, 'output', None)
452
+ output_path = output
328
453
  if output_path and output_path != '-':
329
- Path(output_path).write_text(sql_text, encoding='utf-8')
454
+ Path(output_path).write_text(rendered_output, encoding='utf-8')
330
455
  if not quiet:
331
456
  print(f'Rendered {len(specs)} schema(s) to {output_path}')
332
457
  return 0
333
458
 
334
- print(sql_text)
459
+ print(rendered_output)
335
460
  return 0
336
461
 
337
462
 
338
463
  def run_handler(
339
- args: argparse.Namespace,
464
+ *,
465
+ config: str,
466
+ job: str | None = None,
467
+ pipeline: str | None = None,
468
+ pretty: bool = True,
340
469
  ) -> int:
341
470
  """
342
471
  Execute an ETL job end-to-end from a pipeline YAML configuration.
343
472
 
344
473
  Parameters
345
474
  ----------
346
- args : argparse.Namespace
347
- Parsed command-line arguments.
475
+ config : str
476
+ Path to the pipeline YAML configuration.
477
+ job : str | None, optional
478
+ Name of the job to run. If not provided, runs the entire pipeline.
479
+ Default is ``None``.
480
+ pipeline : str | None, optional
481
+ Alias for ``job``. Default is ``None``.
482
+ pretty : bool, optional
483
+ Whether to pretty-print output. Default is ``True``.
348
484
 
349
485
  Returns
350
486
  -------
351
487
  int
352
488
  Zero on success.
353
489
  """
354
- cfg = load_pipeline_config(args.config, substitute=True)
490
+ cfg = load_pipeline_config(config, substitute=True)
355
491
 
356
- job_name = getattr(args, 'job', None) or getattr(args, 'pipeline', None)
492
+ job_name = job or pipeline
357
493
  if job_name:
358
- result = run(job=job_name, config_path=args.config)
359
- cli_io.emit_json({'status': 'ok', 'result': result}, pretty=True)
494
+ result = run(job=job_name, config_path=config)
495
+ cli_io.emit_json({'status': 'ok', 'result': result}, pretty=pretty)
360
496
  return 0
361
497
 
362
- cli_io.emit_json(_pipeline_summary(cfg), pretty=True)
498
+ cli_io.emit_json(_pipeline_summary(cfg), pretty=pretty)
363
499
  return 0
364
500
 
365
501
 
502
+ TransformOperations = Mapping[
503
+ Literal['filter', 'map', 'select', 'sort', 'aggregate'],
504
+ Any,
505
+ ]
506
+
507
+
366
508
  def transform_handler(
367
- args: argparse.Namespace,
509
+ *,
510
+ source: str,
511
+ operations: JSONData | str,
512
+ target: str | None = None,
513
+ source_format: str | None = None,
514
+ target_format: str | None = None,
515
+ pretty: bool = True,
516
+ format_explicit: bool = False,
368
517
  ) -> int:
369
518
  """
370
519
  Transform data from a source.
371
520
 
372
521
  Parameters
373
522
  ----------
374
- args : argparse.Namespace
375
- Parsed command-line arguments.
523
+ source : str
524
+ The source payload (e.g., path, inline data).
525
+ operations : JSONData | str
526
+ The transformation operations (inline JSON or path).
527
+ target : str | None, optional
528
+ The target destination (e.g., path). Default is ``None``.
529
+ source_format : str | None, optional
530
+ An optional source format hint (e.g., 'json', 'csv'). Default is
531
+ ``None``.
532
+ target_format : str | None, optional
533
+ An optional target format hint (e.g., 'json', 'csv'). Default is
534
+ ``None``.
535
+ pretty : bool, optional
536
+ Whether to pretty-print output. Default is ``True``.
537
+ format_explicit : bool, optional
538
+ Whether the format hint was explicitly provided. Default is ``False``.
376
539
 
377
540
  Returns
378
541
  -------
379
542
  int
380
543
  Zero on success.
544
+
545
+ Raises
546
+ ------
547
+ ValueError
548
+ If the operations payload is not a mapping.
381
549
  """
382
- pretty, _ = cli_io.presentation_flags(args)
383
- format_hint: str | None = getattr(args, 'source_format', None)
384
- format_explicit: bool = format_hint is not None
550
+ format_hint: str | None = source_format
551
+ format_explicit = format_hint is not None or format_explicit
385
552
 
386
553
  payload = cast(
387
554
  JSONData | str,
388
555
  cli_io.resolve_cli_payload(
389
- args.source,
556
+ source,
390
557
  format_hint=format_hint,
391
558
  format_explicit=format_explicit,
392
559
  ),
393
560
  )
394
561
 
395
- data = transform(payload, args.operations)
396
-
397
- cli_io.emit_or_write(
398
- data,
399
- getattr(args, 'target', None),
400
- pretty=pretty,
401
- success_message='Data transformed and saved to',
562
+ operations_payload = cli_io.resolve_cli_payload(
563
+ operations,
564
+ format_hint=None,
565
+ format_explicit=format_explicit,
402
566
  )
567
+ if not isinstance(operations_payload, dict):
568
+ raise ValueError('operations must resolve to a mapping of transforms')
403
569
 
570
+ data = transform(payload, cast(TransformOperations, operations_payload))
571
+
572
+ if target and target != '-':
573
+ File.write_file(target, data, file_format=target_format)
574
+ print(f'Data transformed and saved to {target}')
575
+ return 0
576
+
577
+ cli_io.emit_json(data, pretty=pretty)
404
578
  return 0
405
579
 
406
580
 
407
581
  def validate_handler(
408
- args: argparse.Namespace,
582
+ *,
583
+ source: str,
584
+ rules: JSONData | str,
585
+ source_format: str | None = None,
586
+ target: str | None = None,
587
+ format_explicit: bool = False,
588
+ pretty: bool = True,
409
589
  ) -> int:
410
590
  """
411
591
  Validate data from a source.
412
592
 
413
593
  Parameters
414
594
  ----------
415
- args : argparse.Namespace
416
- Parsed command-line arguments.
595
+ source : str
596
+ The source payload (e.g., path, inline data).
597
+ rules : JSONData | str
598
+ The validation rules (inline JSON or path).
599
+ source_format : str | None, optional
600
+ An optional source format hint (e.g., 'json', 'csv'). Default is
601
+ ``None``.
602
+ target : str | None, optional
603
+ The target destination (e.g., path). Default is ``None``.
604
+ format_explicit : bool, optional
605
+ Whether the format hint was explicitly provided. Default is ``False``.
606
+ pretty : bool, optional
607
+ Whether to pretty-print output. Default is ``True``.
417
608
 
418
609
  Returns
419
610
  -------
420
611
  int
421
612
  Zero on success.
613
+
614
+ Raises
615
+ ------
616
+ ValueError
617
+ If the rules payload is not a mapping.
422
618
  """
423
- pretty, _ = cli_io.presentation_flags(args)
424
- format_explicit: bool = getattr(args, '_format_explicit', False)
425
- format_hint: str | None = getattr(args, 'source_format', None)
619
+ format_hint: str | None = source_format
426
620
  payload = cast(
427
621
  JSONData | str,
428
622
  cli_io.resolve_cli_payload(
429
- args.source,
623
+ source,
430
624
  format_hint=format_hint,
431
625
  format_explicit=format_explicit,
432
626
  ),
433
627
  )
434
- result = validate(payload, args.rules)
435
628
 
436
- target_path = getattr(args, 'target', None)
629
+ rules_payload = cli_io.resolve_cli_payload(
630
+ rules,
631
+ format_hint=None,
632
+ format_explicit=format_explicit,
633
+ )
634
+ if not isinstance(rules_payload, dict):
635
+ raise ValueError('rules must resolve to a mapping of field rules')
636
+
637
+ field_rules = cast(Mapping[str, FieldRules], rules_payload)
638
+ result = validate(payload, field_rules)
639
+
640
+ target_path = target
437
641
  if target_path:
438
642
  validated_data = result.get('data')
439
643
  if validated_data is not None: