etlplus 0.4.1__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
etlplus/cli/handlers.py CHANGED
@@ -14,7 +14,6 @@ import os
14
14
  import sys
15
15
  from pathlib import Path
16
16
  from typing import Any
17
- from typing import Literal
18
17
  from typing import cast
19
18
 
20
19
  from ..config import PipelineConfig
@@ -45,53 +44,9 @@ __all__ = [
45
44
  ]
46
45
 
47
46
 
48
- # SECTION: INTERNAL CONSTANTS =============================================== #
49
-
50
-
51
- # Standard output/error format behavior states
52
- _FORMAT_ERROR_STATES = {'error', 'fail', 'strict'}
53
- _FORMAT_SILENT_STATES = {'ignore', 'silent'}
54
-
55
-
56
- # SECTION: CONSTANTS ======================================================== #
57
-
58
-
59
- FORMAT_ENV_KEY = 'ETLPLUS_FORMAT_BEHAVIOR'
60
-
61
-
62
47
  # SECTION: INTERNAL FUNCTIONS =============================================== #
63
48
 
64
49
 
65
- def _emit_behavioral_notice(
66
- message: str,
67
- behavior: str,
68
- *,
69
- quiet: bool,
70
- ) -> None:
71
- """
72
- Emit or raise format-behavior notices.
73
-
74
- Parameters
75
- ----------
76
- message : str
77
- Warning message describing the ignored ``--format`` flag.
78
- behavior : str
79
- Effective format-behavior mode derived from CLI options and env.
80
- quiet : bool
81
- Whether non-essential warnings should be suppressed.
82
-
83
- Raises
84
- ------
85
- ValueError
86
- If ``behavior`` maps to an error state.
87
- """
88
- if behavior in _FORMAT_ERROR_STATES:
89
- raise ValueError(message)
90
- if behavior in _FORMAT_SILENT_STATES or quiet:
91
- return
92
- print(f'Warning: {message}', file=sys.stderr)
93
-
94
-
95
50
  def _emit_json(
96
51
  data: Any,
97
52
  *,
@@ -120,62 +75,6 @@ def _emit_json(
120
75
  print(dumped)
121
76
 
122
77
 
123
- def _format_behavior(
124
- strict: bool,
125
- ) -> str:
126
- """
127
- Return the effective format-behavior mode.
128
-
129
- Parameters
130
- ----------
131
- strict : bool
132
- Whether to enforce strict format behavior.
133
-
134
- Returns
135
- -------
136
- str
137
- The effective format-behavior mode.
138
- """
139
- if strict:
140
- return 'error'
141
- env_value = os.getenv(FORMAT_ENV_KEY, 'warn')
142
- return (env_value or 'warn').strip().lower()
143
-
144
-
145
- def _handle_format_guard(
146
- *,
147
- io_context: Literal['source', 'target'],
148
- resource_type: str,
149
- format_explicit: bool,
150
- strict: bool,
151
- quiet: bool,
152
- ) -> None:
153
- """
154
- Warn or raise when --format is used alongside file resources.
155
-
156
- Parameters
157
- ----------
158
- io_context : Literal['source', 'target']
159
- Whether this is a source or target resource.
160
- resource_type : str
161
- The type of resource being processed.
162
- format_explicit : bool
163
- Whether the --format option was explicitly provided.
164
- strict : bool
165
- Whether to enforce strict format behavior.
166
- quiet : bool
167
- Whether to suppress warnings.
168
- """
169
- if resource_type != 'file' or not format_explicit:
170
- return
171
- message = (
172
- f'--format is ignored for file {io_context}s; '
173
- 'inferred from filename extension.'
174
- )
175
- behavior = _format_behavior(strict)
176
- _emit_behavioral_notice(message, behavior, quiet=quiet)
177
-
178
-
179
78
  def _infer_payload_format(
180
79
  text: str,
181
80
  ) -> str:
@@ -233,28 +132,75 @@ def _list_sections(
233
132
  return sections
234
133
 
235
134
 
236
- def _materialize_csv_payload(
135
+ def _explicit_cli_format(
136
+ args: argparse.Namespace,
137
+ ) -> str | None:
138
+ """Return the explicit CLI format hint when provided."""
139
+
140
+ if not getattr(args, '_format_explicit', False):
141
+ return None
142
+ for attr in ('format', 'target_format', 'source_format'):
143
+ value = getattr(args, attr, None)
144
+ if value is None:
145
+ continue
146
+ normalized = value.strip().lower()
147
+ if normalized:
148
+ return normalized
149
+ return None
150
+
151
+
152
+ def _materialize_file_payload(
237
153
  source: object,
238
- ) -> JSONData | str:
154
+ *,
155
+ format_hint: str | None,
156
+ format_explicit: bool,
157
+ ) -> JSONData | object:
239
158
  """
240
- Return parsed CSV rows when ``source`` points at a CSV file.
159
+ Return structured payloads when ``source`` references a file.
241
160
 
242
161
  Parameters
243
162
  ----------
244
163
  source : object
245
- The source of data.
164
+ Input source of data, possibly a file path.
165
+ format_hint : str | None
166
+ Explicit format hint: 'json', 'csv', or None to infer.
167
+ format_explicit : bool
168
+ Whether an explicit format hint was provided.
246
169
 
247
170
  Returns
248
171
  -------
249
- JSONData | str
250
- Parsed CSV rows or the original source if not a CSV file.
172
+ JSONData | object
173
+ Parsed JSON data when ``source`` is a file; otherwise the original
174
+ ``source`` object.
251
175
  """
252
- if not isinstance(source, str):
176
+ if isinstance(source, (dict, list)):
253
177
  return cast(JSONData, source)
178
+ if not isinstance(source, (str, os.PathLike)):
179
+ return source
180
+
254
181
  path = Path(source)
255
- if path.suffix.lower() != '.csv' or not path.is_file():
182
+
183
+ normalized_hint = (format_hint or '').strip().lower()
184
+ fmt: FileFormat | None = None
185
+
186
+ if format_explicit and normalized_hint:
187
+ try:
188
+ fmt = FileFormat(normalized_hint)
189
+ except ValueError:
190
+ fmt = None
191
+ elif not format_explicit:
192
+ suffix = path.suffix.lower().lstrip('.')
193
+ if suffix:
194
+ try:
195
+ fmt = FileFormat(suffix)
196
+ except ValueError:
197
+ fmt = None
198
+
199
+ if fmt is None:
256
200
  return source
257
- return _read_csv_rows(path)
201
+ if fmt == FileFormat.CSV:
202
+ return _read_csv_rows(path)
203
+ return File(path, fmt).read()
258
204
 
259
205
 
260
206
  def _parse_text_payload(
@@ -365,6 +311,50 @@ def _read_stdin_text() -> str:
365
311
  return sys.stdin.read()
366
312
 
367
313
 
314
+ def _resolve_cli_payload(
315
+ source: object,
316
+ *,
317
+ format_hint: str | None,
318
+ format_explicit: bool,
319
+ hydrate_files: bool = True,
320
+ ) -> JSONData | object:
321
+ """
322
+ Normalize CLI-provided payloads, honoring stdin and inline data.
323
+
324
+ Parameters
325
+ ----------
326
+ source : object
327
+ Raw CLI value (path, inline payload, or ``'-'`` for stdin).
328
+ format_hint : str | None
329
+ Explicit format hint supplied by the CLI option.
330
+ format_explicit : bool
331
+ Flag indicating whether the format hint was explicitly provided.
332
+ hydrate_files : bool, optional
333
+ When ``True`` (default) materialize file paths into structured data.
334
+ When ``False``, keep the original path so downstream code can stream
335
+ from disk directly.
336
+
337
+ Returns
338
+ -------
339
+ JSONData | object
340
+ Parsed payload or the original source value when hydration is
341
+ disabled.
342
+ """
343
+
344
+ if isinstance(source, (os.PathLike, str)) and str(source) == '-':
345
+ text = _read_stdin_text()
346
+ return _parse_text_payload(text, format_hint)
347
+
348
+ if not hydrate_files:
349
+ return source
350
+
351
+ return _materialize_file_payload(
352
+ source,
353
+ format_hint=format_hint,
354
+ format_explicit=format_explicit,
355
+ )
356
+
357
+
368
358
  def _write_json_output(
369
359
  data: Any,
370
360
  output_path: str | None,
@@ -414,39 +404,28 @@ def cmd_extract(
414
404
  int
415
405
  Zero on success.
416
406
  """
417
- pretty, quiet = _presentation_flags(args)
418
-
419
- _handle_format_guard(
420
- io_context='source',
421
- resource_type=args.source_type,
422
- format_explicit=getattr(args, '_format_explicit', False),
423
- strict=getattr(args, 'strict_format', False),
424
- quiet=quiet,
425
- )
407
+ pretty, _ = _presentation_flags(args)
408
+ explicit_format = _explicit_cli_format(args)
426
409
 
427
410
  if args.source == '-':
428
411
  text = _read_stdin_text()
429
412
  payload = _parse_text_payload(text, getattr(args, 'format', None))
430
- if not _write_json_output(
431
- payload,
432
- getattr(args, 'output', None),
433
- success_message='Data extracted and saved to',
434
- ):
435
- _emit_json(payload, pretty=pretty)
413
+ _emit_json(payload, pretty=pretty)
414
+
436
415
  return 0
437
416
 
438
- if args.source_type == 'file':
439
- result = extract(args.source_type, args.source)
440
- else:
441
- result = extract(
442
- args.source_type,
443
- args.source,
444
- file_format=getattr(args, 'format', None),
445
- )
417
+ result = extract(
418
+ args.source_type,
419
+ args.source,
420
+ file_format=explicit_format,
421
+ )
422
+ output_path = getattr(args, 'target', None)
423
+ if output_path is None:
424
+ output_path = getattr(args, 'output', None)
446
425
 
447
426
  if not _write_json_output(
448
427
  result,
449
- getattr(args, 'output', None),
428
+ output_path,
450
429
  success_message='Data extracted and saved to',
451
430
  ):
452
431
  _emit_json(result, pretty=pretty)
@@ -470,30 +449,31 @@ def cmd_validate(
470
449
  int
471
450
  Zero on success.
472
451
  """
473
- pretty, _ = _presentation_flags(args)
474
-
475
- if args.source == '-':
476
- text = _read_stdin_text()
477
- payload = _parse_text_payload(
478
- text,
479
- getattr(args, 'input_format', None),
480
- )
481
- else:
482
- payload = _materialize_csv_payload(args.source)
452
+ pretty, _quiet = _presentation_flags(args)
453
+ format_explicit: bool = getattr(args, '_format_explicit', False)
454
+ format_hint: str | None = getattr(args, 'source_format', None)
455
+ payload = cast(
456
+ JSONData | str,
457
+ _resolve_cli_payload(
458
+ args.source,
459
+ format_hint=format_hint,
460
+ format_explicit=format_explicit,
461
+ ),
462
+ )
483
463
  result = validate(payload, args.rules)
484
464
 
485
- output_path = getattr(args, 'output', None)
486
- if output_path:
465
+ target_path = getattr(args, 'target', None)
466
+ if target_path:
487
467
  validated_data = result.get('data')
488
468
  if validated_data is not None:
489
469
  _write_json_output(
490
470
  validated_data,
491
- output_path,
471
+ target_path,
492
472
  success_message='Validation result saved to',
493
473
  )
494
474
  else:
495
475
  print(
496
- f'Validation failed, no data to save for {output_path}',
476
+ f'Validation failed, no data to save for {target_path}',
497
477
  file=sys.stderr,
498
478
  )
499
479
  else:
@@ -518,22 +498,24 @@ def cmd_transform(
518
498
  int
519
499
  Zero on success.
520
500
  """
521
- pretty, _ = _presentation_flags(args)
501
+ pretty, _quiet = _presentation_flags(args)
502
+ format_hint: str | None = getattr(args, 'source_format', None)
503
+ format_explicit: bool = format_hint is not None
522
504
 
523
- if args.source == '-':
524
- text = _read_stdin_text()
525
- payload = _parse_text_payload(
526
- text,
527
- getattr(args, 'input_format', None),
528
- )
529
- else:
530
- payload = _materialize_csv_payload(args.source)
505
+ payload = cast(
506
+ JSONData | str,
507
+ _resolve_cli_payload(
508
+ args.source,
509
+ format_hint=format_hint,
510
+ format_explicit=format_explicit,
511
+ ),
512
+ )
531
513
 
532
514
  data = transform(payload, args.operations)
533
515
 
534
516
  if not _write_json_output(
535
517
  data,
536
- getattr(args, 'output', None),
518
+ getattr(args, 'target', None),
537
519
  success_message='Data transformed and saved to',
538
520
  ):
539
521
  _emit_json(data, pretty=pretty)
@@ -557,52 +539,43 @@ def cmd_load(
557
539
  int
558
540
  Zero on success.
559
541
  """
560
- pretty, quiet = _presentation_flags(args)
561
-
562
- _handle_format_guard(
563
- io_context='target',
564
- resource_type=args.target_type,
565
- format_explicit=getattr(args, '_format_explicit', False),
566
- strict=getattr(args, 'strict_format', False),
567
- quiet=quiet,
568
- )
542
+ pretty, _ = _presentation_flags(args)
543
+ explicit_format = _explicit_cli_format(args)
569
544
 
570
545
  # Allow piping into load.
571
- source_value: (
572
- str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]]
546
+ source_format = getattr(args, 'source_format', None)
547
+ source_value = cast(
548
+ str | Path | os.PathLike[str] | dict[str, Any] | list[dict[str, Any]],
549
+ _resolve_cli_payload(
550
+ args.source,
551
+ format_hint=source_format,
552
+ format_explicit=source_format is not None,
553
+ hydrate_files=False,
554
+ ),
573
555
  )
574
- if args.source == '-':
575
- text = _read_stdin_text()
576
- source_value = cast(
577
- str | dict[str, Any] | list[dict[str, Any]],
578
- _parse_text_payload(
579
- text,
580
- getattr(args, 'input_format', None),
581
- ),
582
- )
583
- else:
584
- source_value = args.source
585
556
 
586
557
  # Allow piping out of load for file targets.
587
558
  if args.target_type == 'file' and args.target == '-':
588
- payload = _materialize_csv_payload(source_value)
559
+ payload = _materialize_file_payload(
560
+ source_value,
561
+ format_hint=source_format,
562
+ format_explicit=source_format is not None,
563
+ )
589
564
  _emit_json(payload, pretty=pretty)
590
565
  return 0
591
566
 
592
- if args.target_type == 'file':
593
- result = load(source_value, args.target_type, args.target)
594
- else:
595
- result = load(
596
- source_value,
597
- args.target_type,
598
- args.target,
599
- file_format=getattr(args, 'format', None),
600
- )
567
+ result = load(
568
+ source_value,
569
+ args.target_type,
570
+ args.target,
571
+ file_format=explicit_format,
572
+ )
601
573
 
574
+ output_path = getattr(args, 'output', None)
602
575
  if not _write_json_output(
603
576
  result,
604
- getattr(args, 'output', None),
605
- success_message='Data loaded and saved to',
577
+ output_path,
578
+ success_message='Load result saved to',
606
579
  ):
607
580
  _emit_json(result, pretty=pretty)
608
581
 
@@ -627,13 +600,19 @@ def cmd_pipeline(
627
600
  """
628
601
  cfg = load_pipeline_config(args.config, substitute=True)
629
602
 
630
- if getattr(args, 'list', False) and not getattr(args, 'run', None):
603
+ list_flag = getattr(args, 'list', False) or getattr(args, 'jobs', False)
604
+ run_target = (
605
+ getattr(args, 'run', None)
606
+ or getattr(args, 'job', None)
607
+ or getattr(args, 'pipeline', None)
608
+ )
609
+
610
+ if list_flag and not run_target:
631
611
  print_json({'jobs': _pipeline_summary(cfg)['jobs']})
632
612
  return 0
633
613
 
634
- run_job = getattr(args, 'run', None)
635
- if run_job:
636
- result = run(job=run_job, config_path=args.config)
614
+ if run_target:
615
+ result = run(job=run_target, config_path=args.config)
637
616
  print_json({'status': 'ok', 'result': result})
638
617
  return 0
639
618