etlplus 0.7.2__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
etlplus/cli/io.py ADDED
@@ -0,0 +1,343 @@
1
+ """
2
+ :mod:`etlplus.cli.io` module.
3
+
4
+ Shared I/O helpers for CLI handlers (stdin/stdout, payload hydration).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import argparse
10
+ import csv
11
+ import io as _io
12
+ import json
13
+ import os
14
+ import sys
15
+ from pathlib import Path
16
+ from typing import Any
17
+ from typing import cast
18
+
19
+ from ..enums import FileFormat
20
+ from ..file import File
21
+ from ..types import JSONData
22
+ from ..utils import json_type
23
+ from ..utils import print_json
24
+
25
+ # SECTION: EXPORTS ========================================================== #
26
+
27
+
28
+ __all__ = [
29
+ # Functions
30
+ 'emit_json',
31
+ 'emit_or_write',
32
+ 'explicit_cli_format',
33
+ 'infer_payload_format',
34
+ 'materialize_file_payload',
35
+ 'parse_text_payload',
36
+ 'presentation_flags',
37
+ 'read_csv_rows',
38
+ 'read_stdin_text',
39
+ 'resolve_cli_payload',
40
+ 'write_json_output',
41
+ ]
42
+
43
+
44
+ # SECTION: FUNCTIONS ======================================================== #
45
+
46
+
47
def emit_json(
    data: Any,
    *,
    pretty: bool,
) -> None:
    """
    Print ``data`` as JSON in either pretty or compact form.

    Parameters
    ----------
    data : Any
        Value to serialize as JSON.
    pretty : bool
        When true, delegate to :func:`print_json`; otherwise print a
        single-line rendering with no whitespace after separators and
        non-ASCII characters left unescaped.
    """
    if not pretty:
        # Compact form: tight separators, non-ASCII text emitted verbatim.
        print(json.dumps(data, ensure_ascii=False, separators=(',', ':')))
        return
    print_json(data)
67
+
68
+
69
def emit_or_write(
    data: Any,
    output_path: str | None,
    *,
    pretty: bool,
    success_message: str,
) -> None:
    """
    Write ``data`` to ``output_path`` or, failing that, print it as JSON.

    Parameters
    ----------
    data : Any
        The data to serialize.
    output_path : str | None
        Destination file path. A falsy value or ``'-'`` means nothing is
        written to disk and the data is printed to stdout instead.
    pretty : bool
        Whether the stdout rendering is pretty-printed.
    success_message : str
        Message passed to :func:`write_json_output` for the disk-write case.
    """
    written = write_json_output(
        data,
        output_path,
        success_message=success_message,
    )
    # Only fall back to stdout when nothing was persisted.
    if not written:
        emit_json(data, pretty=pretty)
97
+
98
+
99
def explicit_cli_format(
    args: argparse.Namespace,
) -> str | None:
    """
    Return the format the user explicitly requested on the CLI, if any.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed CLI arguments.

    Returns
    -------
    str | None
        The first non-blank value among ``format``, ``target_format`` and
        ``source_format`` (checked in that order), stripped and lowercased.
        ``None`` when ``_format_explicit`` is missing or falsy, or when no
        attribute yields a non-blank value.
    """
    if not getattr(args, '_format_explicit', False):
        return None

    # Lazily look up each candidate attribute, preserving priority order.
    raw_values = (
        getattr(args, name, None)
        for name in ('format', 'target_format', 'source_format')
    )
    for raw in raw_values:
        if raw is None:
            continue
        cleaned = str(raw).strip().lower()
        if cleaned:
            return cleaned
    return None
125
+
126
+
127
def infer_payload_format(
    text: str,
) -> str:
    """
    Guess whether ``text`` is JSON or CSV from its first character.

    Parameters
    ----------
    text : str
        The payload text to analyze.

    Returns
    -------
    str
        ``'json'`` when the text, ignoring leading whitespace, begins with
        ``{`` or ``[``; ``'csv'`` otherwise (including for empty text).
    """
    looks_like_json = text.lstrip().startswith(('{', '['))
    return 'json' if looks_like_json else 'csv'
147
+
148
+
149
def materialize_file_payload(
    source: object,
    *,
    format_hint: str | None,
    format_explicit: bool,
) -> JSONData | object:
    """
    Load ``source`` from disk when it is a path with a recognized format.

    Parameters
    ----------
    source : object
        The source payload, potentially a file path.
    format_hint : str | None
        An optional format hint (e.g., 'json', 'csv').
    format_explicit : bool
        Whether the format hint was explicitly provided. When true, only
        the hint is consulted; when false, only the path suffix is.

    Returns
    -------
    JSONData | object
        ``source`` itself when it is already a ``dict``/``list``, is not a
        string or path-like object, or no :class:`FileFormat` can be
        resolved; otherwise the data read from the file.
    """
    # Already-structured payloads pass straight through.
    if isinstance(source, (dict, list)):
        return cast(JSONData, source)
    if not isinstance(source, (str, os.PathLike)):
        return source

    path = Path(source)
    hint = (format_hint or '').strip().lower()

    # An explicit hint wins outright; otherwise fall back to the extension.
    if format_explicit:
        token = hint
    else:
        token = path.suffix.lower().lstrip('.')

    if not token:
        return source
    try:
        fmt = FileFormat(token)
    except ValueError:
        # Unknown format name: hand the source back unchanged.
        return source

    if fmt == FileFormat.CSV:
        return read_csv_rows(path)
    return File(path, fmt).read()
201
+
202
+
203
def parse_text_payload(
    text: str,
    fmt: str | None,
) -> JSONData | str:
    """
    Turn raw JSON or CSV text into a Python payload.

    Parameters
    ----------
    text : str
        The text payload to parse.
    fmt : str | None
        An optional format hint (e.g., 'json', 'csv'). When blank or
        ``None`` the format is inferred via :func:`infer_payload_format`.

    Returns
    -------
    JSONData | str
        The result of :func:`json_type` for JSON, a list of row
        dictionaries for CSV, or ``text`` unchanged for any other format.
    """
    kind = (fmt or '').strip().lower()
    if not kind:
        kind = infer_payload_format(text)

    if kind == 'csv':
        rows = csv.DictReader(_io.StringIO(text))
        return [dict(record) for record in rows]
    if kind == 'json':
        return cast(JSONData, json_type(text))
    # Any other format is handed back as raw text.
    return text
229
+
230
+
231
def presentation_flags(
    args: argparse.Namespace,
) -> tuple[bool, bool]:
    """
    Read the ``pretty`` and ``quiet`` switches from parsed arguments.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed CLI arguments.

    Returns
    -------
    tuple[bool, bool]
        ``(pretty, quiet)``; a missing ``pretty`` attribute defaults to
        ``True`` and a missing ``quiet`` attribute defaults to ``False``.
    """
    pretty = getattr(args, 'pretty', True)
    quiet = getattr(args, 'quiet', False)
    return pretty, quiet
248
+
249
+
250
def read_csv_rows(
    path: Path,
) -> list[dict[str, str]]:
    """
    Read CSV rows into dictionaries keyed by the header row.

    Parameters
    ----------
    path : Path
        The path to the CSV file.

    Returns
    -------
    list[dict[str, str]]
        One dictionary per data row, in file order.

    Notes
    -----
    The file is decoded as ``utf-8-sig`` so that a leading UTF-8 byte-order
    mark does not leak into the first column name. Files without a BOM
    decode exactly as they would with plain ``utf-8``.
    """
    # newline='' lets the csv module handle line endings and embedded
    # newlines inside quoted fields itself.
    with path.open(newline='', encoding='utf-8-sig') as handle:
        return [dict(row) for row in csv.DictReader(handle)]
269
+
270
+
271
def read_stdin_text() -> str:
    """Read standard input to EOF and return it as a single string."""
    stream = sys.stdin
    return stream.read()
274
+
275
+
276
def resolve_cli_payload(
    source: object,
    *,
    format_hint: str | None,
    format_explicit: bool,
    hydrate_files: bool = True,
) -> JSONData | object:
    """
    Resolve a CLI-provided source into a payload.

    Parameters
    ----------
    source : object
        The source payload, potentially ``'-'`` (stdin) or a file path.
    format_hint : str | None
        An optional format hint (e.g., 'json', 'csv').
    format_explicit : bool
        Whether the format hint was explicitly provided.
    hydrate_files : bool, optional
        Whether to materialize file-based payloads. Default is True.

    Returns
    -------
    JSONData | object
        Parsed stdin text when ``source`` is ``'-'``; ``source`` unchanged
        when ``hydrate_files`` is false; otherwise the result of
        :func:`materialize_file_payload`.
    """
    wants_stdin = (
        isinstance(source, (os.PathLike, str)) and str(source) == '-'
    )
    if wants_stdin:
        # stdin is parsed with the hint alone; there is no path to inspect.
        return parse_text_payload(read_stdin_text(), format_hint)

    if hydrate_files:
        return materialize_file_payload(
            source,
            format_hint=format_hint,
            format_explicit=format_explicit,
        )
    return source
314
+
315
+
316
def write_json_output(
    data: Any,
    output_path: str | None,
    *,
    success_message: str,
) -> bool:
    """
    Write ``data`` as JSON to ``output_path`` when a real path is given.

    Parameters
    ----------
    data : Any
        The data to serialize as JSON.
    output_path : str | None
        The output file path. A falsy value or ``'-'`` skips writing.
    success_message : str
        Text printed, followed by a space and the path, after writing.

    Returns
    -------
    bool
        True if data was written to disk; False if writing was skipped.
    """
    targets_disk = bool(output_path) and output_path != '-'
    if not targets_disk:
        return False

    destination = Path(output_path)
    File(destination, FileFormat.JSON).write_json(data)
    print(f'{success_message} {output_path}')
    return True
etlplus/cli/main.py CHANGED
@@ -12,26 +12,21 @@ from __future__ import annotations
12
12
  import argparse
13
13
  import contextlib
14
14
  import sys
15
- from collections.abc import Sequence
16
- from typing import Literal
17
15
 
18
16
  import click
19
17
  import typer
20
18
 
21
19
  from .. import __version__
22
- from ..enums import DataConnectorType
23
- from ..enums import FileFormat
24
20
  from ..utils import json_type
25
- from .app import PROJECT_URL
26
- from .app import app
27
- from .handlers import check_handler
28
- from .handlers import extract_handler
29
- from .handlers import load_handler
30
- from .handlers import pipeline_handler
31
- from .handlers import render_handler
32
- from .handlers import run_handler
33
- from .handlers import transform_handler
34
- from .handlers import validate_handler
21
+ from . import handlers
22
+ from .commands import app
23
+ from .constants import CLI_DESCRIPTION
24
+ from .constants import CLI_EPILOG
25
+ from .constants import DATA_CONNECTORS
26
+ from .constants import FILE_FORMATS
27
+ from .constants import PROJECT_URL
28
+ from .options import add_argparse_format_options
29
+ from .types import DataConnectorContext
35
30
 
36
31
  # SECTION: EXPORTS ========================================================== #
37
32
 
@@ -43,31 +38,6 @@ __all__ = [
43
38
  ]
44
39
 
45
40
 
46
- # SECTION: TYPE ALIASES ===================================================== #
47
-
48
-
49
- type FormatContext = Literal['source', 'target']
50
-
51
-
52
- # SECTION: INTERNAL CLASSES ================================================= #
53
-
54
-
55
- class _FormatAction(argparse.Action):
56
- """
57
- Argparse action that records when ``--source-format`` or
58
- ``--target-format`` is provided."""
59
-
60
- def __call__(
61
- self,
62
- parser: argparse.ArgumentParser,
63
- namespace: argparse.Namespace,
64
- values: str | Sequence[object] | None,
65
- option_string: str | None = None,
66
- ) -> None: # pragma: no cover
67
- setattr(namespace, self.dest, values)
68
- namespace._format_explicit = True
69
-
70
-
71
41
  # SECTION: INTERNAL FUNCTIONS =============================================== #
72
42
 
73
43
 
@@ -122,7 +92,7 @@ def _add_config_option(
122
92
  def _add_format_options(
123
93
  parser: argparse.ArgumentParser,
124
94
  *,
125
- context: FormatContext,
95
+ context: DataConnectorContext,
126
96
  ) -> None:
127
97
  """
128
98
  Attach shared ``--source-format`` or ``--target-format`` options to
@@ -132,63 +102,11 @@ def _add_format_options(
132
102
  ----------
133
103
  parser : argparse.ArgumentParser
134
104
  Parser to augment.
135
- context : FormatContext
105
+ context : DataConnectorContext
136
106
  Context for the format option: either ``'source'`` or ``'target'``
137
107
  """
138
108
  parser.set_defaults(_format_explicit=False)
139
- parser.add_argument(
140
- '--source-format',
141
- choices=list(FileFormat.choices()),
142
- default='json',
143
- action=_FormatAction,
144
- help=(
145
- f'Format of the {context}. Overrides filename-based inference '
146
- 'when provided.'
147
- ),
148
- )
149
- parser.add_argument(
150
- '--target-format',
151
- choices=list(FileFormat.choices()),
152
- default='json',
153
- action=_FormatAction,
154
- help=(
155
- f'Format of the {context}. Overrides filename-based inference '
156
- 'when provided.'
157
- ),
158
- )
159
-
160
-
161
- def _cli_description() -> str:
162
- return '\n'.join(
163
- [
164
- 'ETLPlus - A Swiss Army knife for simple ETL operations.',
165
- '',
166
- ' Provide a subcommand and options. Examples:',
167
- '',
168
- ' etlplus extract file in.csv > out.json',
169
- ' etlplus validate in.json --rules \'{"required": ["id"]}\'',
170
- (
171
- ' etlplus transform --from file in.csv --operations '
172
- '\'{"select": ["id"]}\' --to file -o out.json'
173
- ),
174
- ' etlplus extract in.csv | etlplus load --to file out.json',
175
- '',
176
- ' Override format inference when extensions are misleading:',
177
- '',
178
- ' etlplus extract data.txt --source-format csv',
179
- ' etlplus load payload.bin --target-format json',
180
- ],
181
- )
182
-
183
-
184
- def _cli_epilog() -> str:
185
- return '\n'.join(
186
- [
187
- 'Tip:',
188
- ' --source-format and --target-format override format '
189
- 'inference based on filename extensions when needed.',
190
- ],
191
- )
109
+ add_argparse_format_options(parser, context=context)
192
110
 
193
111
 
194
112
  def _emit_context_help(
@@ -293,8 +211,8 @@ def create_parser() -> argparse.ArgumentParser:
293
211
 
294
212
  parser = argparse.ArgumentParser(
295
213
  prog='etlplus',
296
- description=_cli_description(),
297
- epilog=_cli_epilog(),
214
+ description=CLI_DESCRIPTION,
215
+ epilog=CLI_EPILOG,
298
216
  formatter_class=argparse.RawDescriptionHelpFormatter,
299
217
  )
300
218
 
@@ -305,6 +223,25 @@ def create_parser() -> argparse.ArgumentParser:
305
223
  version=f'%(prog)s {__version__}',
306
224
  )
307
225
 
226
+ parser.add_argument(
227
+ '--pretty',
228
+ action=argparse.BooleanOptionalAction,
229
+ default=True,
230
+ help='Pretty-print JSON output (default: pretty).',
231
+ )
232
+ parser.add_argument(
233
+ '--quiet',
234
+ action=argparse.BooleanOptionalAction,
235
+ default=False,
236
+ help='Suppress warnings and non-essential output.',
237
+ )
238
+ parser.add_argument(
239
+ '--verbose',
240
+ action=argparse.BooleanOptionalAction,
241
+ default=False,
242
+ help='Emit extra diagnostics to stderr.',
243
+ )
244
+
308
245
  subparsers = parser.add_subparsers(
309
246
  dest='command',
310
247
  help='Available commands',
@@ -318,7 +255,7 @@ def create_parser() -> argparse.ArgumentParser:
318
255
  )
319
256
  extract_parser.add_argument(
320
257
  'source_type',
321
- choices=list(DataConnectorType.choices()),
258
+ choices=sorted(DATA_CONNECTORS),
322
259
  help='Type of source to extract from',
323
260
  )
324
261
  extract_parser.add_argument(
@@ -329,7 +266,7 @@ def create_parser() -> argparse.ArgumentParser:
329
266
  ),
330
267
  )
331
268
  _add_format_options(extract_parser, context='source')
332
- extract_parser.set_defaults(func=extract_handler)
269
+ extract_parser.set_defaults(func=handlers.extract_handler)
333
270
 
334
271
  validate_parser = subparsers.add_parser(
335
272
  'validate',
@@ -346,7 +283,7 @@ def create_parser() -> argparse.ArgumentParser:
346
283
  default={},
347
284
  help='Validation rules as JSON string',
348
285
  )
349
- validate_parser.set_defaults(func=validate_handler)
286
+ validate_parser.set_defaults(func=handlers.validate_handler)
350
287
 
351
288
  transform_parser = subparsers.add_parser(
352
289
  'transform',
@@ -366,18 +303,18 @@ def create_parser() -> argparse.ArgumentParser:
366
303
  transform_parser.add_argument(
367
304
  '--from',
368
305
  dest='from_',
369
- choices=list(DataConnectorType.choices()),
306
+ choices=sorted(DATA_CONNECTORS),
370
307
  help='Override the inferred source type (file, database, api).',
371
308
  )
372
309
  transform_parser.add_argument(
373
310
  '--to',
374
311
  dest='to',
375
- choices=list(DataConnectorType.choices()),
312
+ choices=sorted(DATA_CONNECTORS),
376
313
  help='Override the inferred target type (file, database, api).',
377
314
  )
378
315
  transform_parser.add_argument(
379
316
  '--source-format',
380
- choices=list(FileFormat.choices()),
317
+ choices=sorted(FILE_FORMATS),
381
318
  dest='source_format',
382
319
  help=(
383
320
  'Input payload format when SOURCE is - or a literal payload. '
@@ -387,14 +324,14 @@ def create_parser() -> argparse.ArgumentParser:
387
324
  transform_parser.add_argument(
388
325
  '--target-format',
389
326
  dest='target_format',
390
- choices=list(FileFormat.choices()),
327
+ choices=sorted(FILE_FORMATS),
391
328
  help=(
392
329
  'Output payload format '
393
330
  'when writing to stdout or non-file targets. '
394
331
  'File targets infer format from the extension.'
395
332
  ),
396
333
  )
397
- transform_parser.set_defaults(func=transform_handler)
334
+ transform_parser.set_defaults(func=handlers.transform_handler)
398
335
 
399
336
  load_parser = subparsers.add_parser(
400
337
  'load',
@@ -407,7 +344,7 @@ def create_parser() -> argparse.ArgumentParser:
407
344
  )
408
345
  load_parser.add_argument(
409
346
  'target_type',
410
- choices=list(DataConnectorType.choices()),
347
+ choices=sorted(DATA_CONNECTORS),
411
348
  help='Type of target to load to',
412
349
  )
413
350
  load_parser.add_argument(
@@ -418,29 +355,7 @@ def create_parser() -> argparse.ArgumentParser:
418
355
  ),
419
356
  )
420
357
  _add_format_options(load_parser, context='target')
421
- load_parser.set_defaults(func=load_handler)
422
-
423
- pipe_parser = subparsers.add_parser(
424
- 'pipeline',
425
- help=(
426
- 'DEPRECATED: use "list" (for summary/jobs) or "run" (to execute); '
427
- 'see '
428
- f'{PROJECT_URL}/blob/main/docs/pipeline-guide.md'
429
- ),
430
- formatter_class=argparse.ArgumentDefaultsHelpFormatter,
431
- )
432
- _add_config_option(pipe_parser)
433
- pipe_parser.add_argument(
434
- '--list',
435
- action='store_true',
436
- help='List available job names and exit',
437
- )
438
- pipe_parser.add_argument(
439
- '--run',
440
- metavar='JOB',
441
- help='Run a specific job by name',
442
- )
443
- pipe_parser.set_defaults(func=pipeline_handler)
358
+ load_parser.set_defaults(func=handlers.load_handler)
444
359
 
445
360
  render_parser = subparsers.add_parser(
446
361
  'render',
@@ -476,7 +391,7 @@ def create_parser() -> argparse.ArgumentParser:
476
391
  'Explicit path to a Jinja template file (overrides template key).'
477
392
  ),
478
393
  )
479
- render_parser.set_defaults(func=render_handler)
394
+ render_parser.set_defaults(func=handlers.render_handler)
480
395
 
481
396
  check_parser = subparsers.add_parser(
482
397
  'check',
@@ -516,7 +431,7 @@ def create_parser() -> argparse.ArgumentParser:
516
431
  name='transforms',
517
432
  help_text='List data transforms',
518
433
  )
519
- check_parser.set_defaults(func=check_handler)
434
+ check_parser.set_defaults(func=handlers.check_handler)
520
435
 
521
436
  run_parser = subparsers.add_parser(
522
437
  'run',
@@ -537,7 +452,7 @@ def create_parser() -> argparse.ArgumentParser:
537
452
  '--pipeline',
538
453
  help='Name of the pipeline to run',
539
454
  )
540
- run_parser.set_defaults(func=run_handler)
455
+ run_parser.set_defaults(func=handlers.run_handler)
541
456
 
542
457
  return parser
543
458