etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/cli/main.py ADDED
@@ -0,0 +1,616 @@
1
+ """
2
+ :mod:`etlplus.cli.main` module.
3
+
4
+ Entry point helpers for the Typer-powered ``etlplus`` CLI.
5
+
6
+ This module exposes :func:`main` for the console script as well as
7
+ :func:`create_parser` for callers that still need an ``argparse`` parser.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import contextlib
14
+ import sys
15
+ from collections.abc import Sequence
16
+ from typing import Literal
17
+
18
+ import click
19
+ import typer
20
+
21
+ from .. import __version__
22
+ from ..enums import DataConnectorType
23
+ from ..enums import FileFormat
24
+ from ..utils import json_type
25
+ from .app import PROJECT_URL
26
+ from .app import app
27
+ from .handlers import cmd_check
28
+ from .handlers import cmd_extract
29
+ from .handlers import cmd_load
30
+ from .handlers import cmd_pipeline
31
+ from .handlers import cmd_render
32
+ from .handlers import cmd_run
33
+ from .handlers import cmd_transform
34
+ from .handlers import cmd_validate
35
+
36
+ # SECTION: EXPORTS ========================================================== #
37
+
38
+
39
# Public names of this module: the legacy argparse factory and the console
# script entry point.
__all__ = ['create_parser', 'main']
44
+
45
+
46
+ # SECTION: TYPE ALIASES ===================================================== #
47
+
48
+
49
# Context for a format option: either the ``'source'`` or the ``'target'``.
type FormatContext = Literal['source', 'target']
50
+
51
+
52
+ # SECTION: INTERNAL CLASSES ================================================= #
53
+
54
+
55
class _FormatAction(argparse.Action):
    """
    Store a format option value and flag that it was passed explicitly.

    Besides assigning the parsed value to ``dest``, the action sets
    ``namespace._format_explicit`` to ``True``.
    """

    def __call__(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: str | Sequence[object] | None,
        option_string: str | None = None,
    ) -> None:  # pragma: no cover
        # Value first, then the marker, mirroring plain attribute assignment.
        recorded = ((self.dest, values), ('_format_explicit', True))
        for attr, value in recorded:
            setattr(namespace, attr, value)
69
+
70
+
71
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
72
+
73
+
74
def _add_boolean_flag(
    parser: argparse.ArgumentParser,
    *,
    name: str,
    help_text: str,
) -> None:
    """
    Register ``--<name>`` together with its ``--no-<name>`` negation.

    The pair is provided by :class:`argparse.BooleanOptionalAction`
    (available since Python 3.9); the flag defaults to ``False``.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Parser receiving the flag.
    name : str
        Primary flag name without leading dashes.
    help_text : str
        Help text rendered in ``--help`` output.
    """
    option = '--' + name
    parser.add_argument(
        option,
        default=False,
        help=help_text,
        action=argparse.BooleanOptionalAction,
    )
98
+
99
+
100
def _add_config_option(
    parser: argparse.ArgumentParser,
    *,
    required: bool = True,
) -> None:
    """
    Register the shared ``--config`` option on ``parser``.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Parser receiving the option.
    required : bool, optional
        Whether the flag must be provided. Defaults to ``True``.
    """
    settings = {
        'required': required,
        'help': 'Path to pipeline YAML configuration file',
    }
    parser.add_argument('--config', **settings)
120
+
121
+
122
def _add_format_options(
    parser: argparse.ArgumentParser,
    *,
    context: FormatContext,
) -> None:
    """
    Register ``--source-format`` and ``--target-format`` on ``parser``.

    Both options default to ``'json'``, accept the ``FileFormat`` choices,
    and use :class:`_FormatAction`; ``_format_explicit`` starts out
    ``False`` on the parsed namespace.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Parser to augment.
    context : FormatContext
        Context for the format option: either ``'source'`` or ``'target'``.
        It is only interpolated into the help text.
    """
    # NOTE(review): both flags are always registered and share the same
    # ``context``-based help text -- confirm whether only the matching flag
    # was intended.
    parser.set_defaults(_format_explicit=False)

    shared_help = (
        f'Format of the {context}. Overrides filename-based inference '
        'when provided.'
    )
    for flag in ('--source-format', '--target-format'):
        parser.add_argument(
            flag,
            action=_FormatAction,
            choices=list(FileFormat.choices()),
            default='json',
            help=shared_help,
        )
159
+
160
+
161
def _cli_description() -> str:
    """
    Return the multi-line description for the legacy parser's help output.

    Returns
    -------
    str
        A tagline followed by usage examples and format-override examples,
        joined with newlines.
    """
    usage_examples = (
        ' etlplus extract file in.csv > out.json',
        ' etlplus validate in.json --rules \'{"required": ["id"]}\'',
        ' etlplus transform --from file in.csv --operations '
        '\'{"select": ["id"]}\' --to file -o out.json',
        ' etlplus extract in.csv | etlplus load --to file out.json',
    )
    override_examples = (
        ' etlplus extract data.txt --source-format csv',
        ' etlplus load payload.bin --target-format json',
    )
    lines = (
        'ETLPlus - A Swiss Army knife for simple ETL operations.',
        '',
        ' Provide a subcommand and options. Examples:',
        '',
        *usage_examples,
        '',
        ' Override format inference when extensions are misleading:',
        '',
        *override_examples,
    )
    return '\n'.join(lines)
182
+
183
+
184
def _cli_epilog() -> str:
    """
    Return the epilog for the legacy parser's help output.

    Returns
    -------
    str
        A two-line tip about the format override options.
    """
    tip = (
        ' --source-format and --target-format override format '
        'inference based on filename extensions when needed.'
    )
    return f'Tip:\n{tip}'
192
+
193
+
194
def _emit_context_help(
    ctx: click.Context | None,
) -> bool:
    """
    Write the help text for the provided Click context to stderr.

    Parameters
    ----------
    ctx : click.Context | None
        The Click context to emit help for.

    Returns
    -------
    bool
        ``True`` when help was emitted, ``False`` when ``ctx`` was ``None``.
    """
    if ctx is None:
        return False

    # ``get_help()`` returns the rendered help instead of printing it, so the
    # result has to be written out explicitly. The redirect stays in place for
    # formatters that print while rendering (Typer's Rich help appears to do
    # this -- TODO confirm); their output still lands on stderr.
    with contextlib.redirect_stdout(sys.stderr):
        help_text = ctx.get_help()
    if help_text:
        print(help_text, file=sys.stderr)
    return True
216
+
217
+
218
def _emit_root_help(
    command: click.Command,
) -> None:
    """
    Emit help for the root ``etlplus`` command.

    A context is created for ``command`` with no arguments, handed to
    :func:`_emit_context_help`, and closed afterwards even if emitting
    fails.

    Parameters
    ----------
    command : click.Command
        The root Typer/Click command.
    """
    root_ctx = command.make_context(
        info_name='etlplus',
        args=[],
        resilient_parsing=True,
    )
    try:
        _emit_context_help(root_ctx)
    finally:
        root_ctx.close()
234
+
235
+
236
def _is_illegal_option_error(
    exc: click.exceptions.UsageError,
) -> bool:
    """
    Tell whether a usage error was caused by an invalid option.

    Parameters
    ----------
    exc : click.exceptions.UsageError
        The usage error to inspect.

    Returns
    -------
    bool
        ``True`` when ``exc`` is a ``BadOptionUsage`` or ``NoSuchOption``
        instance.
    """
    option_errors = (
        click.exceptions.BadOptionUsage,
        click.exceptions.NoSuchOption,
    )
    return isinstance(exc, option_errors)
259
+
260
+
261
def _is_unknown_command_error(
    exc: click.exceptions.UsageError,
) -> bool:
    """
    Tell whether a usage error reports an unknown subcommand.

    The check is textual: the error's ``message`` attribute (or ``str(exc)``
    when that is missing or empty) must start with ``'No such command '``.

    Parameters
    ----------
    exc : click.exceptions.UsageError
        The usage error to inspect.

    Returns
    -------
    bool
        ``True`` when the error indicates an unknown command.
    """
    text = getattr(exc, 'message', None)
    if not text:
        text = str(exc)
    return text.startswith('No such command ')
279
+
280
+
281
+ # SECTION: FUNCTIONS ======================================================== #
282
+
283
+
284
def create_parser() -> argparse.ArgumentParser:
    """
    Build the legacy :mod:`argparse` parser for the ``etlplus`` CLI.

    The parser defines the ``extract``, ``validate``, ``transform``,
    ``load``, ``pipeline``, ``render``, ``check``, and ``run`` subcommands.
    Each subcommand stores its ``cmd_*`` handler as the ``func`` default on
    the parsed namespace.

    Returns
    -------
    argparse.ArgumentParser
        Parser compatible with historical ``etlplus`` entry points.
    """
    root = argparse.ArgumentParser(
        prog='etlplus',
        description=_cli_description(),
        epilog=_cli_epilog(),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    root.add_argument(
        '-V',
        '--version',
        action='version',
        version=f'%(prog)s {__version__}',
    )

    commands = root.add_subparsers(dest='command', help='Available commands')
    commands.required = True

    def add_command(name: str, summary: str) -> argparse.ArgumentParser:
        # Every subcommand uses the defaults-aware help formatter.
        return commands.add_parser(
            name,
            help=summary,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        )

    # -- extract ----------------------------------------------------------- #
    extract_cmd = add_command(
        'extract',
        'Extract data from sources (files, databases, REST APIs)',
    )
    extract_cmd.add_argument(
        'source_type',
        choices=list(DataConnectorType.choices()),
        help='Type of source to extract from',
    )
    extract_cmd.add_argument(
        'source',
        help=(
            'Source location (file path, database connection string, '
            'or API URL)'
        ),
    )
    _add_format_options(extract_cmd, context='source')
    extract_cmd.set_defaults(func=cmd_extract)

    # -- validate ---------------------------------------------------------- #
    validate_cmd = add_command('validate', 'Validate data from sources')
    validate_cmd.add_argument(
        'source',
        help='Data source to validate (file path or JSON string)',
    )
    validate_cmd.add_argument(
        '--rules',
        type=json_type,
        default={},
        help='Validation rules as JSON string',
    )
    validate_cmd.set_defaults(func=cmd_validate)

    # -- transform --------------------------------------------------------- #
    transform_cmd = add_command('transform', 'Transform data')
    transform_cmd.add_argument(
        'source',
        help='Data source to transform (file path or JSON string)',
    )
    transform_cmd.add_argument(
        '--operations',
        type=json_type,
        default={},
        help='Transformation operations as JSON string',
    )
    transform_cmd.add_argument(
        '--from',
        dest='from_',
        choices=list(DataConnectorType.choices()),
        help='Override the inferred source type (file, database, api).',
    )
    transform_cmd.add_argument(
        '--to',
        dest='to',
        choices=list(DataConnectorType.choices()),
        help='Override the inferred target type (file, database, api).',
    )
    transform_cmd.add_argument(
        '--source-format',
        dest='source_format',
        choices=list(FileFormat.choices()),
        help=(
            'Input payload format when SOURCE is - or a literal payload. '
            'File sources infer format from the extension.'
        ),
    )
    transform_cmd.add_argument(
        '--target-format',
        dest='target_format',
        choices=list(FileFormat.choices()),
        help=(
            'Output payload format '
            'when writing to stdout or non-file targets. '
            'File targets infer format from the extension.'
        ),
    )
    transform_cmd.set_defaults(func=cmd_transform)

    # -- load -------------------------------------------------------------- #
    load_cmd = add_command(
        'load',
        'Load data to targets (files, databases, REST APIs)',
    )
    load_cmd.add_argument(
        'source',
        help='Data source to load (file path or JSON string)',
    )
    load_cmd.add_argument(
        'target_type',
        choices=list(DataConnectorType.choices()),
        help='Type of target to load to',
    )
    load_cmd.add_argument(
        'target',
        help=(
            'Target location (file path, database connection string, '
            'or API URL)'
        ),
    )
    _add_format_options(load_cmd, context='target')
    load_cmd.set_defaults(func=cmd_load)

    # -- pipeline (deprecated) --------------------------------------------- #
    pipeline_cmd = add_command(
        'pipeline',
        'DEPRECATED: use "list" (for summary/jobs) or "run" (to execute); '
        f'see {PROJECT_URL}/blob/main/docs/pipeline-guide.md',
    )
    _add_config_option(pipeline_cmd)
    pipeline_cmd.add_argument(
        '--list',
        action='store_true',
        help='List available job names and exit',
    )
    pipeline_cmd.add_argument(
        '--run',
        metavar='JOB',
        help='Run a specific job by name',
    )
    pipeline_cmd.set_defaults(func=cmd_pipeline)

    # -- render ------------------------------------------------------------ #
    render_cmd = add_command(
        'render',
        'Render SQL DDL from table schema specs',
    )
    render_cmd.add_argument(
        '--config',
        help='Pipeline YAML containing table_schemas',
    )
    render_cmd.add_argument(
        '-o',
        '--output',
        help='Write SQL to this path (stdout when omitted)',
    )
    render_cmd.add_argument(
        '--spec',
        help='Standalone table spec file (.yml/.yaml/.json)',
    )
    render_cmd.add_argument(
        '--table',
        help='Render only the table matching this name',
    )
    render_cmd.add_argument(
        '--template',
        default='ddl',
        help='Template key (ddl/view) or path to a Jinja template file',
    )
    render_cmd.add_argument(
        '--template-path',
        dest='template_path',
        help=(
            'Explicit path to a Jinja template file (overrides template key).'
        ),
    )
    render_cmd.set_defaults(func=cmd_render)

    # -- check ------------------------------------------------------------- #
    check_cmd = add_command('check', 'Inspect ETL pipeline metadata')
    _add_config_option(check_cmd)
    check_flags = (
        ('jobs', 'List ETL jobs'),
        ('pipelines', 'List ETL pipelines'),
        ('sources', 'List data sources'),
        (
            'summary',
            'Show pipeline summary (name, version, sources, targets, jobs)',
        ),
        ('targets', 'List data targets'),
        ('transforms', 'List data transforms'),
    )
    for flag_name, flag_help in check_flags:
        _add_boolean_flag(check_cmd, name=flag_name, help_text=flag_help)
    check_cmd.set_defaults(func=cmd_check)

    # -- run --------------------------------------------------------------- #
    run_cmd = add_command(
        'run',
        'Run an ETL pipeline '
        f'(see {PROJECT_URL}/blob/main/docs/run-module.md)',
    )
    _add_config_option(run_cmd)
    run_cmd.add_argument(
        '-j',
        '--job',
        help='Name of the job to run',
    )
    run_cmd.add_argument(
        '-p',
        '--pipeline',
        help='Name of the pipeline to run',
    )
    run_cmd.set_defaults(func=cmd_run)

    return root
543
+
544
+
545
def main(
    argv: list[str] | None = None,
) -> int:
    """
    Run the Typer-powered CLI and normalize exit codes.

    Parameters
    ----------
    argv : list[str] | None, optional
        Sequence of command-line arguments excluding the program name. When
        ``None``, defaults to ``sys.argv[1:]``.

    Returns
    -------
    int
        A conventional POSIX exit code: zero on success, non-zero on error.

    Raises
    ------
    click.exceptions.UsageError
        Re-raised when the usage error is neither an unknown command nor an
        illegal option; those two cases print help and return an exit code
        instead.
    SystemExit
        Re-raised unchanged to preserve the exit code. A message is printed
        to stderr only when the exit code signals failure.

    Notes
    -----
    Parsing and dispatch are delegated to the Click command built from the
    Typer ``app``.
    """
    resolved_argv = sys.argv[1:] if argv is None else list(argv)
    command = typer.main.get_command(app)

    try:
        # Non-standalone mode: Click hands results and errors back to this
        # function rather than exiting the process itself.
        result = command.main(
            args=resolved_argv,
            prog_name='etlplus',
            standalone_mode=False,
        )
        return int(result or 0)

    except click.exceptions.UsageError as exc:
        unknown_command = _is_unknown_command_error(exc)
        if not (unknown_command or _is_illegal_option_error(exc)):
            raise

        typer.echo(f'Error: {exc}', err=True)
        # Unknown commands always get the root help; illegal options prefer
        # the help of the context that rejected them.
        if unknown_command or not _emit_context_help(exc.ctx):
            _emit_root_help(command)
        return int(getattr(exc, 'exit_code', 2))

    except typer.Exit as exc:
        return int(exc.exit_code)

    except typer.Abort:
        return 1

    except KeyboardInterrupt:  # pragma: no cover - interactive path
        # Conventional exit code for SIGINT
        return 130

    except SystemExit as exc:
        # A clean exit (code ``None`` or ``0``) is not an error, so do not
        # print a misleading "Error: 0" line for it.
        if exc.code not in (None, 0):
            print(f'Error: {exc}', file=sys.stderr)
        raise

    except (OSError, TypeError, ValueError) as exc:
        print(f'Error: {exc}', file=sys.stderr)
        return 1
etlplus/config/__init__.py ADDED
@@ -0,0 +1,56 @@
1
+ """
2
+ :mod:`etlplus.config` package.
3
+
4
+ Configuration models and helpers for ETLPlus.
5
+
6
+ This package defines models for data sources/targets ("connectors"), APIs,
7
+ pagination/rate limits, pipeline orchestration, and related utilities. The
8
+ parsers are permissive (accepting ``Mapping[str, Any]``) and normalize to
9
+ concrete types without raising on unknown/optional fields.
10
+
11
+ Notes
12
+ -----
13
+ - The models use ``@dataclass(slots=True)`` and avoid mutating inputs.
14
+ - TypedDicts are editor/type-checking hints and are not enforced at runtime.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from .connector import Connector
20
+ from .connector import ConnectorApi
21
+ from .connector import ConnectorDb
22
+ from .connector import ConnectorFile
23
+ from .connector import parse_connector
24
+ from .jobs import ExtractRef
25
+ from .jobs import JobConfig
26
+ from .jobs import LoadRef
27
+ from .jobs import TransformRef
28
+ from .jobs import ValidationRef
29
+ from .pipeline import PipelineConfig
30
+ from .pipeline import load_pipeline_config
31
+ from .profile import ProfileConfig
32
+ from .types import ConnectorType
33
+
34
+ # SECTION: EXPORTS ========================================================== #
35
+
36
+
37
# Names re-exported from the connector, jobs, pipeline, profile, and types
# submodules.
__all__ = [
    # Connectors
    'Connector', 'ConnectorType', 'ConnectorApi', 'ConnectorDb',
    'ConnectorFile', 'parse_connector',
    # Jobs / Refs
    'ExtractRef', 'JobConfig', 'LoadRef', 'TransformRef', 'ValidationRef',
    # Pipeline
    'PipelineConfig', 'load_pipeline_config',
    # Profile
    'ProfileConfig',
]