etlplus 0.9.0__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +37 -0
- etlplus/__init__.py +1 -26
- etlplus/api/README.md +51 -3
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +39 -28
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +96 -6
- etlplus/{run_helpers.py → api/utils.py} +209 -153
- etlplus/cli/README.md +40 -0
- etlplus/cli/commands.py +94 -61
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +40 -12
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +4 -7
- etlplus/database/README.md +48 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/database/schema.py +1 -1
- etlplus/enums.py +1 -107
- etlplus/file/README.md +105 -0
- etlplus/file/__init__.py +25 -0
- etlplus/file/_imports.py +141 -0
- etlplus/file/_io.py +160 -0
- etlplus/file/accdb.py +78 -0
- etlplus/file/arrow.py +78 -0
- etlplus/file/avro.py +176 -0
- etlplus/file/bson.py +77 -0
- etlplus/file/cbor.py +78 -0
- etlplus/file/cfg.py +79 -0
- etlplus/file/conf.py +80 -0
- etlplus/file/core.py +322 -0
- etlplus/file/csv.py +79 -0
- etlplus/file/dat.py +78 -0
- etlplus/file/dta.py +77 -0
- etlplus/file/duckdb.py +78 -0
- etlplus/file/enums.py +343 -0
- etlplus/file/feather.py +111 -0
- etlplus/file/fwf.py +77 -0
- etlplus/file/gz.py +123 -0
- etlplus/file/hbs.py +78 -0
- etlplus/file/hdf5.py +78 -0
- etlplus/file/ini.py +79 -0
- etlplus/file/ion.py +78 -0
- etlplus/file/jinja2.py +78 -0
- etlplus/file/json.py +98 -0
- etlplus/file/log.py +78 -0
- etlplus/file/mat.py +78 -0
- etlplus/file/mdb.py +78 -0
- etlplus/file/msgpack.py +78 -0
- etlplus/file/mustache.py +78 -0
- etlplus/file/nc.py +78 -0
- etlplus/file/ndjson.py +108 -0
- etlplus/file/numbers.py +75 -0
- etlplus/file/ods.py +79 -0
- etlplus/file/orc.py +111 -0
- etlplus/file/parquet.py +113 -0
- etlplus/file/pb.py +78 -0
- etlplus/file/pbf.py +77 -0
- etlplus/file/properties.py +78 -0
- etlplus/file/proto.py +77 -0
- etlplus/file/psv.py +79 -0
- etlplus/file/rda.py +78 -0
- etlplus/file/rds.py +78 -0
- etlplus/file/sas7bdat.py +78 -0
- etlplus/file/sav.py +77 -0
- etlplus/file/sqlite.py +78 -0
- etlplus/file/stub.py +84 -0
- etlplus/file/sylk.py +77 -0
- etlplus/file/tab.py +81 -0
- etlplus/file/toml.py +78 -0
- etlplus/file/tsv.py +80 -0
- etlplus/file/txt.py +102 -0
- etlplus/file/vm.py +78 -0
- etlplus/file/wks.py +77 -0
- etlplus/file/xls.py +88 -0
- etlplus/file/xlsm.py +79 -0
- etlplus/file/xlsx.py +99 -0
- etlplus/file/xml.py +185 -0
- etlplus/file/xpt.py +78 -0
- etlplus/file/yaml.py +95 -0
- etlplus/file/zip.py +175 -0
- etlplus/file/zsav.py +77 -0
- etlplus/ops/README.md +50 -0
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +81 -99
- etlplus/{load.py → ops/load.py} +78 -101
- etlplus/{run.py → ops/run.py} +159 -127
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +53 -17
- etlplus/{validate.py → ops/validate.py} +22 -12
- etlplus/templates/README.md +46 -0
- etlplus/types.py +5 -4
- etlplus/utils.py +136 -2
- etlplus/workflow/README.md +52 -0
- etlplus/{config → workflow}/__init__.py +10 -23
- etlplus/{config → workflow}/connector.py +58 -44
- etlplus/workflow/dag.py +105 -0
- etlplus/{config → workflow}/jobs.py +105 -32
- etlplus/{config → workflow}/pipeline.py +59 -51
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/METADATA +210 -17
- etlplus-0.9.2.dist-info/RECORD +134 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/WHEEL +1 -1
- etlplus/config/types.py +0 -204
- etlplus/config/utils.py +0 -120
- etlplus/file.py +0 -657
- etlplus/validation/__init__.py +0 -44
- etlplus-0.9.0.dist-info/RECORD +0 -65
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.0.dist-info → etlplus-0.9.2.dist-info}/top_level.txt +0 -0
etlplus/cli/commands.py
CHANGED
@@ -36,7 +36,7 @@ from typing import cast
 import typer

 from .. import __version__
-from ..
+from ..file import FileFormat
 from . import handlers
 from .constants import CLI_DESCRIPTION
 from .constants import CLI_EPILOG
@@ -61,6 +61,24 @@ __all__ = ['app']

 # SECTION: TYPE ALIASES ==================================================== #

+
+JobOption = Annotated[
+    str | None,
+    typer.Option(
+        '-j',
+        '--job',
+        help='Name of the job to run',
+    ),
+]
+
+JobsOption = Annotated[
+    bool,
+    typer.Option(
+        '--jobs',
+        help='List available job names and exit',
+    ),
+]
+
 OperationsOption = Annotated[
     str,
     typer.Option(
@@ -89,6 +107,23 @@ PipelineConfigOption = Annotated[
     ),
 ]

+PipelineOption = Annotated[
+    str | None,
+    typer.Option(
+        '-p',
+        '--pipeline',
+        help='Name of the pipeline to run',
+    ),
+]
+
+PipelinesOption = Annotated[
+    bool,
+    typer.Option(
+        '--pipelines',
+        help='List ETL pipelines',
+    ),
+]
+
 RenderConfigOption = Annotated[
     str | None,
     typer.Option(
@@ -193,6 +228,22 @@ SourceTypeOption = Annotated[
     ),
 ]

+SourcesOption = Annotated[
+    bool,
+    typer.Option(
+        '--sources',
+        help='List data sources',
+    ),
+]
+
+SummaryOption = Annotated[
+    bool,
+    typer.Option(
+        '--summary',
+        help='Show pipeline summary (name, version, sources, targets, jobs)',
+    ),
+]
+
 TargetArg = Annotated[
     str,
     typer.Argument(
@@ -227,6 +278,22 @@ TargetTypeOption = Annotated[
     ),
 ]

+TargetsOption = Annotated[
+    bool,
+    typer.Option(
+        '--targets',
+        help='List data targets',
+    ),
+]
+
+TransformsOption = Annotated[
+    bool,
+    typer.Option(
+        '--transforms',
+        help='List data transforms',
+    ),
+]
+

 # SECTION: INTERNAL FUNCTIONS =============================================== #

@@ -341,36 +408,12 @@ def _root(
 def check_cmd(
     ctx: typer.Context,
     config: PipelineConfigOption,
-    jobs:
-
-
-
-
-
-        False,
-        '--pipelines',
-        help='List ETL pipelines',
-    ),
-    sources: bool = typer.Option(
-        False,
-        '--sources',
-        help='List data sources',
-    ),
-    summary: bool = typer.Option(
-        False,
-        '--summary',
-        help='Show pipeline summary (name, version, sources, targets, jobs)',
-    ),
-    targets: bool = typer.Option(
-        False,
-        '--targets',
-        help='List data targets',
-    ),
-    transforms: bool = typer.Option(
-        False,
-        '--transforms',
-        help='List data transforms',
-    ),
+    jobs: JobsOption = False,
+    pipelines: PipelinesOption = False,
+    sources: SourcesOption = False,
+    summary: SummaryOption = False,
+    targets: TargetsOption = False,
+    transforms: TransformsOption = False,
 ) -> int:
     """
     Inspect a pipeline configuration.
@@ -443,9 +486,9 @@ def extract_cmd(
         Source (JSON payload, file/folder path, URL/URI, or - for STDIN)
         from which to extract data. Default is ``-``.
     source_format : SourceFormatOption, optional
-
-
-
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
+        ``None``.
     source_type : SourceTypeOption, optional
         Data source type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -523,15 +566,15 @@ def load_cmd(
     ctx : typer.Context
         The Typer context.
     source_format : SourceFormatOption, optional
-
-
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
+        ``None``.
     target : TargetArg, optional
         Target (file/folder path, URL/URI, or - for STDOUT) into which to load
         data. Default is ``-``.
     target_format : TargetFormatOption, optional
-
-
-        ``None``.
+        Target data format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension. Default is ``None``.
     target_type : TargetTypeOption, optional
         Data target type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -683,18 +726,8 @@ def render_cmd(
 def run_cmd(
     ctx: typer.Context,
     config: PipelineConfigOption,
-    job:
-
-        '-j',
-        '--job',
-        help='Name of the job to run',
-    ),
-    pipeline: str | None = typer.Option(
-        None,
-        '-p',
-        '--pipeline',
-        help='Name of the pipeline to run',
-    ),
+    job: JobOption = None,
+    pipeline: PipelineOption = None,
 ) -> int:
     """
     Execute an ETL job or pipeline from a YAML configuration.
@@ -760,9 +793,9 @@ def transform_cmd(
         Source (JSON payload, file/folder path, URL/URI, or - for STDIN) from
         which to extract data. Default is ``-``.
     source_format : SourceFormatOption, optional
-
-
-
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
+        ``None``.
     source_type : SourceTypeOption, optional
         Data source type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -770,9 +803,8 @@ def transform_cmd(
         Target (file/folder path, URL/URI, or - for STDOUT) into which to load
         data. Default is ``-``.
     target_format : TargetFormatOption, optional
-
-
-        ``None``.
+        Target data format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension. Default is ``None``.
     target_type : TargetTypeOption, optional
         Data target type. Overrides the inferred type (``api``, ``database``,
         ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
@@ -876,11 +908,12 @@ def validate_cmd(
     source : SourceArg
         Data source to validate (path, JSON payload, or - for STDIN).
     source_format : SourceFormatOption, optional
-
-        Default is
-    source_type : SourceTypeOption, optional
-        Override the inferred source type (file, database, api). Default is
+        Data source format. Overrides the inferred format (``csv``, ``json``,
+        etc.) based on filename extension or STDIN content. Default is
         ``None``.
+    source_type : SourceTypeOption, optional
+        Data source type. Overrides the inferred type (``api``, ``database``,
+        ``file``, ``folder``) based on URI/URL schema. Default is ``None``.
     output : OutputOption, optional
         Output file for validated output (- for STDOUT). Default is ``None``.

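The pattern introduced above moves each flag's `typer.Option(...)` metadata into a reusable `Annotated` type alias, so the command signatures collapse to one line per parameter. A minimal standalone sketch of that technique, assuming Typer with `Annotated` support (the command and option names here are illustrative, not part of etlplus):

```python
from typing import Annotated

import typer

# Reusable option alias: the Option metadata lives in the type,
# the default value stays in the command signature.
JobsOption = Annotated[
    bool,
    typer.Option('--jobs', help='List available job names and exit'),
]

app = typer.Typer()


@app.command()
def check(jobs: JobsOption = False) -> None:
    """Toy command mirroring the alias-based signature style."""
    typer.echo(f'jobs flag: {jobs}')


if __name__ == '__main__':
    app()
```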
etlplus/cli/constants.py
CHANGED
etlplus/cli/handlers.py
CHANGED
@@ -14,19 +14,20 @@ from typing import Any
 from typing import Literal
 from typing import cast

-from ..config import PipelineConfig
-from ..config import load_pipeline_config
 from ..database import load_table_spec
 from ..database import render_tables
-from ..extract import extract
 from ..file import File
-from ..
-from ..
-from ..
+from ..file import FileFormat
+from ..ops import extract
+from ..ops import load
+from ..ops import run
+from ..ops import transform
+from ..ops import validate
+from ..ops.validate import FieldRules
 from ..types import JSONData
 from ..types import TemplateKey
-from ..
-from ..
+from ..workflow import PipelineConfig
+from ..workflow import load_pipeline_config
 from . import io as cli_io

 # SECTION: EXPORTS ========================================================== #
@@ -120,9 +121,12 @@ def _check_sections(
     if targets:
         sections['targets'] = [tgt.name for tgt in cfg.targets]
     if transforms:
-
-
-
+        if isinstance(cfg.transforms, Mapping):
+            sections['transforms'] = list(cfg.transforms)
+        else:
+            sections['transforms'] = [
+                getattr(trf, 'name', None) for trf in cfg.transforms
+            ]
     if not sections:
         sections['jobs'] = _pipeline_summary(cfg)['jobs']
     return sections
@@ -156,6 +160,29 @@ def _pipeline_summary(
     }


+def _write_file_payload(
+    payload: JSONData,
+    target: str,
+    *,
+    format_hint: str | None,
+) -> None:
+    """
+    Write a JSON-like payload to a file path using an optional format hint.
+
+    Parameters
+    ----------
+    payload : JSONData
+        The structured data to write.
+    target : str
+        File path to write to.
+    format_hint : str | None
+        Optional format hint for :class:`FileFormat`.
+    """
+    file_path = Path(target)
+    file_format = FileFormat.coerce(format_hint) if format_hint else None
+    File(file_path, file_format=file_format).write(payload)
+
+
 # SECTION: FUNCTIONS ======================================================== #


@@ -569,8 +596,9 @@ def transform_handler(

     data = transform(payload, cast(TransformOperations, operations_payload))

+    # TODO: Generalize to handle non-file targets.
     if target and target != '-':
-
+        _write_file_payload(data, target, format_hint=target_format)
         print(f'Data transformed and saved to {target}')
         return 0

etlplus/cli/io.py
CHANGED
@@ -15,8 +15,8 @@ from pathlib import Path
 from typing import Any
 from typing import cast

-from ..enums import FileFormat
 from ..file import File
+from ..file import FileFormat
 from ..types import JSONData
 from ..utils import print_json

@@ -331,6 +331,6 @@ def write_json_output(
     """
     if not output_path or output_path == '-':
         return False
-    File(Path(output_path), FileFormat.JSON).
+    File(Path(output_path), FileFormat.JSON).write(data)
     print(f'{success_message} {output_path}')
     return True
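Several call sites in this release converge on the same `File` facade: construct it with a path (and optionally a `FileFormat`), then `read()` or `write()`. A hedged round-trip sketch, assuming `File` infers the format from the extension when none is given (the output filename is illustrative):

```python
from pathlib import Path

from etlplus.file import File, FileFormat

payload = {'rows': [{'id': 1, 'name': 'Ada'}]}

# Explicit format, mirroring write_json_output() above.
File(Path('out.json'), FileFormat.JSON).write(payload)

# Format inferred from the extension, mirroring the spec/config loaders below.
data = File(Path('out.json')).read()
print(data)
```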
etlplus/cli/main.py
CHANGED
etlplus/cli/state.py
CHANGED
@@ -15,6 +15,7 @@ from typing import Final

 import typer

+from ..utils import normalize_str
 from .constants import DATA_CONNECTORS

 # SECTION: EXPORTS ========================================================== #
@@ -322,14 +323,10 @@ def validate_choice(
     typer.BadParameter
         If the input value is not in the set of valid choices.
     """
-    v = str(value or '')
-    normalized_choices = {c
+    v = normalize_str(str(value or ''))
+    normalized_choices = {normalize_str(c): c for c in choices}
     if v in normalized_choices:
-
-    for choice in choices:
-        if choice.lower() == v:
-            return choice
-    return v
+        return normalized_choices[v]
     allowed = ', '.join(sorted(choices))
     raise typer.BadParameter(
         f"Invalid {label} '{value}'. Choose from: {allowed}",
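The rewritten `validate_choice` replaces a manual lowercase loop with a normalized lookup table. A standalone sketch of that lookup approach; here `normalize` simply case-folds and trims, which is an assumption about what `etlplus.utils.normalize_str` does:

```python
def normalize(value: str) -> str:
    # Assumed behaviour: case-fold and trim surrounding whitespace.
    return value.strip().lower()


def validate_choice(value: str, choices: set[str]) -> str:
    """Return the canonical spelling of *value*, or raise ValueError."""
    normalized = {normalize(c): c for c in choices}
    key = normalize(value)
    if key in normalized:
        return normalized[key]
    allowed = ', '.join(sorted(choices))
    raise ValueError(f"Invalid value {value!r}. Choose from: {allowed}")


print(validate_choice(' CSV ', {'csv', 'json'}))  # -> 'csv'
```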
etlplus/database/README.md
ADDED
@@ -0,0 +1,48 @@
+# `etlplus.database` Subpackage
+
+Documentation for the `etlplus.database` subpackage: database engine, schema, and ORM helpers.
+
+- Provides database engine and connection management
+- Supports schema definition and DDL generation
+- Includes lightweight ORM utilities for tabular data
+- Exposes type definitions for database objects
+
+Back to project overview: see the top-level [README](../../README.md).
+
+- [`etlplus.database` Subpackage](#etlplusdatabase-subpackage)
+  - [Database Engine and Connections](#database-engine-and-connections)
+  - [Schema and DDL Helpers](#schema-and-ddl-helpers)
+  - [ORM Utilities](#orm-utilities)
+  - [Example: Creating a Table](#example-creating-a-table)
+  - [See Also](#see-also)
+
+## Database Engine and Connections
+
+- Manage connections to supported databases
+- Configure engines for different backends
+
+## Schema and DDL Helpers
+
+- Define table schemas and columns
+- Generate DDL statements for supported databases
+
+## ORM Utilities
+
+- Map rows to Python objects
+- Simple CRUD helpers for tabular data
+
+## Example: Creating a Table
+
+```python
+from etlplus.database import Schema, Engine
+
+engine = Engine.connect("sqlite:///example.db")
+schema = Schema.from_dict({"name": "users", "columns": [ ... ]})
+engine.create_table(schema)
+```
+
+## See Also
+
+- Top-level CLI and library usage in the main [README](../../README.md)
+- Schema helpers in [schema.py](schema.py)
+- ORM utilities in [orm.py](orm.py)
etlplus/database/ddl.py
CHANGED
@@ -203,7 +203,7 @@ def load_table_spec(
         raise ValueError('Spec must be .json, .yml, or .yaml')

     try:
-        spec = File
+        spec = File(spec_path).read()
     except ImportError as e:
         if suffix in {'.yml', '.yaml'}:
             raise RuntimeError(
etlplus/database/engine.py
CHANGED
@@ -113,7 +113,7 @@ def load_database_url_from_config(
     ValueError
         If no connection string/URL/DSN is found for the specified entry.
     """
-    cfg = File
+    cfg = File(Path(path)).read()
     if not isinstance(cfg, Mapping):
         raise TypeError('Database config must be a mapping')

@@ -136,9 +136,25 @@ def load_database_url_from_config(
     return url


-def make_engine(
-
+def make_engine(
+    url: str | None = None,
+    **engine_kwargs: Any,
+) -> Engine:
+    """
+    Create a SQLAlchemy Engine, defaulting to env config if no URL given.
+
+    Parameters
+    ----------
+    url : str | None, optional
+        Database URL/DSN string. When omitted, ``DATABASE_URL`` is used.
+    **engine_kwargs : Any
+        Extra keyword arguments forwarded to ``create_engine``.

+    Returns
+    -------
+    Engine
+        Configured SQLAlchemy engine instance.
+    """
     resolved_url = url or DATABASE_URL
     return create_engine(resolved_url, pool_pre_ping=True, **engine_kwargs)

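The reworked `make_engine` accepts an optional URL plus passthrough keyword arguments for `create_engine`. A usage sketch, assuming the function is importable from `etlplus.database.engine` and that `DATABASE_URL` is resolved from environment configuration when the URL is omitted:

```python
from etlplus.database.engine import make_engine

# Explicit URL; extra keyword arguments are forwarded to SQLAlchemy's create_engine().
engine = make_engine('sqlite:///example.db', echo=True)

# Omitting the URL falls back to the module-level DATABASE_URL:
# engine = make_engine()

with engine.connect() as conn:
    # pool_pre_ping is enabled by make_engine, so stale connections are recycled.
    print(conn.exec_driver_sql('SELECT 1').scalar())
```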
etlplus/database/orm.py
CHANGED
@@ -201,12 +201,14 @@ def build_models(
 ) -> ModelRegistry:
     """
     Build SQLAlchemy ORM models from table specifications.
+
     Parameters
     ----------
     specs : list[TableSpec]
         List of table specifications.
     base : type[DeclarativeBase], optional
         Base class for the ORM models (default: :class:`Base`).
+
     Returns
     -------
     ModelRegistry
etlplus/database/schema.py
CHANGED
etlplus/enums.py
CHANGED
@@ -19,16 +19,12 @@ from .types import StrStrMap


 __all__ = [
+    # Enums
     'AggregateName',
     'CoercibleStrEnum',
     'DataConnectorType',
-    'FileFormat',
-    'HttpMethod',
     'OperatorName',
     'PipelineStep',
-    'coerce_data_connector_type',
-    'coerce_file_format',
-    'coerce_http_method',
 ]


@@ -203,69 +199,6 @@ class DataConnectorType(CoercibleStrEnum):
     }


-class FileFormat(CoercibleStrEnum):
-    """Supported file formats for extraction."""
-
-    # -- Constants -- #
-
-    CSV = 'csv'
-    JSON = 'json'
-    XML = 'xml'
-    YAML = 'yaml'
-
-    # -- Class Methods -- #
-
-    @classmethod
-    def aliases(cls) -> StrStrMap:
-        """
-        Return a mapping of common aliases for each enum member.
-
-        Returns
-        -------
-        StrStrMap
-            A mapping of alias names to their corresponding enum member names.
-        """
-        return {
-            # Common shorthand
-            'yml': 'yaml',
-            # MIME types
-            'text/csv': 'csv',
-            'application/json': 'json',
-            'application/xml': 'xml',
-        }
-
-
-class HttpMethod(CoercibleStrEnum):
-    """Supported HTTP verbs that accept JSON payloads."""
-
-    # -- Constants -- #
-
-    CONNECT = 'connect'
-    DELETE = 'delete'
-    GET = 'get'
-    HEAD = 'head'
-    OPTIONS = 'options'
-    PATCH = 'patch'
-    POST = 'post'
-    PUT = 'put'
-    TRACE = 'trace'
-
-    # -- Getters -- #
-
-    @property
-    def allows_body(self) -> bool:
-        """
-        Whether the method typically allows a request body.
-
-        Notes
-        -----
-        - RFCs do not strictly forbid bodies on some other methods (e.g.,
-          ``DELETE``), but many servers/clients do not expect them. We mark
-          ``POST``, ``PUT``, and ``PATCH`` as True.
-        """
-        return self in {HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH}
-
-
 class OperatorName(CoercibleStrEnum):
     """Supported comparison operators with helpers."""

@@ -373,42 +306,3 @@ _PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
     PipelineStep.SORT: 3,
     PipelineStep.AGGREGATE: 4,
 }
-
-
-# SECTION: FUNCTIONS ======================================================== #
-
-
-def coerce_data_connector_type(
-    connector: DataConnectorType | str,
-) -> DataConnectorType:
-    """
-    Normalize textual data connector values to :class:`DataConnectorType`.
-
-    This thin wrapper is kept for backward compatibility; prefer
-    :meth:`DataConnectorType.coerce` going forward.
-    """
-    return DataConnectorType.coerce(connector)
-
-
-def coerce_file_format(
-    file_format: FileFormat | str,
-) -> FileFormat:
-    """
-    Normalize textual file format values to :class:`FileFormat`.
-
-    This thin wrapper is kept for backward compatibility; prefer
-    :meth:`FileFormat.coerce` going forward.
-    """
-    return FileFormat.coerce(file_format)
-
-
-def coerce_http_method(
-    http_method: HttpMethod | str,
-) -> HttpMethod:
-    """
-    Normalize textual HTTP method values to :class:`HttpMethod`.
-
-    This thin wrapper is kept for backward compatibility; prefer
-    :meth:`HttpMethod.coerce` going forward.
-    """
-    return HttpMethod.coerce(http_method)