etlplus 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +37 -0
- etlplus/__init__.py +1 -26
- etlplus/api/README.md +51 -3
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +39 -28
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +96 -6
- etlplus/{run_helpers.py → api/utils.py} +209 -153
- etlplus/cli/README.md +40 -0
- etlplus/cli/commands.py +76 -43
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +40 -12
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +4 -7
- etlplus/database/README.md +48 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/database/schema.py +1 -1
- etlplus/enums.py +1 -157
- etlplus/file/README.md +105 -0
- etlplus/file/__init__.py +25 -0
- etlplus/file/_imports.py +141 -0
- etlplus/file/_io.py +160 -0
- etlplus/file/accdb.py +78 -0
- etlplus/file/arrow.py +78 -0
- etlplus/file/avro.py +176 -0
- etlplus/file/bson.py +77 -0
- etlplus/file/cbor.py +78 -0
- etlplus/file/cfg.py +79 -0
- etlplus/file/conf.py +80 -0
- etlplus/file/core.py +322 -0
- etlplus/file/csv.py +79 -0
- etlplus/file/dat.py +78 -0
- etlplus/file/dta.py +77 -0
- etlplus/file/duckdb.py +78 -0
- etlplus/file/enums.py +343 -0
- etlplus/file/feather.py +111 -0
- etlplus/file/fwf.py +77 -0
- etlplus/file/gz.py +123 -0
- etlplus/file/hbs.py +78 -0
- etlplus/file/hdf5.py +78 -0
- etlplus/file/ini.py +79 -0
- etlplus/file/ion.py +78 -0
- etlplus/file/jinja2.py +78 -0
- etlplus/file/json.py +98 -0
- etlplus/file/log.py +78 -0
- etlplus/file/mat.py +78 -0
- etlplus/file/mdb.py +78 -0
- etlplus/file/msgpack.py +78 -0
- etlplus/file/mustache.py +78 -0
- etlplus/file/nc.py +78 -0
- etlplus/file/ndjson.py +108 -0
- etlplus/file/numbers.py +75 -0
- etlplus/file/ods.py +79 -0
- etlplus/file/orc.py +111 -0
- etlplus/file/parquet.py +113 -0
- etlplus/file/pb.py +78 -0
- etlplus/file/pbf.py +77 -0
- etlplus/file/properties.py +78 -0
- etlplus/file/proto.py +77 -0
- etlplus/file/psv.py +79 -0
- etlplus/file/rda.py +78 -0
- etlplus/file/rds.py +78 -0
- etlplus/file/sas7bdat.py +78 -0
- etlplus/file/sav.py +77 -0
- etlplus/file/sqlite.py +78 -0
- etlplus/file/stub.py +84 -0
- etlplus/file/sylk.py +77 -0
- etlplus/file/tab.py +81 -0
- etlplus/file/toml.py +78 -0
- etlplus/file/tsv.py +80 -0
- etlplus/file/txt.py +102 -0
- etlplus/file/vm.py +78 -0
- etlplus/file/wks.py +77 -0
- etlplus/file/xls.py +88 -0
- etlplus/file/xlsm.py +79 -0
- etlplus/file/xlsx.py +99 -0
- etlplus/file/xml.py +185 -0
- etlplus/file/xpt.py +78 -0
- etlplus/file/yaml.py +95 -0
- etlplus/file/zip.py +175 -0
- etlplus/file/zsav.py +77 -0
- etlplus/ops/README.md +50 -0
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +81 -99
- etlplus/{load.py → ops/load.py} +78 -101
- etlplus/{run.py → ops/run.py} +159 -127
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +53 -17
- etlplus/{validate.py → ops/validate.py} +22 -12
- etlplus/templates/README.md +46 -0
- etlplus/types.py +5 -4
- etlplus/utils.py +136 -2
- etlplus/workflow/README.md +52 -0
- etlplus/{config → workflow}/__init__.py +10 -23
- etlplus/{config → workflow}/connector.py +58 -44
- etlplus/workflow/dag.py +105 -0
- etlplus/{config → workflow}/jobs.py +105 -32
- etlplus/{config → workflow}/pipeline.py +59 -51
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/METADATA +210 -17
- etlplus-0.9.2.dist-info/RECORD +134 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/WHEEL +1 -1
- etlplus/config/types.py +0 -204
- etlplus/config/utils.py +0 -120
- etlplus/file.py +0 -657
- etlplus/validation/__init__.py +0 -44
- etlplus-0.9.1.dist-info/RECORD +0 -65
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/top_level.txt +0 -0
etlplus/cli/commands.py
CHANGED
|
@@ -36,7 +36,7 @@ from typing import cast
|
|
|
36
36
|
import typer
|
|
37
37
|
|
|
38
38
|
from .. import __version__
|
|
39
|
-
from ..
|
|
39
|
+
from ..file import FileFormat
|
|
40
40
|
from . import handlers
|
|
41
41
|
from .constants import CLI_DESCRIPTION
|
|
42
42
|
from .constants import CLI_EPILOG
|
|
@@ -61,6 +61,24 @@ __all__ = ['app']
|
|
|
61
61
|
|
|
62
62
|
# SECTION: TYPE ALIASES ==================================================== #
|
|
63
63
|
|
|
64
|
+
|
|
65
|
+
JobOption = Annotated[
|
|
66
|
+
str | None,
|
|
67
|
+
typer.Option(
|
|
68
|
+
'-j',
|
|
69
|
+
'--job',
|
|
70
|
+
help='Name of the job to run',
|
|
71
|
+
),
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
JobsOption = Annotated[
|
|
75
|
+
bool,
|
|
76
|
+
typer.Option(
|
|
77
|
+
'--jobs',
|
|
78
|
+
help='List available job names and exit',
|
|
79
|
+
),
|
|
80
|
+
]
|
|
81
|
+
|
|
64
82
|
OperationsOption = Annotated[
|
|
65
83
|
str,
|
|
66
84
|
typer.Option(
|
|
@@ -89,6 +107,23 @@ PipelineConfigOption = Annotated[
|
|
|
89
107
|
),
|
|
90
108
|
]
|
|
91
109
|
|
|
110
|
+
PipelineOption = Annotated[
|
|
111
|
+
str | None,
|
|
112
|
+
typer.Option(
|
|
113
|
+
'-p',
|
|
114
|
+
'--pipeline',
|
|
115
|
+
help='Name of the pipeline to run',
|
|
116
|
+
),
|
|
117
|
+
]
|
|
118
|
+
|
|
119
|
+
PipelinesOption = Annotated[
|
|
120
|
+
bool,
|
|
121
|
+
typer.Option(
|
|
122
|
+
'--pipelines',
|
|
123
|
+
help='List ETL pipelines',
|
|
124
|
+
),
|
|
125
|
+
]
|
|
126
|
+
|
|
92
127
|
RenderConfigOption = Annotated[
|
|
93
128
|
str | None,
|
|
94
129
|
typer.Option(
|
|
@@ -193,6 +228,22 @@ SourceTypeOption = Annotated[
|
|
|
193
228
|
),
|
|
194
229
|
]
|
|
195
230
|
|
|
231
|
+
SourcesOption = Annotated[
|
|
232
|
+
bool,
|
|
233
|
+
typer.Option(
|
|
234
|
+
'--sources',
|
|
235
|
+
help='List data sources',
|
|
236
|
+
),
|
|
237
|
+
]
|
|
238
|
+
|
|
239
|
+
SummaryOption = Annotated[
|
|
240
|
+
bool,
|
|
241
|
+
typer.Option(
|
|
242
|
+
'--summary',
|
|
243
|
+
help='Show pipeline summary (name, version, sources, targets, jobs)',
|
|
244
|
+
),
|
|
245
|
+
]
|
|
246
|
+
|
|
196
247
|
TargetArg = Annotated[
|
|
197
248
|
str,
|
|
198
249
|
typer.Argument(
|
|
@@ -227,6 +278,22 @@ TargetTypeOption = Annotated[
|
|
|
227
278
|
),
|
|
228
279
|
]
|
|
229
280
|
|
|
281
|
+
TargetsOption = Annotated[
|
|
282
|
+
bool,
|
|
283
|
+
typer.Option(
|
|
284
|
+
'--targets',
|
|
285
|
+
help='List data targets',
|
|
286
|
+
),
|
|
287
|
+
]
|
|
288
|
+
|
|
289
|
+
TransformsOption = Annotated[
|
|
290
|
+
bool,
|
|
291
|
+
typer.Option(
|
|
292
|
+
'--transforms',
|
|
293
|
+
help='List data transforms',
|
|
294
|
+
),
|
|
295
|
+
]
|
|
296
|
+
|
|
230
297
|
|
|
231
298
|
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
232
299
|
|
|
@@ -341,36 +408,12 @@ def _root(
|
|
|
341
408
|
def check_cmd(
|
|
342
409
|
ctx: typer.Context,
|
|
343
410
|
config: PipelineConfigOption,
|
|
344
|
-
jobs:
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
False,
|
|
351
|
-
'--pipelines',
|
|
352
|
-
help='List ETL pipelines',
|
|
353
|
-
),
|
|
354
|
-
sources: bool = typer.Option(
|
|
355
|
-
False,
|
|
356
|
-
'--sources',
|
|
357
|
-
help='List data sources',
|
|
358
|
-
),
|
|
359
|
-
summary: bool = typer.Option(
|
|
360
|
-
False,
|
|
361
|
-
'--summary',
|
|
362
|
-
help='Show pipeline summary (name, version, sources, targets, jobs)',
|
|
363
|
-
),
|
|
364
|
-
targets: bool = typer.Option(
|
|
365
|
-
False,
|
|
366
|
-
'--targets',
|
|
367
|
-
help='List data targets',
|
|
368
|
-
),
|
|
369
|
-
transforms: bool = typer.Option(
|
|
370
|
-
False,
|
|
371
|
-
'--transforms',
|
|
372
|
-
help='List data transforms',
|
|
373
|
-
),
|
|
411
|
+
jobs: JobsOption = False,
|
|
412
|
+
pipelines: PipelinesOption = False,
|
|
413
|
+
sources: SourcesOption = False,
|
|
414
|
+
summary: SummaryOption = False,
|
|
415
|
+
targets: TargetsOption = False,
|
|
416
|
+
transforms: TransformsOption = False,
|
|
374
417
|
) -> int:
|
|
375
418
|
"""
|
|
376
419
|
Inspect a pipeline configuration.
|
|
@@ -683,18 +726,8 @@ def render_cmd(
|
|
|
683
726
|
def run_cmd(
|
|
684
727
|
ctx: typer.Context,
|
|
685
728
|
config: PipelineConfigOption,
|
|
686
|
-
job:
|
|
687
|
-
|
|
688
|
-
'-j',
|
|
689
|
-
'--job',
|
|
690
|
-
help='Name of the job to run',
|
|
691
|
-
),
|
|
692
|
-
pipeline: str | None = typer.Option(
|
|
693
|
-
None,
|
|
694
|
-
'-p',
|
|
695
|
-
'--pipeline',
|
|
696
|
-
help='Name of the pipeline to run',
|
|
697
|
-
),
|
|
729
|
+
job: JobOption = None,
|
|
730
|
+
pipeline: PipelineOption = None,
|
|
698
731
|
) -> int:
|
|
699
732
|
"""
|
|
700
733
|
Execute an ETL job or pipeline from a YAML configuration.
|
etlplus/cli/constants.py
CHANGED
etlplus/cli/handlers.py
CHANGED
|
@@ -14,19 +14,20 @@ from typing import Any
|
|
|
14
14
|
from typing import Literal
|
|
15
15
|
from typing import cast
|
|
16
16
|
|
|
17
|
-
from ..config import PipelineConfig
|
|
18
|
-
from ..config import load_pipeline_config
|
|
19
17
|
from ..database import load_table_spec
|
|
20
18
|
from ..database import render_tables
|
|
21
|
-
from ..extract import extract
|
|
22
19
|
from ..file import File
|
|
23
|
-
from ..
|
|
24
|
-
from ..
|
|
25
|
-
from ..
|
|
20
|
+
from ..file import FileFormat
|
|
21
|
+
from ..ops import extract
|
|
22
|
+
from ..ops import load
|
|
23
|
+
from ..ops import run
|
|
24
|
+
from ..ops import transform
|
|
25
|
+
from ..ops import validate
|
|
26
|
+
from ..ops.validate import FieldRules
|
|
26
27
|
from ..types import JSONData
|
|
27
28
|
from ..types import TemplateKey
|
|
28
|
-
from ..
|
|
29
|
-
from ..
|
|
29
|
+
from ..workflow import PipelineConfig
|
|
30
|
+
from ..workflow import load_pipeline_config
|
|
30
31
|
from . import io as cli_io
|
|
31
32
|
|
|
32
33
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -120,9 +121,12 @@ def _check_sections(
|
|
|
120
121
|
if targets:
|
|
121
122
|
sections['targets'] = [tgt.name for tgt in cfg.targets]
|
|
122
123
|
if transforms:
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
124
|
+
if isinstance(cfg.transforms, Mapping):
|
|
125
|
+
sections['transforms'] = list(cfg.transforms)
|
|
126
|
+
else:
|
|
127
|
+
sections['transforms'] = [
|
|
128
|
+
getattr(trf, 'name', None) for trf in cfg.transforms
|
|
129
|
+
]
|
|
126
130
|
if not sections:
|
|
127
131
|
sections['jobs'] = _pipeline_summary(cfg)['jobs']
|
|
128
132
|
return sections
|
|
@@ -156,6 +160,29 @@ def _pipeline_summary(
|
|
|
156
160
|
}
|
|
157
161
|
|
|
158
162
|
|
|
163
|
+
def _write_file_payload(
|
|
164
|
+
payload: JSONData,
|
|
165
|
+
target: str,
|
|
166
|
+
*,
|
|
167
|
+
format_hint: str | None,
|
|
168
|
+
) -> None:
|
|
169
|
+
"""
|
|
170
|
+
Write a JSON-like payload to a file path using an optional format hint.
|
|
171
|
+
|
|
172
|
+
Parameters
|
|
173
|
+
----------
|
|
174
|
+
payload : JSONData
|
|
175
|
+
The structured data to write.
|
|
176
|
+
target : str
|
|
177
|
+
File path to write to.
|
|
178
|
+
format_hint : str | None
|
|
179
|
+
Optional format hint for :class:`FileFormat`.
|
|
180
|
+
"""
|
|
181
|
+
file_path = Path(target)
|
|
182
|
+
file_format = FileFormat.coerce(format_hint) if format_hint else None
|
|
183
|
+
File(file_path, file_format=file_format).write(payload)
|
|
184
|
+
|
|
185
|
+
|
|
159
186
|
# SECTION: FUNCTIONS ======================================================== #
|
|
160
187
|
|
|
161
188
|
|
|
@@ -569,8 +596,9 @@ def transform_handler(
|
|
|
569
596
|
|
|
570
597
|
data = transform(payload, cast(TransformOperations, operations_payload))
|
|
571
598
|
|
|
599
|
+
# TODO: Generalize to handle non-file targets.
|
|
572
600
|
if target and target != '-':
|
|
573
|
-
|
|
601
|
+
_write_file_payload(data, target, format_hint=target_format)
|
|
574
602
|
print(f'Data transformed and saved to {target}')
|
|
575
603
|
return 0
|
|
576
604
|
|
etlplus/cli/io.py
CHANGED
|
@@ -15,8 +15,8 @@ from pathlib import Path
|
|
|
15
15
|
from typing import Any
|
|
16
16
|
from typing import cast
|
|
17
17
|
|
|
18
|
-
from ..enums import FileFormat
|
|
19
18
|
from ..file import File
|
|
19
|
+
from ..file import FileFormat
|
|
20
20
|
from ..types import JSONData
|
|
21
21
|
from ..utils import print_json
|
|
22
22
|
|
|
@@ -331,6 +331,6 @@ def write_json_output(
|
|
|
331
331
|
"""
|
|
332
332
|
if not output_path or output_path == '-':
|
|
333
333
|
return False
|
|
334
|
-
File(Path(output_path), FileFormat.JSON).
|
|
334
|
+
File(Path(output_path), FileFormat.JSON).write(data)
|
|
335
335
|
print(f'{success_message} {output_path}')
|
|
336
336
|
return True
|
etlplus/cli/main.py
CHANGED
etlplus/cli/state.py
CHANGED
|
@@ -15,6 +15,7 @@ from typing import Final
|
|
|
15
15
|
|
|
16
16
|
import typer
|
|
17
17
|
|
|
18
|
+
from ..utils import normalize_str
|
|
18
19
|
from .constants import DATA_CONNECTORS
|
|
19
20
|
|
|
20
21
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -322,14 +323,10 @@ def validate_choice(
|
|
|
322
323
|
typer.BadParameter
|
|
323
324
|
If the input value is not in the set of valid choices.
|
|
324
325
|
"""
|
|
325
|
-
v = str(value or '')
|
|
326
|
-
normalized_choices = {c
|
|
326
|
+
v = normalize_str(str(value or ''))
|
|
327
|
+
normalized_choices = {normalize_str(c): c for c in choices}
|
|
327
328
|
if v in normalized_choices:
|
|
328
|
-
|
|
329
|
-
for choice in choices:
|
|
330
|
-
if choice.lower() == v:
|
|
331
|
-
return choice
|
|
332
|
-
return v
|
|
329
|
+
return normalized_choices[v]
|
|
333
330
|
allowed = ', '.join(sorted(choices))
|
|
334
331
|
raise typer.BadParameter(
|
|
335
332
|
f"Invalid {label} '{value}'. Choose from: {allowed}",
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# `etlplus.database` Subpackage
|
|
2
|
+
|
|
3
|
+
Documentation for the `etlplus.database` subpackage: database engine, schema, and ORM helpers.
|
|
4
|
+
|
|
5
|
+
- Provides database engine and connection management
|
|
6
|
+
- Supports schema definition and DDL generation
|
|
7
|
+
- Includes lightweight ORM utilities for tabular data
|
|
8
|
+
- Exposes type definitions for database objects
|
|
9
|
+
|
|
10
|
+
Back to project overview: see the top-level [README](../../README.md).
|
|
11
|
+
|
|
12
|
+
- [`etlplus.database` Subpackage](#etlplusdatabase-subpackage)
|
|
13
|
+
- [Database Engine and Connections](#database-engine-and-connections)
|
|
14
|
+
- [Schema and DDL Helpers](#schema-and-ddl-helpers)
|
|
15
|
+
- [ORM Utilities](#orm-utilities)
|
|
16
|
+
- [Example: Creating a Table](#example-creating-a-table)
|
|
17
|
+
- [See Also](#see-also)
|
|
18
|
+
|
|
19
|
+
## Database Engine and Connections
|
|
20
|
+
|
|
21
|
+
- Manage connections to supported databases
|
|
22
|
+
- Configure engines for different backends
|
|
23
|
+
|
|
24
|
+
## Schema and DDL Helpers
|
|
25
|
+
|
|
26
|
+
- Define table schemas and columns
|
|
27
|
+
- Generate DDL statements for supported databases
|
|
28
|
+
|
|
29
|
+
## ORM Utilities
|
|
30
|
+
|
|
31
|
+
- Map rows to Python objects
|
|
32
|
+
- Simple CRUD helpers for tabular data
|
|
33
|
+
|
|
34
|
+
## Example: Creating a Table
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
from etlplus.database import Schema, Engine
|
|
38
|
+
|
|
39
|
+
engine = Engine.connect("sqlite:///example.db")
|
|
40
|
+
schema = Schema.from_dict({"name": "users", "columns": [ ... ]})
|
|
41
|
+
engine.create_table(schema)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## See Also
|
|
45
|
+
|
|
46
|
+
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
47
|
+
- Schema helpers in [schema.py](schema.py)
|
|
48
|
+
- ORM utilities in [orm.py](orm.py)
|
etlplus/database/ddl.py
CHANGED
|
@@ -203,7 +203,7 @@ def load_table_spec(
|
|
|
203
203
|
raise ValueError('Spec must be .json, .yml, or .yaml')
|
|
204
204
|
|
|
205
205
|
try:
|
|
206
|
-
spec = File
|
|
206
|
+
spec = File(spec_path).read()
|
|
207
207
|
except ImportError as e:
|
|
208
208
|
if suffix in {'.yml', '.yaml'}:
|
|
209
209
|
raise RuntimeError(
|
etlplus/database/engine.py
CHANGED
|
@@ -113,7 +113,7 @@ def load_database_url_from_config(
|
|
|
113
113
|
ValueError
|
|
114
114
|
If no connection string/URL/DSN is found for the specified entry.
|
|
115
115
|
"""
|
|
116
|
-
cfg = File
|
|
116
|
+
cfg = File(Path(path)).read()
|
|
117
117
|
if not isinstance(cfg, Mapping):
|
|
118
118
|
raise TypeError('Database config must be a mapping')
|
|
119
119
|
|
|
@@ -136,9 +136,25 @@ def load_database_url_from_config(
|
|
|
136
136
|
return url
|
|
137
137
|
|
|
138
138
|
|
|
139
|
-
def make_engine(
|
|
140
|
-
|
|
139
|
+
def make_engine(
|
|
140
|
+
url: str | None = None,
|
|
141
|
+
**engine_kwargs: Any,
|
|
142
|
+
) -> Engine:
|
|
143
|
+
"""
|
|
144
|
+
Create a SQLAlchemy Engine, defaulting to env config if no URL given.
|
|
145
|
+
|
|
146
|
+
Parameters
|
|
147
|
+
----------
|
|
148
|
+
url : str | None, optional
|
|
149
|
+
Database URL/DSN string. When omitted, ``DATABASE_URL`` is used.
|
|
150
|
+
**engine_kwargs : Any
|
|
151
|
+
Extra keyword arguments forwarded to ``create_engine``.
|
|
141
152
|
|
|
153
|
+
Returns
|
|
154
|
+
-------
|
|
155
|
+
Engine
|
|
156
|
+
Configured SQLAlchemy engine instance.
|
|
157
|
+
"""
|
|
142
158
|
resolved_url = url or DATABASE_URL
|
|
143
159
|
return create_engine(resolved_url, pool_pre_ping=True, **engine_kwargs)
|
|
144
160
|
|
etlplus/database/orm.py
CHANGED
|
@@ -201,12 +201,14 @@ def build_models(
|
|
|
201
201
|
) -> ModelRegistry:
|
|
202
202
|
"""
|
|
203
203
|
Build SQLAlchemy ORM models from table specifications.
|
|
204
|
+
|
|
204
205
|
Parameters
|
|
205
206
|
----------
|
|
206
207
|
specs : list[TableSpec]
|
|
207
208
|
List of table specifications.
|
|
208
209
|
base : type[DeclarativeBase], optional
|
|
209
210
|
Base class for the ORM models (default: :class:`Base`).
|
|
211
|
+
|
|
210
212
|
Returns
|
|
211
213
|
-------
|
|
212
214
|
ModelRegistry
|
etlplus/database/schema.py
CHANGED
etlplus/enums.py
CHANGED
|
@@ -19,16 +19,12 @@ from .types import StrStrMap
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
__all__ = [
|
|
22
|
+
# Enums
|
|
22
23
|
'AggregateName',
|
|
23
24
|
'CoercibleStrEnum',
|
|
24
25
|
'DataConnectorType',
|
|
25
|
-
'FileFormat',
|
|
26
|
-
'HttpMethod',
|
|
27
26
|
'OperatorName',
|
|
28
27
|
'PipelineStep',
|
|
29
|
-
'coerce_data_connector_type',
|
|
30
|
-
'coerce_file_format',
|
|
31
|
-
'coerce_http_method',
|
|
32
28
|
]
|
|
33
29
|
|
|
34
30
|
|
|
@@ -203,119 +199,6 @@ class DataConnectorType(CoercibleStrEnum):
|
|
|
203
199
|
}
|
|
204
200
|
|
|
205
201
|
|
|
206
|
-
class FileFormat(CoercibleStrEnum):
|
|
207
|
-
"""Supported file formats for extraction."""
|
|
208
|
-
|
|
209
|
-
# -- Constants -- #
|
|
210
|
-
|
|
211
|
-
AVRO = 'avro'
|
|
212
|
-
CSV = 'csv'
|
|
213
|
-
FEATHER = 'feather'
|
|
214
|
-
GZ = 'gz'
|
|
215
|
-
JSON = 'json'
|
|
216
|
-
NDJSON = 'ndjson'
|
|
217
|
-
ORC = 'orc'
|
|
218
|
-
PARQUET = 'parquet'
|
|
219
|
-
TSV = 'tsv'
|
|
220
|
-
TXT = 'txt'
|
|
221
|
-
XLS = 'xls'
|
|
222
|
-
XLSX = 'xlsx'
|
|
223
|
-
ZIP = 'zip'
|
|
224
|
-
XML = 'xml'
|
|
225
|
-
YAML = 'yaml'
|
|
226
|
-
|
|
227
|
-
# -- Class Methods -- #
|
|
228
|
-
|
|
229
|
-
@classmethod
|
|
230
|
-
def aliases(cls) -> StrStrMap:
|
|
231
|
-
"""
|
|
232
|
-
Return a mapping of common aliases for each enum member.
|
|
233
|
-
|
|
234
|
-
Returns
|
|
235
|
-
-------
|
|
236
|
-
StrStrMap
|
|
237
|
-
A mapping of alias names to their corresponding enum member names.
|
|
238
|
-
"""
|
|
239
|
-
return {
|
|
240
|
-
# Common shorthand
|
|
241
|
-
'parq': 'parquet',
|
|
242
|
-
'yml': 'yaml',
|
|
243
|
-
# File extensions
|
|
244
|
-
'.avro': 'avro',
|
|
245
|
-
'.csv': 'csv',
|
|
246
|
-
'.feather': 'feather',
|
|
247
|
-
'.gz': 'gz',
|
|
248
|
-
'.json': 'json',
|
|
249
|
-
'.jsonl': 'ndjson',
|
|
250
|
-
'.ndjson': 'ndjson',
|
|
251
|
-
'.orc': 'orc',
|
|
252
|
-
'.parquet': 'parquet',
|
|
253
|
-
'.pq': 'parquet',
|
|
254
|
-
'.tsv': 'tsv',
|
|
255
|
-
'.txt': 'txt',
|
|
256
|
-
'.xls': 'xls',
|
|
257
|
-
'.xlsx': 'xlsx',
|
|
258
|
-
'.zip': 'zip',
|
|
259
|
-
'.xml': 'xml',
|
|
260
|
-
'.yaml': 'yaml',
|
|
261
|
-
'.yml': 'yaml',
|
|
262
|
-
# MIME types
|
|
263
|
-
'application/avro': 'avro',
|
|
264
|
-
'application/feather': 'feather',
|
|
265
|
-
'application/gzip': 'gz',
|
|
266
|
-
'application/json': 'json',
|
|
267
|
-
'application/jsonlines': 'ndjson',
|
|
268
|
-
'application/ndjson': 'ndjson',
|
|
269
|
-
'application/orc': 'orc',
|
|
270
|
-
'application/vnd.apache.arrow.file': 'feather',
|
|
271
|
-
'application/vnd.apache.orc': 'orc',
|
|
272
|
-
'application/vnd.ms-excel': 'xls',
|
|
273
|
-
(
|
|
274
|
-
'application/vnd.openxmlformats-'
|
|
275
|
-
'officedocument.spreadsheetml.sheet'
|
|
276
|
-
): 'xlsx',
|
|
277
|
-
'application/x-avro': 'avro',
|
|
278
|
-
'application/x-ndjson': 'ndjson',
|
|
279
|
-
'application/x-parquet': 'parquet',
|
|
280
|
-
'application/xml': 'xml',
|
|
281
|
-
'application/zip': 'zip',
|
|
282
|
-
'text/csv': 'csv',
|
|
283
|
-
'text/plain': 'txt',
|
|
284
|
-
'text/tab-separated-values': 'tsv',
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
class HttpMethod(CoercibleStrEnum):
|
|
289
|
-
"""Supported HTTP verbs that accept JSON payloads."""
|
|
290
|
-
|
|
291
|
-
# -- Constants -- #
|
|
292
|
-
|
|
293
|
-
CONNECT = 'connect'
|
|
294
|
-
DELETE = 'delete'
|
|
295
|
-
GET = 'get'
|
|
296
|
-
HEAD = 'head'
|
|
297
|
-
OPTIONS = 'options'
|
|
298
|
-
PATCH = 'patch'
|
|
299
|
-
POST = 'post'
|
|
300
|
-
PUT = 'put'
|
|
301
|
-
TRACE = 'trace'
|
|
302
|
-
|
|
303
|
-
# -- Getters -- #
|
|
304
|
-
|
|
305
|
-
@property
|
|
306
|
-
def allows_body(self) -> bool:
|
|
307
|
-
"""
|
|
308
|
-
Whether the method typically allows a request body.
|
|
309
|
-
|
|
310
|
-
Notes
|
|
311
|
-
-----
|
|
312
|
-
- RFCs do not strictly forbid bodies on some other methods (e.g.,
|
|
313
|
-
``DELETE``), but many servers/clients do not expect them. We mark
|
|
314
|
-
``POST``, ``PUT``, and ``PATCH`` as True.
|
|
315
|
-
"""
|
|
316
|
-
return self in {HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH}
|
|
317
|
-
|
|
318
|
-
|
|
319
202
|
class OperatorName(CoercibleStrEnum):
|
|
320
203
|
"""Supported comparison operators with helpers."""
|
|
321
204
|
|
|
@@ -423,42 +306,3 @@ _PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
|
|
|
423
306
|
PipelineStep.SORT: 3,
|
|
424
307
|
PipelineStep.AGGREGATE: 4,
|
|
425
308
|
}
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
# SECTION: FUNCTIONS ======================================================== #
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
def coerce_data_connector_type(
|
|
432
|
-
connector: DataConnectorType | str,
|
|
433
|
-
) -> DataConnectorType:
|
|
434
|
-
"""
|
|
435
|
-
Normalize textual data connector values to :class:`DataConnectorType`.
|
|
436
|
-
|
|
437
|
-
This thin wrapper is kept for backward compatibility; prefer
|
|
438
|
-
:meth:`DataConnectorType.coerce` going forward.
|
|
439
|
-
"""
|
|
440
|
-
return DataConnectorType.coerce(connector)
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
def coerce_file_format(
|
|
444
|
-
file_format: FileFormat | str,
|
|
445
|
-
) -> FileFormat:
|
|
446
|
-
"""
|
|
447
|
-
Normalize textual file format values to :class:`FileFormat`.
|
|
448
|
-
|
|
449
|
-
This thin wrapper is kept for backward compatibility; prefer
|
|
450
|
-
:meth:`FileFormat.coerce` going forward.
|
|
451
|
-
"""
|
|
452
|
-
return FileFormat.coerce(file_format)
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
def coerce_http_method(
|
|
456
|
-
http_method: HttpMethod | str,
|
|
457
|
-
) -> HttpMethod:
|
|
458
|
-
"""
|
|
459
|
-
Normalize textual HTTP method values to :class:`HttpMethod`.
|
|
460
|
-
|
|
461
|
-
This thin wrapper is kept for backward compatibility; prefer
|
|
462
|
-
:meth:`HttpMethod.coerce` going forward.
|
|
463
|
-
"""
|
|
464
|
-
return HttpMethod.coerce(http_method)
|