etlplus 0.14.3__py3-none-any.whl → 0.15.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +4 -4
- etlplus/api/README.md +33 -2
- etlplus/api/config.py +3 -8
- etlplus/api/types.py +89 -0
- etlplus/api/utils.py +5 -1
- etlplus/cli/README.md +2 -2
- etlplus/cli/commands.py +75 -42
- etlplus/cli/handlers.py +32 -14
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +4 -7
- etlplus/database/README.md +2 -2
- etlplus/database/engine.py +18 -2
- etlplus/database/orm.py +2 -0
- etlplus/file/README.md +2 -2
- etlplus/file/_io.py +39 -0
- etlplus/file/json.py +2 -14
- etlplus/file/yaml.py +2 -14
- etlplus/ops/run.py +14 -9
- etlplus/ops/utils.py +4 -33
- etlplus/ops/validate.py +3 -3
- etlplus/templates/README.md +2 -2
- etlplus/types.py +3 -2
- etlplus/utils.py +136 -2
- etlplus/{config → workflow}/README.md +6 -6
- etlplus/{config → workflow}/__init__.py +10 -23
- etlplus/{config → workflow}/connector.py +58 -44
- etlplus/{dag.py → workflow/dag.py} +6 -4
- etlplus/{config → workflow}/jobs.py +101 -38
- etlplus/{config → workflow}/pipeline.py +57 -49
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/METADATA +4 -4
- {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/RECORD +37 -38
- {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/WHEEL +1 -1
- etlplus/config/types.py +0 -204
- etlplus/config/utils.py +0 -120
- {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.14.3.dist-info → etlplus-0.15.2.dist-info}/top_level.txt +0 -0
etlplus/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus
|
|
1
|
+
# `etlplus` Package
|
|
2
2
|
|
|
3
3
|
The `etlplus` package provides a unified Python API and CLI for ETL operations: extraction,
|
|
4
4
|
validation, transformation, and loading of data from files, APIs, and databases.
|
|
@@ -13,12 +13,12 @@ Back to project overview: see the top-level [README](../README.md).
|
|
|
13
13
|
|
|
14
14
|
- [etlplus.api](api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
15
15
|
- [etlplus.file](file/README.md): Unified file format support and helpers
|
|
16
|
-
- [etlplus.
|
|
17
|
-
profiles
|
|
18
|
-
- [etlplus.cli](cli/README.md): Command-line interface for ETLPlus workflows
|
|
16
|
+
- [etlplus.cli](cli/README.md): Command-line interface definitions for `etlplus`
|
|
19
17
|
- [etlplus.database](database/README.md): Database engine, schema, and ORM helpers
|
|
20
18
|
- [etlplus.templates](templates/README.md): SQL and DDL template helpers
|
|
21
19
|
- [etlplus.validation](validation/README.md): Data validation utilities and helpers
|
|
20
|
+
- [etlplus.workflow](workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
21
|
+
profiles
|
|
22
22
|
|
|
23
23
|
## Quickstart
|
|
24
24
|
|
etlplus/api/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus.api
|
|
1
|
+
# `etlplus.api` Subpackage
|
|
2
2
|
|
|
3
3
|
Documentation for the `etlplus.api` subpackage: a lightweight HTTP client and helpers for paginated
|
|
4
4
|
REST endpoints.
|
|
@@ -12,7 +12,7 @@ REST endpoints.
|
|
|
12
12
|
|
|
13
13
|
Back to project overview: see the top-level [README](../../README.md).
|
|
14
14
|
|
|
15
|
-
- [etlplus.api
|
|
15
|
+
- [`etlplus.api` Subpackage](#etlplusapi-subpackage)
|
|
16
16
|
- [Installation](#installation)
|
|
17
17
|
- [Quickstart](#quickstart)
|
|
18
18
|
- [Overriding Rate Limits Per Call](#overriding-rate-limits-per-call)
|
|
@@ -22,6 +22,7 @@ Back to project overview: see the top-level [README](../../README.md).
|
|
|
22
22
|
- [Authentication](#authentication)
|
|
23
23
|
- [Errors and Rate Limiting](#errors-and-rate-limiting)
|
|
24
24
|
- [Types and Transport](#types-and-transport)
|
|
25
|
+
- [Config Schemas](#config-schemas)
|
|
25
26
|
- [Supporting Modules](#supporting-modules)
|
|
26
27
|
- [Minimal Contract](#minimal-contract)
|
|
27
28
|
- [See also](#see-also)
|
|
@@ -225,6 +226,36 @@ providers can fall back to their own defaults. If you already possess a static t
|
|
|
225
226
|
`etlplus/api/request_manager.py` wraps `requests` sessions plus retry orchestration. Advanced
|
|
226
227
|
users may consult those modules to adapt behavior.
|
|
227
228
|
|
|
229
|
+
## Config Schemas
|
|
230
|
+
|
|
231
|
+
`etlplus.api.types` defines TypedDict-based configuration shapes for API profiles and endpoints.
|
|
232
|
+
Runtime parsing remains permissive in `etlplus.api.config`, but these types improve IDE
|
|
233
|
+
autocomplete and static analysis.
|
|
234
|
+
|
|
235
|
+
Exported types:
|
|
236
|
+
|
|
237
|
+
- `ApiConfigMap`: top-level API config shape
|
|
238
|
+
- `ApiProfileConfigMap`: per-profile API config shape
|
|
239
|
+
- `ApiProfileDefaultsMap`: defaults block within a profile
|
|
240
|
+
- `EndpointMap`: endpoint config shape
|
|
241
|
+
|
|
242
|
+
Example:
|
|
243
|
+
|
|
244
|
+
```python
|
|
245
|
+
from etlplus.api import ApiConfigMap
|
|
246
|
+
|
|
247
|
+
api_cfg: ApiConfigMap = {
|
|
248
|
+
"base_url": "https://example.test",
|
|
249
|
+
"headers": {"Authorization": "Bearer token"},
|
|
250
|
+
"endpoints": {
|
|
251
|
+
"users": {
|
|
252
|
+
"path": "/users",
|
|
253
|
+
"method": "GET",
|
|
254
|
+
},
|
|
255
|
+
},
|
|
256
|
+
}
|
|
257
|
+
```
|
|
258
|
+
|
|
228
259
|
## Supporting Modules
|
|
229
260
|
|
|
230
261
|
- `etlplus.api.types` collects friendly aliases such as `Headers`, `Params`, `Url`, and
|
etlplus/api/config.py
CHANGED
|
@@ -3,11 +3,6 @@
|
|
|
3
3
|
|
|
4
4
|
Configuration dataclasses for REST API services, profiles, and endpoints.
|
|
5
5
|
|
|
6
|
-
These models used to live under :mod:`etlplus.config`, but they belong in the
|
|
7
|
-
API layer because they compose runtime types such as
|
|
8
|
-
:class:`etlplus.api.EndpointClient`, :class:`etlplus.api.PaginationConfig`, and
|
|
9
|
-
:class:`etlplus.api.RateLimitConfig`.
|
|
10
|
-
|
|
11
6
|
Notes
|
|
12
7
|
-----
|
|
13
8
|
- TypedDict references remain editor hints only; :meth:`from_obj` accepts
|
|
@@ -41,9 +36,9 @@ from .pagination import PaginationConfig
|
|
|
41
36
|
from .rate_limiting import RateLimitConfig
|
|
42
37
|
|
|
43
38
|
if TYPE_CHECKING:
|
|
44
|
-
from
|
|
45
|
-
from
|
|
46
|
-
from
|
|
39
|
+
from .types import ApiConfigMap
|
|
40
|
+
from .types import ApiProfileConfigMap
|
|
41
|
+
from .types import EndpointMap
|
|
47
42
|
|
|
48
43
|
|
|
49
44
|
# SECTION: EXPORTS ========================================================== #
|
etlplus/api/types.py
CHANGED
|
@@ -20,9 +20,11 @@ Examples
|
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
22
|
from collections.abc import Callable
|
|
23
|
+
from collections.abc import Mapping
|
|
23
24
|
from dataclasses import dataclass
|
|
24
25
|
from typing import Any
|
|
25
26
|
from typing import Self
|
|
27
|
+
from typing import TypedDict
|
|
26
28
|
from typing import cast
|
|
27
29
|
|
|
28
30
|
from ..types import JSONData
|
|
@@ -40,6 +42,11 @@ __all__ = [
|
|
|
40
42
|
'Headers',
|
|
41
43
|
'Params',
|
|
42
44
|
'Url',
|
|
45
|
+
# Typed Dicts
|
|
46
|
+
'ApiConfigMap',
|
|
47
|
+
'ApiProfileConfigMap',
|
|
48
|
+
'ApiProfileDefaultsMap',
|
|
49
|
+
'EndpointMap',
|
|
43
50
|
]
|
|
44
51
|
|
|
45
52
|
|
|
@@ -49,6 +56,88 @@ __all__ = [
|
|
|
49
56
|
_UNSET = object()
|
|
50
57
|
|
|
51
58
|
|
|
59
|
+
# SECTION: TYPED DICTS ====================================================== #
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class ApiConfigMap(TypedDict, total=False):
|
|
63
|
+
"""
|
|
64
|
+
Top-level API config shape parsed by ApiConfig.from_obj.
|
|
65
|
+
|
|
66
|
+
Either provide a ``base_url`` with optional ``headers`` and ``endpoints``,
|
|
67
|
+
or provide ``profiles`` with at least one profile having a ``base_url``.
|
|
68
|
+
|
|
69
|
+
See Also
|
|
70
|
+
--------
|
|
71
|
+
- :class:`etlplus.api.config.ApiConfig`
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
base_url: str
|
|
75
|
+
headers: StrAnyMap
|
|
76
|
+
endpoints: Mapping[str, EndpointMap | str]
|
|
77
|
+
profiles: Mapping[str, ApiProfileConfigMap]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class ApiProfileConfigMap(TypedDict, total=False):
|
|
81
|
+
"""
|
|
82
|
+
Shape accepted for a profile entry under ApiConfigMap.profiles.
|
|
83
|
+
|
|
84
|
+
Notes
|
|
85
|
+
-----
|
|
86
|
+
``base_url`` is required at runtime when profiles are provided.
|
|
87
|
+
|
|
88
|
+
See Also
|
|
89
|
+
--------
|
|
90
|
+
- :class:`etlplus.api.config.ApiProfileConfig`
|
|
91
|
+
"""
|
|
92
|
+
|
|
93
|
+
base_url: str
|
|
94
|
+
headers: StrAnyMap
|
|
95
|
+
base_path: str
|
|
96
|
+
auth: StrAnyMap
|
|
97
|
+
defaults: ApiProfileDefaultsMap
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class ApiProfileDefaultsMap(TypedDict, total=False):
|
|
101
|
+
"""
|
|
102
|
+
Defaults block available under a profile (all keys optional).
|
|
103
|
+
|
|
104
|
+
Notes
|
|
105
|
+
-----
|
|
106
|
+
Runtime expects header values to be str; typing remains permissive.
|
|
107
|
+
|
|
108
|
+
See Also
|
|
109
|
+
--------
|
|
110
|
+
- :class:`etlplus.api.config.ApiProfileConfig`
|
|
111
|
+
- :class:`etlplus.api.pagination.PaginationConfig`
|
|
112
|
+
- :class:`etlplus.api.rate_limiting.RateLimitConfig`
|
|
113
|
+
"""
|
|
114
|
+
|
|
115
|
+
headers: StrAnyMap
|
|
116
|
+
pagination: Any
|
|
117
|
+
rate_limit: Any
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class EndpointMap(TypedDict, total=False):
|
|
121
|
+
"""
|
|
122
|
+
Shape accepted by EndpointConfig.from_obj.
|
|
123
|
+
|
|
124
|
+
One of ``path`` or ``url`` should be provided.
|
|
125
|
+
|
|
126
|
+
See Also
|
|
127
|
+
--------
|
|
128
|
+
- :class:`etlplus.api.config.EndpointConfig`
|
|
129
|
+
"""
|
|
130
|
+
|
|
131
|
+
path: str
|
|
132
|
+
url: str
|
|
133
|
+
method: str
|
|
134
|
+
path_params: StrAnyMap
|
|
135
|
+
query_params: StrAnyMap
|
|
136
|
+
body: Any
|
|
137
|
+
pagination: Any
|
|
138
|
+
rate_limit: Any
|
|
139
|
+
|
|
140
|
+
|
|
52
141
|
# SECTION: DATA CLASSES ===================================================== #
|
|
53
142
|
|
|
54
143
|
|
etlplus/api/utils.py
CHANGED
|
@@ -892,4 +892,8 @@ def resolve_request(
|
|
|
892
892
|
'Session object must supply a callable '
|
|
893
893
|
f'"{http_method.value}" method',
|
|
894
894
|
)
|
|
895
|
-
|
|
895
|
+
typed_request_callable = cast(
|
|
896
|
+
Callable[..., requests.Response],
|
|
897
|
+
request_callable,
|
|
898
|
+
)
|
|
899
|
+
return typed_request_callable, request_timeout, http_method
|
etlplus/cli/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus.cli
|
|
1
|
+
# `etlplus.cli` Subpackage
|
|
2
2
|
|
|
3
3
|
Documentation for the `etlplus.cli` subpackage: command-line interface for ETLPlus workflows.
|
|
4
4
|
|
|
@@ -9,7 +9,7 @@ Documentation for the `etlplus.cli` subpackage: command-line interface for ETLPl
|
|
|
9
9
|
|
|
10
10
|
Back to project overview: see the top-level [README](../../README.md).
|
|
11
11
|
|
|
12
|
-
- [etlplus.cli
|
|
12
|
+
- [`etlplus.cli` Subpackage](#etlpluscli-subpackage)
|
|
13
13
|
- [Available Commands](#available-commands)
|
|
14
14
|
- [Command Options](#command-options)
|
|
15
15
|
- [Example: Running a Pipeline](#example-running-a-pipeline)
|
etlplus/cli/commands.py
CHANGED
|
@@ -61,6 +61,24 @@ __all__ = ['app']
|
|
|
61
61
|
|
|
62
62
|
# SECTION: TYPE ALIASES ==================================================== #
|
|
63
63
|
|
|
64
|
+
|
|
65
|
+
JobOption = Annotated[
|
|
66
|
+
str | None,
|
|
67
|
+
typer.Option(
|
|
68
|
+
'-j',
|
|
69
|
+
'--job',
|
|
70
|
+
help='Name of the job to run',
|
|
71
|
+
),
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
JobsOption = Annotated[
|
|
75
|
+
bool,
|
|
76
|
+
typer.Option(
|
|
77
|
+
'--jobs',
|
|
78
|
+
help='List available job names and exit',
|
|
79
|
+
),
|
|
80
|
+
]
|
|
81
|
+
|
|
64
82
|
OperationsOption = Annotated[
|
|
65
83
|
str,
|
|
66
84
|
typer.Option(
|
|
@@ -89,6 +107,23 @@ PipelineConfigOption = Annotated[
|
|
|
89
107
|
),
|
|
90
108
|
]
|
|
91
109
|
|
|
110
|
+
PipelineOption = Annotated[
|
|
111
|
+
str | None,
|
|
112
|
+
typer.Option(
|
|
113
|
+
'-p',
|
|
114
|
+
'--pipeline',
|
|
115
|
+
help='Name of the pipeline to run',
|
|
116
|
+
),
|
|
117
|
+
]
|
|
118
|
+
|
|
119
|
+
PipelinesOption = Annotated[
|
|
120
|
+
bool,
|
|
121
|
+
typer.Option(
|
|
122
|
+
'--pipelines',
|
|
123
|
+
help='List ETL pipelines',
|
|
124
|
+
),
|
|
125
|
+
]
|
|
126
|
+
|
|
92
127
|
RenderConfigOption = Annotated[
|
|
93
128
|
str | None,
|
|
94
129
|
typer.Option(
|
|
@@ -193,6 +228,22 @@ SourceTypeOption = Annotated[
|
|
|
193
228
|
),
|
|
194
229
|
]
|
|
195
230
|
|
|
231
|
+
SourcesOption = Annotated[
|
|
232
|
+
bool,
|
|
233
|
+
typer.Option(
|
|
234
|
+
'--sources',
|
|
235
|
+
help='List data sources',
|
|
236
|
+
),
|
|
237
|
+
]
|
|
238
|
+
|
|
239
|
+
SummaryOption = Annotated[
|
|
240
|
+
bool,
|
|
241
|
+
typer.Option(
|
|
242
|
+
'--summary',
|
|
243
|
+
help='Show pipeline summary (name, version, sources, targets, jobs)',
|
|
244
|
+
),
|
|
245
|
+
]
|
|
246
|
+
|
|
196
247
|
TargetArg = Annotated[
|
|
197
248
|
str,
|
|
198
249
|
typer.Argument(
|
|
@@ -227,6 +278,22 @@ TargetTypeOption = Annotated[
|
|
|
227
278
|
),
|
|
228
279
|
]
|
|
229
280
|
|
|
281
|
+
TargetsOption = Annotated[
|
|
282
|
+
bool,
|
|
283
|
+
typer.Option(
|
|
284
|
+
'--targets',
|
|
285
|
+
help='List data targets',
|
|
286
|
+
),
|
|
287
|
+
]
|
|
288
|
+
|
|
289
|
+
TransformsOption = Annotated[
|
|
290
|
+
bool,
|
|
291
|
+
typer.Option(
|
|
292
|
+
'--transforms',
|
|
293
|
+
help='List data transforms',
|
|
294
|
+
),
|
|
295
|
+
]
|
|
296
|
+
|
|
230
297
|
|
|
231
298
|
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
232
299
|
|
|
@@ -341,36 +408,12 @@ def _root(
|
|
|
341
408
|
def check_cmd(
|
|
342
409
|
ctx: typer.Context,
|
|
343
410
|
config: PipelineConfigOption,
|
|
344
|
-
jobs:
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
False,
|
|
351
|
-
'--pipelines',
|
|
352
|
-
help='List ETL pipelines',
|
|
353
|
-
),
|
|
354
|
-
sources: bool = typer.Option(
|
|
355
|
-
False,
|
|
356
|
-
'--sources',
|
|
357
|
-
help='List data sources',
|
|
358
|
-
),
|
|
359
|
-
summary: bool = typer.Option(
|
|
360
|
-
False,
|
|
361
|
-
'--summary',
|
|
362
|
-
help='Show pipeline summary (name, version, sources, targets, jobs)',
|
|
363
|
-
),
|
|
364
|
-
targets: bool = typer.Option(
|
|
365
|
-
False,
|
|
366
|
-
'--targets',
|
|
367
|
-
help='List data targets',
|
|
368
|
-
),
|
|
369
|
-
transforms: bool = typer.Option(
|
|
370
|
-
False,
|
|
371
|
-
'--transforms',
|
|
372
|
-
help='List data transforms',
|
|
373
|
-
),
|
|
411
|
+
jobs: JobsOption = False,
|
|
412
|
+
pipelines: PipelinesOption = False,
|
|
413
|
+
sources: SourcesOption = False,
|
|
414
|
+
summary: SummaryOption = False,
|
|
415
|
+
targets: TargetsOption = False,
|
|
416
|
+
transforms: TransformsOption = False,
|
|
374
417
|
) -> int:
|
|
375
418
|
"""
|
|
376
419
|
Inspect a pipeline configuration.
|
|
@@ -683,18 +726,8 @@ def render_cmd(
|
|
|
683
726
|
def run_cmd(
|
|
684
727
|
ctx: typer.Context,
|
|
685
728
|
config: PipelineConfigOption,
|
|
686
|
-
job:
|
|
687
|
-
|
|
688
|
-
'-j',
|
|
689
|
-
'--job',
|
|
690
|
-
help='Name of the job to run',
|
|
691
|
-
),
|
|
692
|
-
pipeline: str | None = typer.Option(
|
|
693
|
-
None,
|
|
694
|
-
'-p',
|
|
695
|
-
'--pipeline',
|
|
696
|
-
help='Name of the pipeline to run',
|
|
697
|
-
),
|
|
729
|
+
job: JobOption = None,
|
|
730
|
+
pipeline: PipelineOption = None,
|
|
698
731
|
) -> int:
|
|
699
732
|
"""
|
|
700
733
|
Execute an ETL job or pipeline from a YAML configuration.
|
etlplus/cli/handlers.py
CHANGED
|
@@ -14,8 +14,6 @@ from typing import Any
|
|
|
14
14
|
from typing import Literal
|
|
15
15
|
from typing import cast
|
|
16
16
|
|
|
17
|
-
from ..config import PipelineConfig
|
|
18
|
-
from ..config import load_pipeline_config
|
|
19
17
|
from ..database import load_table_spec
|
|
20
18
|
from ..database import render_tables
|
|
21
19
|
from ..file import File
|
|
@@ -28,6 +26,8 @@ from ..ops import validate
|
|
|
28
26
|
from ..ops.validate import FieldRules
|
|
29
27
|
from ..types import JSONData
|
|
30
28
|
from ..types import TemplateKey
|
|
29
|
+
from ..workflow import PipelineConfig
|
|
30
|
+
from ..workflow import load_pipeline_config
|
|
31
31
|
from . import io as cli_io
|
|
32
32
|
|
|
33
33
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -121,9 +121,12 @@ def _check_sections(
|
|
|
121
121
|
if targets:
|
|
122
122
|
sections['targets'] = [tgt.name for tgt in cfg.targets]
|
|
123
123
|
if transforms:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
124
|
+
if isinstance(cfg.transforms, Mapping):
|
|
125
|
+
sections['transforms'] = list(cfg.transforms)
|
|
126
|
+
else:
|
|
127
|
+
sections['transforms'] = [
|
|
128
|
+
getattr(trf, 'name', None) for trf in cfg.transforms
|
|
129
|
+
]
|
|
127
130
|
if not sections:
|
|
128
131
|
sections['jobs'] = _pipeline_summary(cfg)['jobs']
|
|
129
132
|
return sections
|
|
@@ -157,6 +160,29 @@ def _pipeline_summary(
|
|
|
157
160
|
}
|
|
158
161
|
|
|
159
162
|
|
|
163
|
+
def _write_file_payload(
|
|
164
|
+
payload: JSONData,
|
|
165
|
+
target: str,
|
|
166
|
+
*,
|
|
167
|
+
format_hint: str | None,
|
|
168
|
+
) -> None:
|
|
169
|
+
"""
|
|
170
|
+
Write a JSON-like payload to a file path using an optional format hint.
|
|
171
|
+
|
|
172
|
+
Parameters
|
|
173
|
+
----------
|
|
174
|
+
payload : JSONData
|
|
175
|
+
The structured data to write.
|
|
176
|
+
target : str
|
|
177
|
+
File path to write to.
|
|
178
|
+
format_hint : str | None
|
|
179
|
+
Optional format hint for :class:`FileFormat`.
|
|
180
|
+
"""
|
|
181
|
+
file_path = Path(target)
|
|
182
|
+
file_format = FileFormat.coerce(format_hint) if format_hint else None
|
|
183
|
+
File(file_path, file_format=file_format).write(payload)
|
|
184
|
+
|
|
185
|
+
|
|
160
186
|
# SECTION: FUNCTIONS ======================================================== #
|
|
161
187
|
|
|
162
188
|
|
|
@@ -572,15 +598,7 @@ def transform_handler(
|
|
|
572
598
|
|
|
573
599
|
# TODO: Generalize to handle non-file targets.
|
|
574
600
|
if target and target != '-':
|
|
575
|
-
|
|
576
|
-
file_path = Path(target)
|
|
577
|
-
file_format = None
|
|
578
|
-
if target_format is not None:
|
|
579
|
-
try:
|
|
580
|
-
file_format = FileFormat(target_format)
|
|
581
|
-
except ValueError:
|
|
582
|
-
file_format = None # or handle error as appropriate
|
|
583
|
-
File(file_path, file_format=file_format).write(data)
|
|
601
|
+
_write_file_payload(data, target, format_hint=target_format)
|
|
584
602
|
print(f'Data transformed and saved to {target}')
|
|
585
603
|
return 0
|
|
586
604
|
|
etlplus/cli/main.py
CHANGED
etlplus/cli/state.py
CHANGED
|
@@ -15,6 +15,7 @@ from typing import Final
|
|
|
15
15
|
|
|
16
16
|
import typer
|
|
17
17
|
|
|
18
|
+
from ..utils import normalize_str
|
|
18
19
|
from .constants import DATA_CONNECTORS
|
|
19
20
|
|
|
20
21
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -322,14 +323,10 @@ def validate_choice(
|
|
|
322
323
|
typer.BadParameter
|
|
323
324
|
If the input value is not in the set of valid choices.
|
|
324
325
|
"""
|
|
325
|
-
v = str(value or '')
|
|
326
|
-
normalized_choices = {c
|
|
326
|
+
v = normalize_str(str(value or ''))
|
|
327
|
+
normalized_choices = {normalize_str(c): c for c in choices}
|
|
327
328
|
if v in normalized_choices:
|
|
328
|
-
|
|
329
|
-
for choice in choices:
|
|
330
|
-
if choice.lower() == v:
|
|
331
|
-
return choice
|
|
332
|
-
return v
|
|
329
|
+
return normalized_choices[v]
|
|
333
330
|
allowed = ', '.join(sorted(choices))
|
|
334
331
|
raise typer.BadParameter(
|
|
335
332
|
f"Invalid {label} '{value}'. Choose from: {allowed}",
|
etlplus/database/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus.database
|
|
1
|
+
# `etlplus.database` Subpackage
|
|
2
2
|
|
|
3
3
|
Documentation for the `etlplus.database` subpackage: database engine, schema, and ORM helpers.
|
|
4
4
|
|
|
@@ -9,7 +9,7 @@ Documentation for the `etlplus.database` subpackage: database engine, schema, an
|
|
|
9
9
|
|
|
10
10
|
Back to project overview: see the top-level [README](../../README.md).
|
|
11
11
|
|
|
12
|
-
- [etlplus.database
|
|
12
|
+
- [`etlplus.database` Subpackage](#etlplusdatabase-subpackage)
|
|
13
13
|
- [Database Engine and Connections](#database-engine-and-connections)
|
|
14
14
|
- [Schema and DDL Helpers](#schema-and-ddl-helpers)
|
|
15
15
|
- [ORM Utilities](#orm-utilities)
|
etlplus/database/engine.py
CHANGED
|
@@ -136,9 +136,25 @@ def load_database_url_from_config(
|
|
|
136
136
|
return url
|
|
137
137
|
|
|
138
138
|
|
|
139
|
-
def make_engine(
|
|
140
|
-
|
|
139
|
+
def make_engine(
|
|
140
|
+
url: str | None = None,
|
|
141
|
+
**engine_kwargs: Any,
|
|
142
|
+
) -> Engine:
|
|
143
|
+
"""
|
|
144
|
+
Create a SQLAlchemy Engine, defaulting to env config if no URL given.
|
|
145
|
+
|
|
146
|
+
Parameters
|
|
147
|
+
----------
|
|
148
|
+
url : str | None, optional
|
|
149
|
+
Database URL/DSN string. When omitted, ``DATABASE_URL`` is used.
|
|
150
|
+
**engine_kwargs : Any
|
|
151
|
+
Extra keyword arguments forwarded to ``create_engine``.
|
|
141
152
|
|
|
153
|
+
Returns
|
|
154
|
+
-------
|
|
155
|
+
Engine
|
|
156
|
+
Configured SQLAlchemy engine instance.
|
|
157
|
+
"""
|
|
142
158
|
resolved_url = url or DATABASE_URL
|
|
143
159
|
return create_engine(resolved_url, pool_pre_ping=True, **engine_kwargs)
|
|
144
160
|
|
etlplus/database/orm.py
CHANGED
|
@@ -201,12 +201,14 @@ def build_models(
|
|
|
201
201
|
) -> ModelRegistry:
|
|
202
202
|
"""
|
|
203
203
|
Build SQLAlchemy ORM models from table specifications.
|
|
204
|
+
|
|
204
205
|
Parameters
|
|
205
206
|
----------
|
|
206
207
|
specs : list[TableSpec]
|
|
207
208
|
List of table specifications.
|
|
208
209
|
base : type[DeclarativeBase], optional
|
|
209
210
|
Base class for the ORM models (default: :class:`Base`).
|
|
211
|
+
|
|
210
212
|
Returns
|
|
211
213
|
-------
|
|
212
214
|
ModelRegistry
|
etlplus/file/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus.file
|
|
1
|
+
# `etlplus.file` Subpackage
|
|
2
2
|
|
|
3
3
|
Documentation for the `etlplus.file` subpackage: unified file format support and helpers for reading
|
|
4
4
|
and writing data files.
|
|
@@ -11,7 +11,7 @@ and writing data files.
|
|
|
11
11
|
|
|
12
12
|
Back to project overview: see the top-level [README](../../README.md).
|
|
13
13
|
|
|
14
|
-
- [etlplus.file
|
|
14
|
+
- [`etlplus.file` Subpackage](#etlplusfile-subpackage)
|
|
15
15
|
- [Supported File Formats](#supported-file-formats)
|
|
16
16
|
- [Inferring File Format and Compression](#inferring-file-format-and-compression)
|
|
17
17
|
- [Reading and Writing Files](#reading-and-writing-files)
|
etlplus/file/_io.py
CHANGED
|
@@ -8,6 +8,7 @@ from __future__ import annotations
|
|
|
8
8
|
|
|
9
9
|
import csv
|
|
10
10
|
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
11
12
|
from typing import cast
|
|
12
13
|
|
|
13
14
|
from ..types import JSONData
|
|
@@ -17,6 +18,44 @@ from ..types import JSONList
|
|
|
17
18
|
# SECTION: FUNCTIONS ======================================================== #
|
|
18
19
|
|
|
19
20
|
|
|
21
|
+
def coerce_record_payload(
|
|
22
|
+
payload: Any,
|
|
23
|
+
*,
|
|
24
|
+
format_name: str,
|
|
25
|
+
) -> JSONData:
|
|
26
|
+
"""
|
|
27
|
+
Validate that ``payload`` is an object or list of objects.
|
|
28
|
+
|
|
29
|
+
Parameters
|
|
30
|
+
----------
|
|
31
|
+
payload : Any
|
|
32
|
+
Parsed payload to validate.
|
|
33
|
+
format_name : str
|
|
34
|
+
Human-readable format name for error messages.
|
|
35
|
+
|
|
36
|
+
Returns
|
|
37
|
+
-------
|
|
38
|
+
JSONData
|
|
39
|
+
``payload`` when it is a dict or a list of dicts.
|
|
40
|
+
|
|
41
|
+
Raises
|
|
42
|
+
------
|
|
43
|
+
TypeError
|
|
44
|
+
If the payload is not a dict or list of dicts.
|
|
45
|
+
"""
|
|
46
|
+
if isinstance(payload, dict):
|
|
47
|
+
return cast(JSONDict, payload)
|
|
48
|
+
if isinstance(payload, list):
|
|
49
|
+
if all(isinstance(item, dict) for item in payload):
|
|
50
|
+
return cast(JSONList, payload)
|
|
51
|
+
raise TypeError(
|
|
52
|
+
f'{format_name} array must contain only objects (dicts)',
|
|
53
|
+
)
|
|
54
|
+
raise TypeError(
|
|
55
|
+
f'{format_name} root must be an object or an array of objects',
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
|
|
20
59
|
def normalize_records(
|
|
21
60
|
data: JSONData,
|
|
22
61
|
format_name: str,
|
etlplus/file/json.py
CHANGED
|
@@ -20,12 +20,10 @@ from __future__ import annotations
|
|
|
20
20
|
|
|
21
21
|
import json
|
|
22
22
|
from pathlib import Path
|
|
23
|
-
from typing import cast
|
|
24
23
|
|
|
25
24
|
from ..types import JSONData
|
|
26
|
-
from ..types import JSONDict
|
|
27
|
-
from ..types import JSONList
|
|
28
25
|
from ..utils import count_records
|
|
26
|
+
from ._io import coerce_record_payload
|
|
29
27
|
|
|
30
28
|
# SECTION: EXPORTS ========================================================== #
|
|
31
29
|
|
|
@@ -65,17 +63,7 @@ def read(
|
|
|
65
63
|
with path.open('r', encoding='utf-8') as handle:
|
|
66
64
|
loaded = json.load(handle)
|
|
67
65
|
|
|
68
|
-
|
|
69
|
-
return cast(JSONDict, loaded)
|
|
70
|
-
if isinstance(loaded, list):
|
|
71
|
-
if all(isinstance(item, dict) for item in loaded):
|
|
72
|
-
return cast(JSONList, loaded)
|
|
73
|
-
raise TypeError(
|
|
74
|
-
'JSON array must contain only objects (dicts) when loading file',
|
|
75
|
-
)
|
|
76
|
-
raise TypeError(
|
|
77
|
-
'JSON root must be an object or an array of objects when loading file',
|
|
78
|
-
)
|
|
66
|
+
return coerce_record_payload(loaded, format_name='JSON')
|
|
79
67
|
|
|
80
68
|
|
|
81
69
|
def write(
|