etlplus 0.12.12__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +2 -2
- etlplus/__init__.py +1 -26
- etlplus/api/README.md +2 -2
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +36 -20
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +7 -6
- etlplus/{run_helpers.py → api/utils.py} +209 -153
- etlplus/cli/README.md +2 -2
- etlplus/cli/handlers.py +19 -9
- etlplus/config/README.md +31 -33
- etlplus/config/__init__.py +9 -32
- etlplus/config/types.py +0 -64
- etlplus/dag.py +103 -0
- etlplus/database/README.md +2 -2
- etlplus/enums.py +0 -32
- etlplus/file/README.md +2 -2
- etlplus/file/enums.py +1 -1
- etlplus/{validation → ops}/README.md +2 -2
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +78 -94
- etlplus/{load.py → ops/load.py} +73 -93
- etlplus/{run.py → ops/run.py} +153 -118
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +80 -15
- etlplus/{validate.py → ops/validate.py} +19 -9
- etlplus/templates/README.md +2 -2
- etlplus/types.py +2 -2
- etlplus/workflow/README.md +52 -0
- etlplus/workflow/__init__.py +43 -0
- etlplus/{config → workflow}/connector.py +17 -16
- etlplus/workflow/dag.py +105 -0
- etlplus/{config → workflow}/jobs.py +31 -15
- etlplus/{config → workflow}/pipeline.py +11 -3
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/METADATA +91 -60
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/RECORD +49 -43
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/WHEEL +1 -1
- etlplus/validation/__init__.py +0 -44
- /etlplus/{config → workflow}/utils.py +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/entry_points.txt +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.15.0.dist-info}/top_level.txt +0 -0
etlplus/config/README.md
CHANGED
|
@@ -1,52 +1,50 @@
|
|
|
1
|
-
# etlplus.config
|
|
1
|
+
# `etlplus.config` Subpackage
|
|
2
2
|
|
|
3
|
-
Documentation for the `etlplus.config` subpackage:
|
|
4
|
-
|
|
3
|
+
Documentation for the `etlplus.config` subpackage: type definitions and config shape helpers for
|
|
4
|
+
ETLPlus.
|
|
5
5
|
|
|
6
|
-
-
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
- Exposes type definitions for config schemas
|
|
6
|
+
- Exposes TypedDict-based config schemas for API profiles and endpoints
|
|
7
|
+
- Provides exported type aliases for API configuration maps
|
|
8
|
+
- Designed for Python 3.13 typing and editor assistance (runtime parsing lives elsewhere)
|
|
10
9
|
|
|
11
10
|
Back to project overview: see the top-level [README](../../README.md).
|
|
12
11
|
|
|
13
|
-
- [etlplus.config
|
|
14
|
-
- [
|
|
15
|
-
- [
|
|
16
|
-
- [Example:
|
|
12
|
+
- [`etlplus.config` Subpackage](#etlplusconfig-subpackage)
|
|
13
|
+
- [Modules](#modules)
|
|
14
|
+
- [Exported Types](#exported-types)
|
|
15
|
+
- [Example: Typing an API Config](#example-typing-an-api-config)
|
|
17
16
|
- [See Also](#see-also)
|
|
18
17
|
|
|
19
|
-
##
|
|
18
|
+
## Modules
|
|
20
19
|
|
|
21
|
-
-
|
|
22
|
-
-
|
|
23
|
-
- **Pipeline**: End-to-end pipeline configuration
|
|
24
|
-
- **Profile**: User or environment-specific settings
|
|
20
|
+
- `etlplus.config.__init__`: package exports and high-level package notes
|
|
21
|
+
- `etlplus.config.types`: TypedDict-based config schemas
|
|
25
22
|
|
|
26
|
-
##
|
|
23
|
+
## Exported Types
|
|
27
24
|
|
|
28
|
-
|
|
25
|
+
- `ApiConfigMap`: top-level API config shape
|
|
26
|
+
- `ApiProfileConfigMap`: per-profile API config shape
|
|
27
|
+
- `ApiProfileDefaultsMap`: defaults block within a profile
|
|
28
|
+
- `EndpointMap`: endpoint config shape
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
from etlplus.config import PipelineConfig
|
|
32
|
-
|
|
33
|
-
cfg = PipelineConfig.from_yaml("pipeline.yml")
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
- Supports YAML and JSON formats
|
|
37
|
-
- Validates against expected schema
|
|
38
|
-
|
|
39
|
-
## Example: Loading a Pipeline Config
|
|
30
|
+
## Example: Typing an API Config
|
|
40
31
|
|
|
41
32
|
```python
|
|
42
|
-
from etlplus.config import
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
33
|
+
from etlplus.config import ApiConfigMap
|
|
34
|
+
|
|
35
|
+
api_cfg: ApiConfigMap = {
|
|
36
|
+
"base_url": "https://example.test",
|
|
37
|
+
"headers": {"Authorization": "Bearer token"},
|
|
38
|
+
"endpoints": {
|
|
39
|
+
"users": {
|
|
40
|
+
"path": "/users",
|
|
41
|
+
"method": "GET",
|
|
42
|
+
},
|
|
43
|
+
},
|
|
44
|
+
}
|
|
46
45
|
```
|
|
47
46
|
|
|
48
47
|
## See Also
|
|
49
48
|
|
|
50
49
|
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
51
50
|
- Config type definitions in [types.py](types.py)
|
|
52
|
-
- Config utilities in [utils.py](utils.py)
|
etlplus/config/__init__.py
CHANGED
|
@@ -16,41 +16,18 @@ Notes
|
|
|
16
16
|
|
|
17
17
|
from __future__ import annotations
|
|
18
18
|
|
|
19
|
-
from .
|
|
20
|
-
from .
|
|
21
|
-
from .
|
|
22
|
-
from .
|
|
23
|
-
from .connector import parse_connector
|
|
24
|
-
from .jobs import ExtractRef
|
|
25
|
-
from .jobs import JobConfig
|
|
26
|
-
from .jobs import LoadRef
|
|
27
|
-
from .jobs import TransformRef
|
|
28
|
-
from .jobs import ValidationRef
|
|
29
|
-
from .pipeline import PipelineConfig
|
|
30
|
-
from .pipeline import load_pipeline_config
|
|
31
|
-
from .profile import ProfileConfig
|
|
32
|
-
from .types import ConnectorType
|
|
19
|
+
from .types import ApiConfigMap
|
|
20
|
+
from .types import ApiProfileConfigMap
|
|
21
|
+
from .types import ApiProfileDefaultsMap
|
|
22
|
+
from .types import EndpointMap
|
|
33
23
|
|
|
34
24
|
# SECTION: EXPORTS ========================================================== #
|
|
35
25
|
|
|
36
26
|
|
|
37
27
|
__all__ = [
|
|
38
|
-
#
|
|
39
|
-
'
|
|
40
|
-
'
|
|
41
|
-
'
|
|
42
|
-
'
|
|
43
|
-
'ConnectorFile',
|
|
44
|
-
'parse_connector',
|
|
45
|
-
# Jobs / Refs
|
|
46
|
-
'ExtractRef',
|
|
47
|
-
'JobConfig',
|
|
48
|
-
'LoadRef',
|
|
49
|
-
'TransformRef',
|
|
50
|
-
'ValidationRef',
|
|
51
|
-
# Pipeline
|
|
52
|
-
'PipelineConfig',
|
|
53
|
-
'load_pipeline_config',
|
|
54
|
-
# Profile
|
|
55
|
-
'ProfileConfig',
|
|
28
|
+
# Typed Dicts
|
|
29
|
+
'ApiConfigMap',
|
|
30
|
+
'ApiProfileConfigMap',
|
|
31
|
+
'ApiProfileDefaultsMap',
|
|
32
|
+
'EndpointMap',
|
|
56
33
|
]
|
etlplus/config/types.py
CHANGED
|
@@ -33,7 +33,6 @@ from __future__ import annotations
|
|
|
33
33
|
|
|
34
34
|
from collections.abc import Mapping
|
|
35
35
|
from typing import Any
|
|
36
|
-
from typing import Literal
|
|
37
36
|
from typing import TypedDict
|
|
38
37
|
|
|
39
38
|
from ..api import PaginationConfigMap
|
|
@@ -44,26 +43,17 @@ from ..types import StrAnyMap
|
|
|
44
43
|
|
|
45
44
|
|
|
46
45
|
__all__ = [
|
|
47
|
-
# Type aliases
|
|
48
|
-
'ConnectorType',
|
|
49
|
-
# 'PaginationType',
|
|
50
46
|
# TypedDicts
|
|
51
47
|
'ApiProfileDefaultsMap',
|
|
52
48
|
'ApiProfileConfigMap',
|
|
53
49
|
'ApiConfigMap',
|
|
54
50
|
'EndpointMap',
|
|
55
|
-
'ConnectorApiConfigMap',
|
|
56
|
-
'ConnectorDbConfigMap',
|
|
57
|
-
'ConnectorFileConfigMap',
|
|
58
51
|
]
|
|
59
52
|
|
|
60
53
|
|
|
61
54
|
# SECTION: TYPE ALIASES ===================================================== #
|
|
62
55
|
|
|
63
56
|
|
|
64
|
-
# Literal type for supported connector kinds
|
|
65
|
-
type ConnectorType = Literal['api', 'database', 'file']
|
|
66
|
-
|
|
67
57
|
# Literal type for supported pagination kinds
|
|
68
58
|
# type PaginationType = Literal['page', 'offset', 'cursor']
|
|
69
59
|
|
|
@@ -129,60 +119,6 @@ class ApiProfileDefaultsMap(TypedDict, total=False):
|
|
|
129
119
|
rate_limit: RateLimitConfigMap | StrAnyMap
|
|
130
120
|
|
|
131
121
|
|
|
132
|
-
class ConnectorApiConfigMap(TypedDict, total=False):
|
|
133
|
-
"""
|
|
134
|
-
Shape accepted by ConnectorApi.from_obj (all keys optional).
|
|
135
|
-
|
|
136
|
-
See Also
|
|
137
|
-
--------
|
|
138
|
-
- etlplus.config.connector.ConnectorApi.from_obj
|
|
139
|
-
"""
|
|
140
|
-
|
|
141
|
-
name: str
|
|
142
|
-
type: ConnectorType
|
|
143
|
-
url: str
|
|
144
|
-
method: str
|
|
145
|
-
headers: StrAnyMap
|
|
146
|
-
query_params: StrAnyMap
|
|
147
|
-
pagination: PaginationConfigMap
|
|
148
|
-
rate_limit: RateLimitConfigMap
|
|
149
|
-
api: str
|
|
150
|
-
endpoint: str
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
class ConnectorDbConfigMap(TypedDict, total=False):
|
|
154
|
-
"""
|
|
155
|
-
Shape accepted by ConnectorDb.from_obj (all keys optional).
|
|
156
|
-
|
|
157
|
-
See Also
|
|
158
|
-
--------
|
|
159
|
-
- etlplus.config.connector.ConnectorDb.from_obj
|
|
160
|
-
"""
|
|
161
|
-
|
|
162
|
-
name: str
|
|
163
|
-
type: ConnectorType
|
|
164
|
-
connection_string: str
|
|
165
|
-
query: str
|
|
166
|
-
table: str
|
|
167
|
-
mode: str
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
class ConnectorFileConfigMap(TypedDict, total=False):
|
|
171
|
-
"""
|
|
172
|
-
Shape accepted by ConnectorFile.from_obj (all keys optional).
|
|
173
|
-
|
|
174
|
-
See Also
|
|
175
|
-
--------
|
|
176
|
-
- etlplus.config.connector.ConnectorFile.from_obj
|
|
177
|
-
"""
|
|
178
|
-
|
|
179
|
-
name: str
|
|
180
|
-
type: ConnectorType
|
|
181
|
-
format: str
|
|
182
|
-
path: str
|
|
183
|
-
options: StrAnyMap
|
|
184
|
-
|
|
185
|
-
|
|
186
122
|
class EndpointMap(TypedDict, total=False):
|
|
187
123
|
"""
|
|
188
124
|
Shape accepted by EndpointConfig.from_obj.
|
etlplus/dag.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.dag` module.
|
|
3
|
+
|
|
4
|
+
Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
|
|
5
|
+
``depends_on``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections import deque
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
from .config.jobs import JobConfig
|
|
14
|
+
|
|
15
|
+
# SECTION: EXPORTS ========================================================== #
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
'DagError',
|
|
20
|
+
'topological_sort_jobs',
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# SECTION: ERRORS =========================================================== #
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(slots=True)
|
|
28
|
+
class DagError(ValueError):
|
|
29
|
+
"""
|
|
30
|
+
Raised when the job dependency graph is invalid.
|
|
31
|
+
|
|
32
|
+
Attributes
|
|
33
|
+
----------
|
|
34
|
+
message : str
|
|
35
|
+
Error message.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
# -- Attributes -- #
|
|
39
|
+
|
|
40
|
+
message: str
|
|
41
|
+
|
|
42
|
+
# -- Magic Methods (Object Representation) -- #
|
|
43
|
+
|
|
44
|
+
def __str__(self) -> str:
|
|
45
|
+
return self.message
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def topological_sort_jobs(
|
|
52
|
+
jobs: list[JobConfig],
|
|
53
|
+
) -> list[JobConfig]:
|
|
54
|
+
"""
|
|
55
|
+
Return jobs in topological order based on ``depends_on``.
|
|
56
|
+
|
|
57
|
+
Parameters
|
|
58
|
+
----------
|
|
59
|
+
jobs : list[JobConfig]
|
|
60
|
+
List of job configurations to sort.
|
|
61
|
+
|
|
62
|
+
Returns
|
|
63
|
+
-------
|
|
64
|
+
list[JobConfig]
|
|
65
|
+
Jobs sorted in topological order.
|
|
66
|
+
|
|
67
|
+
Raises
|
|
68
|
+
------
|
|
69
|
+
DagError
|
|
70
|
+
If a dependency is missing, self-referential, or when a cycle is
|
|
71
|
+
detected.
|
|
72
|
+
"""
|
|
73
|
+
index = {job.name: job for job in jobs}
|
|
74
|
+
edges: dict[str, set[str]] = {name: set() for name in index}
|
|
75
|
+
indegree: dict[str, int] = {name: 0 for name in index}
|
|
76
|
+
|
|
77
|
+
for job in jobs:
|
|
78
|
+
for dep in job.depends_on:
|
|
79
|
+
if dep not in index:
|
|
80
|
+
raise DagError(
|
|
81
|
+
f'Unknown dependency "{dep}" in job "{job.name}"',
|
|
82
|
+
)
|
|
83
|
+
if dep == job.name:
|
|
84
|
+
raise DagError(f'Job "{job.name}" depends on itself')
|
|
85
|
+
if job.name not in edges[dep]:
|
|
86
|
+
edges[dep].add(job.name)
|
|
87
|
+
indegree[job.name] += 1
|
|
88
|
+
|
|
89
|
+
queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
|
|
90
|
+
ordered: list[str] = []
|
|
91
|
+
|
|
92
|
+
while queue:
|
|
93
|
+
name = queue.popleft()
|
|
94
|
+
ordered.append(name)
|
|
95
|
+
for child in sorted(edges[name]):
|
|
96
|
+
indegree[child] -= 1
|
|
97
|
+
if indegree[child] == 0:
|
|
98
|
+
queue.append(child)
|
|
99
|
+
|
|
100
|
+
if len(ordered) != len(jobs):
|
|
101
|
+
raise DagError('Dependency cycle detected')
|
|
102
|
+
|
|
103
|
+
return [index[name] for name in ordered]
|
etlplus/database/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus.database
|
|
1
|
+
# `etlplus.database` Subpackage
|
|
2
2
|
|
|
3
3
|
Documentation for the `etlplus.database` subpackage: database engine, schema, and ORM helpers.
|
|
4
4
|
|
|
@@ -9,7 +9,7 @@ Documentation for the `etlplus.database` subpackage: database engine, schema, an
|
|
|
9
9
|
|
|
10
10
|
Back to project overview: see the top-level [README](../../README.md).
|
|
11
11
|
|
|
12
|
-
- [etlplus.database
|
|
12
|
+
- [`etlplus.database` Subpackage](#etlplusdatabase-subpackage)
|
|
13
13
|
- [Database Engine and Connections](#database-engine-and-connections)
|
|
14
14
|
- [Schema and DDL Helpers](#schema-and-ddl-helpers)
|
|
15
15
|
- [ORM Utilities](#orm-utilities)
|
etlplus/enums.py
CHANGED
|
@@ -23,7 +23,6 @@ __all__ = [
|
|
|
23
23
|
'AggregateName',
|
|
24
24
|
'CoercibleStrEnum',
|
|
25
25
|
'DataConnectorType',
|
|
26
|
-
'HttpMethod',
|
|
27
26
|
'OperatorName',
|
|
28
27
|
'PipelineStep',
|
|
29
28
|
]
|
|
@@ -200,37 +199,6 @@ class DataConnectorType(CoercibleStrEnum):
|
|
|
200
199
|
}
|
|
201
200
|
|
|
202
201
|
|
|
203
|
-
class HttpMethod(CoercibleStrEnum):
|
|
204
|
-
"""Supported HTTP verbs that accept JSON payloads."""
|
|
205
|
-
|
|
206
|
-
# -- Constants -- #
|
|
207
|
-
|
|
208
|
-
CONNECT = 'connect'
|
|
209
|
-
DELETE = 'delete'
|
|
210
|
-
GET = 'get'
|
|
211
|
-
HEAD = 'head'
|
|
212
|
-
OPTIONS = 'options'
|
|
213
|
-
PATCH = 'patch'
|
|
214
|
-
POST = 'post'
|
|
215
|
-
PUT = 'put'
|
|
216
|
-
TRACE = 'trace'
|
|
217
|
-
|
|
218
|
-
# -- Getters -- #
|
|
219
|
-
|
|
220
|
-
@property
|
|
221
|
-
def allows_body(self) -> bool:
|
|
222
|
-
"""
|
|
223
|
-
Whether the method typically allows a request body.
|
|
224
|
-
|
|
225
|
-
Notes
|
|
226
|
-
-----
|
|
227
|
-
- RFCs do not strictly forbid bodies on some other methods (e.g.,
|
|
228
|
-
``DELETE``), but many servers/clients do not expect them. We mark
|
|
229
|
-
``POST``, ``PUT``, and ``PATCH`` as True.
|
|
230
|
-
"""
|
|
231
|
-
return self in {HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH}
|
|
232
|
-
|
|
233
|
-
|
|
234
202
|
class OperatorName(CoercibleStrEnum):
|
|
235
203
|
"""Supported comparison operators with helpers."""
|
|
236
204
|
|
etlplus/file/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus.file
|
|
1
|
+
# `etlplus.file` Subpackage
|
|
2
2
|
|
|
3
3
|
Documentation for the `etlplus.file` subpackage: unified file format support and helpers for reading
|
|
4
4
|
and writing data files.
|
|
@@ -11,7 +11,7 @@ and writing data files.
|
|
|
11
11
|
|
|
12
12
|
Back to project overview: see the top-level [README](../../README.md).
|
|
13
13
|
|
|
14
|
-
- [etlplus.file
|
|
14
|
+
- [`etlplus.file` Subpackage](#etlplusfile-subpackage)
|
|
15
15
|
- [Supported File Formats](#supported-file-formats)
|
|
16
16
|
- [Inferring File Format and Compression](#inferring-file-format-and-compression)
|
|
17
17
|
- [Reading and Writing Files](#reading-and-writing-files)
|
etlplus/file/enums.py
CHANGED
|
@@ -123,7 +123,7 @@ class FileFormat(CoercibleStrEnum):
|
|
|
123
123
|
RDS = 'rds' # R data file
|
|
124
124
|
SAS7BDAT = 'sas7bdat' # SAS data file
|
|
125
125
|
SAV = 'sav' # SPSS data file
|
|
126
|
-
SYLK = 'sylk' # Symbolic Link
|
|
126
|
+
SYLK = 'sylk' # Symbolic Link
|
|
127
127
|
XPT = 'xpt' # SAS Transport file
|
|
128
128
|
ZSAV = 'zsav' # Compressed SPSS data file
|
|
129
129
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus.
|
|
1
|
+
# etlplus.ops subpackage
|
|
2
2
|
|
|
3
3
|
Documentation for the `etlplus.validation` subpackage: data validation utilities and helpers.
|
|
4
4
|
|
|
@@ -8,7 +8,7 @@ Documentation for the `etlplus.validation` subpackage: data validation utilities
|
|
|
8
8
|
|
|
9
9
|
Back to project overview: see the top-level [README](../../README.md).
|
|
10
10
|
|
|
11
|
-
- [etlplus.
|
|
11
|
+
- [etlplus.ops subpackage](#etlplusops-subpackage)
|
|
12
12
|
- [Validation Features](#validation-features)
|
|
13
13
|
- [Defining Validation Rules](#defining-validation-rules)
|
|
14
14
|
- [Example: Validating Data](#example-validating-data)
|
etlplus/ops/__init__.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.ops` package.
|
|
3
|
+
|
|
4
|
+
Data operations helpers.
|
|
5
|
+
|
|
6
|
+
Importing :mod:`etlplus.ops` exposes the coarse-grained helpers most users care
|
|
7
|
+
about: ``extract``, ``transform``, ``load``, ``validate``, ``run``, and
|
|
8
|
+
``run_pipeline``. Each helper delegates to the richer modules under
|
|
9
|
+
``etlplus.ops.*`` while presenting a compact public API surface. Conditional
|
|
10
|
+
validation orchestration is available via
|
|
11
|
+
:func:`etlplus.ops.utils.maybe_validate`. The legacy compatibility module
|
|
12
|
+
:mod:`etlplus.ops.__init__validation` is deprecated in favor of this package.
|
|
13
|
+
|
|
14
|
+
Examples
|
|
15
|
+
--------
|
|
16
|
+
>>> from etlplus.ops import extract, transform
|
|
17
|
+
>>> raw = extract('file', 'input.json')
|
|
18
|
+
>>> curated = transform(raw, {'select': ['id', 'name']})
|
|
19
|
+
|
|
20
|
+
>>> from etlplus.ops.utils import maybe_validate
|
|
21
|
+
>>> payload = {'name': 'Alice'}
|
|
22
|
+
>>> rules = {'required': ['name']}
|
|
23
|
+
>>> def validator(data, config):
|
|
24
|
+
... missing = [field for field in config['required'] if field not in data]
|
|
25
|
+
... return {'valid': not missing, 'errors': missing, 'data': data}
|
|
26
|
+
>>> maybe_validate(
|
|
27
|
+
... payload,
|
|
28
|
+
... when='both',
|
|
29
|
+
... enabled=True,
|
|
30
|
+
... rules=rules,
|
|
31
|
+
... phase='before_transform',
|
|
32
|
+
... severity='warn',
|
|
33
|
+
... validate_fn=validator,
|
|
34
|
+
... print_json_fn=lambda message: message,
|
|
35
|
+
... )
|
|
36
|
+
{'name': 'Alice'}
|
|
37
|
+
|
|
38
|
+
See Also
|
|
39
|
+
--------
|
|
40
|
+
:mod:`etlplus.ops.run`
|
|
41
|
+
:mod:`etlplus.ops.utils`
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
from .extract import extract
|
|
45
|
+
from .load import load
|
|
46
|
+
from .run import run
|
|
47
|
+
from .run import run_pipeline
|
|
48
|
+
from .transform import transform
|
|
49
|
+
from .validate import validate
|
|
50
|
+
|
|
51
|
+
# SECTION: EXPORTS ========================================================== #
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
__all__ = [
|
|
55
|
+
'extract',
|
|
56
|
+
'load',
|
|
57
|
+
'run',
|
|
58
|
+
'run_pipeline',
|
|
59
|
+
'transform',
|
|
60
|
+
'validate',
|
|
61
|
+
]
|