etlplus-0.5.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +43 -0
- etlplus/__main__.py +22 -0
- etlplus/__version__.py +14 -0
- etlplus/api/README.md +237 -0
- etlplus/api/__init__.py +136 -0
- etlplus/api/auth.py +432 -0
- etlplus/api/config.py +633 -0
- etlplus/api/endpoint_client.py +885 -0
- etlplus/api/errors.py +170 -0
- etlplus/api/pagination/__init__.py +47 -0
- etlplus/api/pagination/client.py +188 -0
- etlplus/api/pagination/config.py +440 -0
- etlplus/api/pagination/paginator.py +775 -0
- etlplus/api/rate_limiting/__init__.py +38 -0
- etlplus/api/rate_limiting/config.py +343 -0
- etlplus/api/rate_limiting/rate_limiter.py +266 -0
- etlplus/api/request_manager.py +589 -0
- etlplus/api/retry_manager.py +430 -0
- etlplus/api/transport.py +325 -0
- etlplus/api/types.py +172 -0
- etlplus/cli/__init__.py +15 -0
- etlplus/cli/app.py +1367 -0
- etlplus/cli/handlers.py +775 -0
- etlplus/cli/main.py +616 -0
- etlplus/config/__init__.py +56 -0
- etlplus/config/connector.py +372 -0
- etlplus/config/jobs.py +311 -0
- etlplus/config/pipeline.py +339 -0
- etlplus/config/profile.py +78 -0
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/ddl.py +197 -0
- etlplus/enums.py +414 -0
- etlplus/extract.py +218 -0
- etlplus/file.py +657 -0
- etlplus/load.py +336 -0
- etlplus/mixins.py +62 -0
- etlplus/py.typed +0 -0
- etlplus/run.py +368 -0
- etlplus/run_helpers.py +843 -0
- etlplus/templates/__init__.py +5 -0
- etlplus/templates/ddl.sql.j2 +128 -0
- etlplus/templates/view.sql.j2 +69 -0
- etlplus/transform.py +1049 -0
- etlplus/types.py +227 -0
- etlplus/utils.py +638 -0
- etlplus/validate.py +493 -0
- etlplus/validation/__init__.py +44 -0
- etlplus/validation/utils.py +389 -0
- etlplus-0.5.4.dist-info/METADATA +616 -0
- etlplus-0.5.4.dist-info/RECORD +55 -0
- etlplus-0.5.4.dist-info/WHEEL +5 -0
- etlplus-0.5.4.dist-info/entry_points.txt +2 -0
- etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
- etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/load.py
ADDED
@@ -0,0 +1,336 @@
"""
:mod:`etlplus.load` module.

Helpers to load data into files, databases, and REST APIs.
"""

from __future__ import annotations

import json
import sys
from pathlib import Path
from typing import Any
from typing import cast

import requests  # type: ignore[import]

from .enums import DataConnectorType
from .enums import FileFormat
from .enums import HttpMethod
from .enums import coerce_data_connector_type
from .enums import coerce_file_format
from .enums import coerce_http_method
from .file import File
from .types import JSONData
from .types import JSONDict
from .types import JSONList
from .types import StrPath
from .utils import count_records

# SECTION: INTERNAL FUNCTIONS ============================================== #


def _parse_json_string(
    raw: str,
) -> JSONData:
    """
    Parse JSON data from ``raw`` text.

    Parameters
    ----------
    raw : str
        Raw JSON string to parse.

    Returns
    -------
    JSONData
        Parsed object or list of objects.

    Raises
    ------
    ValueError
        If the JSON is invalid or not an object/array.
    """
    try:
        loaded = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise ValueError(f'Invalid data source: {raw}') from exc

    if isinstance(loaded, dict):
        return cast(JSONDict, loaded)
    if isinstance(loaded, list):
        if all(isinstance(item, dict) for item in loaded):
            return cast(JSONList, loaded)
        raise ValueError(
            'JSON array must contain only objects (dicts) when parsing string',
        )
    raise ValueError(
        'JSON root must be an object or array when parsing string',
    )


# SECTION: FUNCTIONS ======================================================== #


# -- Data Loading -- #


def load_data(
    source: StrPath | JSONData,
) -> JSONData:
    """
    Load data from a file path, JSON string, or direct object.

    Parameters
    ----------
    source : StrPath | JSONData
        Data source to load. If a path is provided and exists, JSON will be
        read from it. Otherwise, a JSON string will be parsed.

    Returns
    -------
    JSONData
        Parsed object or list of objects.

    Raises
    ------
    TypeError
        If `source` is not a string, path, or JSON-like object.
    """
    if isinstance(source, (dict, list)):
        return cast(JSONData, source)

    if isinstance(source, Path):
        return File(source, FileFormat.JSON).read_json()

    if isinstance(source, str):
        # Special case: '-' means read JSON from stdin (Unix convention).
        if source == '-':
            raw = sys.stdin.read()
            return _parse_json_string(raw)
        candidate = Path(source)
        if candidate.exists():
            try:
                return File(candidate, FileFormat.JSON).read_json()
            except (OSError, json.JSONDecodeError, ValueError):
                # Fall back to treating the string as raw JSON content.
                pass
        return _parse_json_string(source)

    raise TypeError(
        'source must be a mapping, sequence of mappings, path, or JSON string',
    )


# -- File Loading -- #


def load_to_file(
    data: JSONData,
    file_path: StrPath,
    file_format: FileFormat | str | None = None,
) -> JSONDict:
    """
    Persist data to a local file.

    Parameters
    ----------
    data : JSONData
        Data to write.
    file_path : StrPath
        Target file path.
    file_format : FileFormat | str | None, optional
        Output format. If omitted (None), the format is inferred from the
        filename extension.

    Returns
    -------
    JSONDict
        Result dictionary with status and record count.
    """
    path = Path(file_path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # If no explicit format is provided, let File infer from extension.
    if file_format is None:
        records = File(path).write(data)
        ext = path.suffix.lstrip('.').lower()
        fmt = coerce_file_format(ext) if ext else FileFormat.JSON
    else:
        fmt = coerce_file_format(file_format)
        records = File(path, fmt).write(data)
    if fmt is FileFormat.CSV and records == 0:
        message = 'No data to write'
    else:
        message = f'Data loaded to {path}'

    return {
        'status': 'success',
        'message': message,
        'records': records,
    }


# -- Database Loading (Placeholder) -- #


def load_to_database(
    data: JSONData,
    connection_string: str,
) -> JSONDict:
    """
    Load data to a database.

    Notes
    -----
    Placeholder implementation. To enable database loading, install and
    configure database-specific drivers and query logic.

    Parameters
    ----------
    data : JSONData
        Data to load.
    connection_string : str
        Database connection string.

    Returns
    -------
    JSONDict
        Result object describing the operation.
    """
    records = count_records(data)

    return {
        'status': 'not_implemented',
        'message': 'Database loading not yet implemented',
        'connection_string': connection_string,
        'records': records,
        'note': 'Install database-specific drivers to enable this feature',
    }


# -- REST API Loading -- #


def load_to_api(
    data: JSONData,
    url: str,
    method: HttpMethod | str,
    **kwargs: Any,
) -> JSONDict:
    """
    Load data to a REST API.

    Parameters
    ----------
    data : JSONData
        Data to send as JSON.
    url : str
        API endpoint URL.
    method : HttpMethod | str
        HTTP method to use.
    **kwargs : Any
        Extra arguments forwarded to ``requests`` (e.g., ``timeout``).

    Returns
    -------
    JSONDict
        Result dictionary including response payload or text.

    Raises
    ------
    TypeError
        If the session object is not valid.
    """
    http_method = coerce_http_method(method)

    # Apply a conservative timeout to guard against hanging requests.
    timeout = kwargs.pop('timeout', 10.0)
    session = kwargs.pop('session', None)
    requester = session or requests

    request_callable = getattr(requester, http_method.value, None)
    if not callable(request_callable):
        raise TypeError(
            'Session object must supply a '
            f'callable "{http_method.value}" method',
        )

    response = request_callable(url, json=data, timeout=timeout, **kwargs)
    response.raise_for_status()

    # Try JSON first, fall back to text.
    try:
        payload: Any = response.json()
    except ValueError:
        payload = response.text

    return {
        'status': 'success',
        'status_code': response.status_code,
        'message': f'Data loaded to {url}',
        'response': payload,
        'records': count_records(data),
        'method': http_method.value.upper(),
    }


# -- Orchestration -- #


def load(
    source: StrPath | JSONData,
    target_type: DataConnectorType | str,
    target: StrPath,
    file_format: FileFormat | str | None = None,
    method: HttpMethod | str | None = None,
    **kwargs: Any,
) -> JSONData:
    """
    Load data to a target (file, database, or API).

    Parameters
    ----------
    source : StrPath | JSONData
        Data source to load.
    target_type : DataConnectorType | str
        Type of data target.
    target : StrPath
        Target location (file path, connection string, or API URL).
    file_format : FileFormat | str | None, optional
        File format, inferred from filename extension if omitted.
    method : HttpMethod | str | None, optional
        HTTP method for API targets. Defaults to POST if omitted.
    **kwargs : Any
        Additional arguments forwarded to target-specific loaders.

    Returns
    -------
    JSONData
        Result dictionary with status.

    Raises
    ------
    ValueError
        If `target_type` is not one of the supported values.
    """
    data = load_data(source)

    match coerce_data_connector_type(target_type):
        case DataConnectorType.FILE:
            # Prefer explicit format if provided, else infer from filename.
            return load_to_file(data, target, file_format)
        case DataConnectorType.DATABASE:
            return load_to_database(data, str(target))
        case DataConnectorType.API:
            api_method = method if method is not None else HttpMethod.POST
            return load_to_api(
                data,
                str(target),
                method=api_method,
                **kwargs,
            )
        case _:
            # `coerce_data_connector_type` covers invalid entries, but keep
            # explicit guard.
            raise ValueError(f'Invalid target type: {target_type}')
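For orientation, the following is a minimal usage sketch of the helpers added above. It is not code from the wheel: the import paths follow the file layout listed earlier, the target URL is a placeholder, and it assumes the `coerce_*` helpers accept the enum members directly, as the `match` dispatch in `load()` suggests.

# Example (not part of the package): load parsed records to a file and an API.
from etlplus.enums import DataConnectorType, FileFormat, HttpMethod
from etlplus.load import load, load_data

# An inline JSON string is parsed into a list of dicts by load_data().
records = load_data('[{"id": 1}, {"id": 2}]')

# File target: the explicit FileFormat takes precedence over extension inference.
file_result = load(
    records,
    DataConnectorType.FILE,
    'out/records.json',
    file_format=FileFormat.JSON,
)
print(file_result['status'], file_result['records'])

# API target (placeholder URL): timeout and any other extra kwargs reach the
# underlying requests call via load_to_api().
api_result = load(
    records,
    DataConnectorType.API,
    'https://example.com/api/items',
    method=HttpMethod.POST,
    timeout=30,
)
print(api_result['status_code'])

Both calls return plain dictionaries (`JSONDict`), so callers can log or assert on `status`, `records`, and, for API targets, `status_code`.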
etlplus/mixins.py
ADDED
@@ -0,0 +1,62 @@
"""
:mod:`etlplus.mixins` module.

Shared mixin utilities used across configuration and API layers.

Notes
-----
- Mixins are stateless helpers.
- ``__slots__`` prevents accidental instance attribute creation at runtime.
"""

from __future__ import annotations

from typing import Final

# SECTION: EXPORTS ========================================================== #


__all__ = ['BoundsWarningsMixin']


# SECTION: CLASSES ========================================================== #


class BoundsWarningsMixin:
    """
    Append human-readable warnings without raising exceptions.

    Examples
    --------
    >>> warnings: list[str] = []
    >>> BoundsWarningsMixin._warn_if(True, 'oops', warnings)
    >>> warnings
    ['oops']
    """

    __slots__ = ()

    _APPEND: Final = list.append

    # -- Static Methods -- #

    @staticmethod
    def _warn_if(
        condition: bool,
        message: str,
        bucket: list[str],
    ) -> None:
        """
        Append a warning to a list if a condition is met.

        Parameters
        ----------
        condition : bool
            Whether to issue the warning.
        message : str
            Warning message to append.
        bucket : list[str]
            Target list for collected warnings.
        """
        if condition:
            BoundsWarningsMixin._APPEND(bucket, message)
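A short, hypothetical sketch of how the mixin might be used by a configuration model that collects soft warnings instead of raising; the `PageSizeConfig` class and its bounds are invented for illustration and are not part of the package.

# Hypothetical example (not part of the package).
from etlplus.mixins import BoundsWarningsMixin


class PageSizeConfig(BoundsWarningsMixin):
    """Collects soft warnings about an out-of-range page size."""

    def __init__(self, page_size: int) -> None:
        self.page_size = page_size
        self.warnings: list[str] = []
        self._warn_if(
            page_size > 1000,
            'page_size above 1000 may be rejected by the API',
            self.warnings,
        )
        self._warn_if(
            page_size < 1,
            'page_size must be at least 1',
            self.warnings,
        )


print(PageSizeConfig(5000).warnings)
# ['page_size above 1000 may be rejected by the API']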
etlplus/py.typed
ADDED
File without changes