etlplus 0.12.12__py3-none-any.whl → 0.14.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +1 -1
- etlplus/__init__.py +1 -26
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +36 -20
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +7 -6
- etlplus/{run_helpers.py → api/utils.py} +205 -153
- etlplus/cli/handlers.py +17 -7
- etlplus/config/jobs.py +14 -4
- etlplus/dag.py +103 -0
- etlplus/enums.py +0 -32
- etlplus/file/enums.py +1 -1
- etlplus/{validation → ops}/README.md +2 -2
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +78 -94
- etlplus/{load.py → ops/load.py} +73 -93
- etlplus/{run.py → ops/run.py} +140 -110
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +80 -15
- etlplus/{validate.py → ops/validate.py} +19 -9
- etlplus/types.py +2 -2
- {etlplus-0.12.12.dist-info → etlplus-0.14.3.dist-info}/METADATA +91 -60
- {etlplus-0.12.12.dist-info → etlplus-0.14.3.dist-info}/RECORD +33 -31
- etlplus/validation/__init__.py +0 -44
- {etlplus-0.12.12.dist-info → etlplus-0.14.3.dist-info}/WHEEL +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.14.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.14.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.12.12.dist-info → etlplus-0.14.3.dist-info}/top_level.txt +0 -0
etlplus/dag.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.dag` module.
|
|
3
|
+
|
|
4
|
+
Lightweight directed acyclic graph (DAG) helpers for ordering jobs based on
|
|
5
|
+
``depends_on``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections import deque
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
from .config.jobs import JobConfig
|
|
14
|
+
|
|
15
|
+
# SECTION: EXPORTS ========================================================== #
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
'DagError',
|
|
20
|
+
'topological_sort_jobs',
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# SECTION: ERRORS =========================================================== #
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(slots=True)
|
|
28
|
+
class DagError(ValueError):
|
|
29
|
+
"""
|
|
30
|
+
Raised when the job dependency graph is invalid.
|
|
31
|
+
|
|
32
|
+
Attributes
|
|
33
|
+
----------
|
|
34
|
+
message : str
|
|
35
|
+
Error message.
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
# -- Attributes -- #
|
|
39
|
+
|
|
40
|
+
message: str
|
|
41
|
+
|
|
42
|
+
# -- Magic Methods (Object Representation) -- #
|
|
43
|
+
|
|
44
|
+
def __str__(self) -> str:
|
|
45
|
+
return self.message
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def topological_sort_jobs(
    jobs: list[JobConfig],
) -> list[JobConfig]:
    """
    Return jobs in topological order based on ``depends_on``.

    Every job appears after all of the jobs it depends on. Jobs that become
    ready at the same time are emitted in alphabetical order by name, so the
    result is deterministic for a given input.

    Parameters
    ----------
    jobs : list[JobConfig]
        List of job configurations to sort.

    Returns
    -------
    list[JobConfig]
        Jobs sorted in topological order.

    Raises
    ------
    DagError
        If two jobs share a name, if a dependency is missing or
        self-referential, or when a cycle is detected.
    """
    # Build the name lookup explicitly so duplicate names are reported as
    # such; a dict comprehension would silently keep only the last job and
    # the size check at the end would then misreport a dependency cycle.
    index: dict[str, JobConfig] = {}
    for job in jobs:
        if job.name in index:
            raise DagError(f'Duplicate job name "{job.name}"')
        index[job.name] = job

    # edges[dep] holds the jobs that must wait for ``dep``.
    edges: dict[str, set[str]] = {name: set() for name in index}
    indegree: dict[str, int] = {name: 0 for name in index}

    for job in jobs:
        for dep in job.depends_on:
            if dep not in index:
                raise DagError(
                    f'Unknown dependency "{dep}" in job "{job.name}"',
                )
            if dep == job.name:
                raise DagError(f'Job "{job.name}" depends on itself')
            # A dependency listed twice must only count once toward indegree.
            if job.name not in edges[dep]:
                edges[dep].add(job.name)
                indegree[job.name] += 1

    # Kahn's algorithm: repeatedly emit jobs with no unmet dependencies.
    queue = deque(sorted(name for name, deg in indegree.items() if deg == 0))
    ordered: list[str] = []

    while queue:
        name = queue.popleft()
        ordered.append(name)
        for child in sorted(edges[name]):
            indegree[child] -= 1
            if indegree[child] == 0:
                queue.append(child)

    # Any job never emitted still has an unmet dependency, i.e. it sits on
    # (or behind) a cycle.
    if len(ordered) != len(index):
        raise DagError('Dependency cycle detected')

    return [index[name] for name in ordered]
|
etlplus/enums.py
CHANGED
|
@@ -23,7 +23,6 @@ __all__ = [
|
|
|
23
23
|
'AggregateName',
|
|
24
24
|
'CoercibleStrEnum',
|
|
25
25
|
'DataConnectorType',
|
|
26
|
-
'HttpMethod',
|
|
27
26
|
'OperatorName',
|
|
28
27
|
'PipelineStep',
|
|
29
28
|
]
|
|
@@ -200,37 +199,6 @@ class DataConnectorType(CoercibleStrEnum):
|
|
|
200
199
|
}
|
|
201
200
|
|
|
202
201
|
|
|
203
|
-
class HttpMethod(CoercibleStrEnum):
|
|
204
|
-
"""Supported HTTP verbs that accept JSON payloads."""
|
|
205
|
-
|
|
206
|
-
# -- Constants -- #
|
|
207
|
-
|
|
208
|
-
CONNECT = 'connect'
|
|
209
|
-
DELETE = 'delete'
|
|
210
|
-
GET = 'get'
|
|
211
|
-
HEAD = 'head'
|
|
212
|
-
OPTIONS = 'options'
|
|
213
|
-
PATCH = 'patch'
|
|
214
|
-
POST = 'post'
|
|
215
|
-
PUT = 'put'
|
|
216
|
-
TRACE = 'trace'
|
|
217
|
-
|
|
218
|
-
# -- Getters -- #
|
|
219
|
-
|
|
220
|
-
@property
|
|
221
|
-
def allows_body(self) -> bool:
|
|
222
|
-
"""
|
|
223
|
-
Whether the method typically allows a request body.
|
|
224
|
-
|
|
225
|
-
Notes
|
|
226
|
-
-----
|
|
227
|
-
- RFCs do not strictly forbid bodies on some other methods (e.g.,
|
|
228
|
-
``DELETE``), but many servers/clients do not expect them. We mark
|
|
229
|
-
``POST``, ``PUT``, and ``PATCH`` as True.
|
|
230
|
-
"""
|
|
231
|
-
return self in {HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH}
|
|
232
|
-
|
|
233
|
-
|
|
234
202
|
class OperatorName(CoercibleStrEnum):
|
|
235
203
|
"""Supported comparison operators with helpers."""
|
|
236
204
|
|
etlplus/file/enums.py
CHANGED
|
@@ -123,7 +123,7 @@ class FileFormat(CoercibleStrEnum):
|
|
|
123
123
|
RDS = 'rds' # R data file
|
|
124
124
|
SAS7BDAT = 'sas7bdat' # SAS data file
|
|
125
125
|
SAV = 'sav' # SPSS data file
|
|
126
|
-
SYLK = 'sylk' # Symbolic Link
|
|
126
|
+
SYLK = 'sylk' # Symbolic Link
|
|
127
127
|
XPT = 'xpt' # SAS Transport file
|
|
128
128
|
ZSAV = 'zsav' # Compressed SPSS data file
|
|
129
129
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# etlplus.
|
|
1
|
+
# etlplus.ops subpackage
|
|
2
2
|
|
|
3
3
|
Documentation for the `etlplus.ops` subpackage (formerly `etlplus.validation`): data validation utilities and helpers.
|
|
4
4
|
|
|
@@ -8,7 +8,7 @@ Documentation for the `etlplus.validation` subpackage: data validation utilities
|
|
|
8
8
|
|
|
9
9
|
Back to project overview: see the top-level [README](../../README.md).
|
|
10
10
|
|
|
11
|
-
- [etlplus.
|
|
11
|
+
- [etlplus.ops subpackage](#etlplusops-subpackage)
|
|
12
12
|
- [Validation Features](#validation-features)
|
|
13
13
|
- [Defining Validation Rules](#defining-validation-rules)
|
|
14
14
|
- [Example: Validating Data](#example-validating-data)
|
etlplus/ops/__init__.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.ops` package.
|
|
3
|
+
|
|
4
|
+
Data operations helpers.
|
|
5
|
+
|
|
6
|
+
Importing :mod:`etlplus.ops` exposes the coarse-grained helpers most users care
|
|
7
|
+
about: ``extract``, ``transform``, ``load``, ``validate``, ``run``, and
|
|
8
|
+
``run_pipeline``. Each helper delegates to the richer modules under
|
|
9
|
+
``etlplus.ops.*`` while presenting a compact public API surface. Conditional
|
|
10
|
+
validation orchestration is available via
|
|
11
|
+
:func:`etlplus.ops.utils.maybe_validate`. The legacy compatibility module
|
|
12
|
+
:mod:`etlplus.validation` is deprecated in favor of this package.
|
|
13
|
+
|
|
14
|
+
Examples
|
|
15
|
+
--------
|
|
16
|
+
>>> from etlplus.ops import extract, transform
|
|
17
|
+
>>> raw = extract('file', 'input.json')
|
|
18
|
+
>>> curated = transform(raw, {'select': ['id', 'name']})
|
|
19
|
+
|
|
20
|
+
>>> from etlplus.ops.utils import maybe_validate
|
|
21
|
+
>>> payload = {'name': 'Alice'}
|
|
22
|
+
>>> rules = {'required': ['name']}
|
|
23
|
+
>>> def validator(data, config):
|
|
24
|
+
... missing = [field for field in config['required'] if field not in data]
|
|
25
|
+
... return {'valid': not missing, 'errors': missing, 'data': data}
|
|
26
|
+
>>> maybe_validate(
|
|
27
|
+
... payload,
|
|
28
|
+
... when='both',
|
|
29
|
+
... enabled=True,
|
|
30
|
+
... rules=rules,
|
|
31
|
+
... phase='before_transform',
|
|
32
|
+
... severity='warn',
|
|
33
|
+
... validate_fn=validator,
|
|
34
|
+
... print_json_fn=lambda message: message,
|
|
35
|
+
... )
|
|
36
|
+
{'name': 'Alice'}
|
|
37
|
+
|
|
38
|
+
See Also
|
|
39
|
+
--------
|
|
40
|
+
:mod:`etlplus.ops.run`
|
|
41
|
+
:mod:`etlplus.ops.utils`
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
from .extract import extract
|
|
45
|
+
from .load import load
|
|
46
|
+
from .run import run
|
|
47
|
+
from .run import run_pipeline
|
|
48
|
+
from .transform import transform
|
|
49
|
+
from .validate import validate
|
|
50
|
+
|
|
51
|
+
# SECTION: EXPORTS ========================================================== #
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
__all__ = [
|
|
55
|
+
'extract',
|
|
56
|
+
'load',
|
|
57
|
+
'run',
|
|
58
|
+
'run_pipeline',
|
|
59
|
+
'transform',
|
|
60
|
+
'validate',
|
|
61
|
+
]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
:mod:`etlplus.extract` module.
|
|
2
|
+
:mod:`etlplus.ops.extract` module.
|
|
3
3
|
|
|
4
4
|
Helpers to extract data from files, databases, and REST APIs.
|
|
5
5
|
"""
|
|
@@ -10,56 +10,81 @@ from pathlib import Path
|
|
|
10
10
|
from typing import Any
|
|
11
11
|
from typing import cast
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from
|
|
22
|
-
from .types import StrPath
|
|
13
|
+
from ..api import HttpMethod
|
|
14
|
+
from ..api.utils import resolve_request
|
|
15
|
+
from ..enums import DataConnectorType
|
|
16
|
+
from ..file import File
|
|
17
|
+
from ..file import FileFormat
|
|
18
|
+
from ..types import JSONData
|
|
19
|
+
from ..types import JSONDict
|
|
20
|
+
from ..types import JSONList
|
|
21
|
+
from ..types import StrPath
|
|
23
22
|
|
|
24
23
|
# SECTION: FUNCTIONS ======================================================== #
|
|
25
24
|
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
file_path: StrPath,
|
|
32
|
-
file_format: FileFormat | str | None = FileFormat.JSON,
|
|
26
|
+
def extract_from_api(
    url: str,
    method: HttpMethod | str = HttpMethod.GET,
    **kwargs: Any,
) -> JSONData:
    """
    Fetch data from a REST endpoint and normalize it to JSON-like data.

    Parameters
    ----------
    url : str
        API endpoint URL.
    method : HttpMethod | str, optional
        HTTP method to use. Defaults to ``GET``.
    **kwargs : Any
        Extra keyword arguments passed through to the request callable.
        ``timeout`` and ``session`` are removed first and handed to
        ``resolve_request``; when ``timeout`` is omitted, ``None`` is passed
        and ``resolve_request`` is expected to supply the default
        (documented as 10 seconds -- confirm against that helper).

    Returns
    -------
    JSONData
        * a JSON object is returned unchanged;
        * a JSON array of objects is returned unchanged;
        * any other JSON array has each item wrapped as ``{'value': item}``;
        * any other JSON value is wrapped as ``{'value': payload}``;
        * a non-JSON or unparsable body is returned as a mapping holding
          the raw text and the lower-cased content type.

    Raises
    ------
    TypeError
        Presumably raised by ``resolve_request`` when a provided
        ``session`` does not expose the required HTTP method (for example,
        ``get``) -- verify against that helper.

    Notes
    -----
    ``raise_for_status()`` is called on the response, so error statuses
    propagate as whatever exception that method raises.
    """
    requested_timeout = kwargs.pop('timeout', None)
    custom_session = kwargs.pop('session', None)
    send, effective_timeout, _ = resolve_request(
        method,
        session=custom_session,
        timeout=requested_timeout,
    )
    reply = send(url, timeout=effective_timeout, **kwargs)
    reply.raise_for_status()

    content_type = reply.headers.get('content-type', '').lower()

    # Anything not advertised as JSON is handed back as raw text.
    if 'application/json' not in content_type:
        return {'content': reply.text, 'content_type': content_type}

    try:
        body: Any = reply.json()
    except ValueError:
        # Advertised as JSON but unparsable; degrade to the raw text form.
        return {
            'content': reply.text,
            'content_type': content_type,
        }

    if isinstance(body, dict):
        return cast(JSONDict, body)
    if not isinstance(body, list):
        # Scalar JSON (string, number, boolean, null) gets wrapped.
        return {'value': body}
    if any(not isinstance(item, dict) for item in body):
        # Mixed or scalar arrays are coerced to objects for consistency.
        return [{'value': item} for item in body]
    return cast(JSONList, body)
|
|
63
88
|
|
|
64
89
|
|
|
65
90
|
def extract_from_database(
|
|
@@ -94,77 +119,36 @@ def extract_from_database(
|
|
|
94
119
|
]
|
|
95
120
|
|
|
96
121
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def extract_from_api(
|
|
101
|
-
url: str,
|
|
102
|
-
method: HttpMethod | str = HttpMethod.GET,
|
|
103
|
-
**kwargs: Any,
|
|
122
|
+
def extract_from_file(
    file_path: StrPath,
    file_format: FileFormat | str | None = FileFormat.JSON,
) -> JSONData:
    """
    Read (semi-)structured data from a local file.

    Parameters
    ----------
    file_path : StrPath
        Source file path.
    file_format : FileFormat | str | None, optional
        File format to parse. Defaults to ``FileFormat.JSON``. Pass
        ``None`` explicitly to have the format inferred from the filename
        extension instead.

    Returns
    -------
    JSONData
        Whatever ``File.read()`` returns for the file (documented as a
        mapping or a list of mappings).
    """
    source = Path(file_path)

    # ``None`` is forwarded untouched so ``File`` can infer the format;
    # anything else is normalized to a ``FileFormat`` member first.
    resolved = None if file_format is None else FileFormat.coerce(file_format)

    # Existence and format validation are left to the file module.
    return File(source, resolved).read()
|
|
168
152
|
|
|
169
153
|
|
|
170
154
|
# -- Orchestration -- #
|
etlplus/{load.py → ops/load.py}
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
:mod:`etlplus.load` module.
|
|
2
|
+
:mod:`etlplus.ops.load` module.
|
|
3
3
|
|
|
4
4
|
Helpers to load data into files, databases, and REST APIs.
|
|
5
5
|
"""
|
|
@@ -12,17 +12,16 @@ from pathlib import Path
|
|
|
12
12
|
from typing import Any
|
|
13
13
|
from typing import cast
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
from .utils import count_records
|
|
15
|
+
from ..api import HttpMethod
|
|
16
|
+
from ..api.utils import resolve_request
|
|
17
|
+
from ..enums import DataConnectorType
|
|
18
|
+
from ..file import File
|
|
19
|
+
from ..file import FileFormat
|
|
20
|
+
from ..types import JSONData
|
|
21
|
+
from ..types import JSONDict
|
|
22
|
+
from ..types import JSONList
|
|
23
|
+
from ..types import StrPath
|
|
24
|
+
from ..utils import count_records
|
|
26
25
|
|
|
27
26
|
# SECTION: INTERNAL FUNCTIONS ============================================== #
|
|
28
27
|
|
|
@@ -69,7 +68,7 @@ def _parse_json_string(
|
|
|
69
68
|
# SECTION: FUNCTIONS ======================================================== #
|
|
70
69
|
|
|
71
70
|
|
|
72
|
-
# --
|
|
71
|
+
# -- Helpers -- #
|
|
73
72
|
|
|
74
73
|
|
|
75
74
|
def load_data(
|
|
@@ -119,58 +118,59 @@ def load_data(
|
|
|
119
118
|
)
|
|
120
119
|
|
|
121
120
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def load_to_file(
|
|
121
|
+
def load_to_api(
    data: JSONData,
    url: str,
    method: HttpMethod | str,
    **kwargs: Any,
) -> JSONDict:
    """
    Send data to a REST endpoint as a JSON request body.

    Parameters
    ----------
    data : JSONData
        Data to send as JSON.
    url : str
        API endpoint URL.
    method : HttpMethod | str
        HTTP method to use.
    **kwargs : Any
        Extra keyword arguments passed through to the request callable.
        ``timeout`` (default 10 seconds) and ``session`` are removed first
        and handed to ``resolve_request``.

    Returns
    -------
    JSONDict
        Mapping with ``status``, ``status_code``, ``message``, ``response``
        (parsed JSON, or the raw text when the body is not valid JSON),
        ``records``, and the upper-cased ``method``.

    Notes
    -----
    ``raise_for_status()`` is called on the response, so error statuses
    propagate as whatever exception that method raises.
    """
    # A finite default timeout guards against requests that never return.
    requested_timeout = kwargs.pop('timeout', 10.0)
    custom_session = kwargs.pop('session', None)
    send, effective_timeout, verb = resolve_request(
        method,
        session=custom_session,
        timeout=requested_timeout,
    )
    reply = send(url, json=data, timeout=effective_timeout, **kwargs)
    reply.raise_for_status()

    # Prefer a parsed JSON body; keep the raw text when parsing fails.
    try:
        body: Any = reply.json()
    except ValueError:
        body = reply.text

    return {
        'status': 'success',
        'status_code': reply.status_code,
        'message': f'Data loaded to {url}',
        'response': body,
        'records': count_records(data),
        'method': verb.value.upper(),
    }
|
|
169
172
|
|
|
170
173
|
|
|
171
|
-
# -- Database Loading (Placeholder) -- #
|
|
172
|
-
|
|
173
|
-
|
|
174
174
|
def load_to_database(
|
|
175
175
|
data: JSONData,
|
|
176
176
|
connection_string: str,
|
|
@@ -206,69 +206,49 @@ def load_to_database(
|
|
|
206
206
|
}
|
|
207
207
|
|
|
208
208
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
def load_to_api(
|
|
209
|
+
def load_to_file(
    data: JSONData,
    file_path: StrPath,
    file_format: FileFormat | str | None = None,
) -> JSONDict:
    """
    Write data to a local file, creating parent directories as needed.

    Parameters
    ----------
    data : JSONData
        Data to write.
    file_path : StrPath
        Target file path.
    file_format : FileFormat | str | None, optional
        Output format. When ``None`` (the default), ``File`` is left to
        infer the format from the filename extension.

    Returns
    -------
    JSONDict
        Mapping with ``status``, a human-readable ``message``, and the
        ``records`` count reported by ``File.write()``.
    """
    target = Path(file_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    if file_format is not None:
        fmt = FileFormat.coerce(file_format)
        written = File(target, fmt).write(data)
    else:
        # Let ``File`` infer the format for the write itself, then derive
        # the format from the suffix (JSON when there is none) solely to
        # choose the status message below.
        written = File(target).write(data)
        suffix = target.suffix.lstrip('.').lower()
        fmt = FileFormat.coerce(suffix) if suffix else FileFormat.JSON

    # An empty CSV write gets its own message.
    nothing_written = fmt is FileFormat.CSV and written == 0
    message = (
        'No data to write' if nothing_written else f'Data loaded to {target}'
    )

    return {
        'status': 'success',
        'message': message,
        'records': written,
    }
|
|
273
253
|
|
|
274
254
|
|