etlplus 0.12.10__py3-none-any.whl → 0.14.3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- etlplus/README.md +1 -1
- etlplus/__init__.py +1 -26
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +36 -20
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +7 -6
- etlplus/{run_helpers.py → api/utils.py} +205 -153
- etlplus/cli/handlers.py +17 -7
- etlplus/config/jobs.py +14 -4
- etlplus/dag.py +103 -0
- etlplus/enums.py +0 -32
- etlplus/file/cfg.py +2 -2
- etlplus/file/conf.py +2 -2
- etlplus/file/dta.py +77 -0
- etlplus/file/enums.py +10 -4
- etlplus/file/hbs.py +78 -0
- etlplus/file/hdf5.py +78 -0
- etlplus/file/jinja2.py +78 -0
- etlplus/file/mat.py +78 -0
- etlplus/file/mustache.py +78 -0
- etlplus/file/nc.py +78 -0
- etlplus/file/numbers.py +75 -0
- etlplus/file/ods.py +79 -0
- etlplus/file/properties.py +13 -13
- etlplus/file/rda.py +78 -0
- etlplus/file/rds.py +78 -0
- etlplus/file/sas7bdat.py +78 -0
- etlplus/file/sav.py +77 -0
- etlplus/file/sylk.py +77 -0
- etlplus/file/toml.py +1 -1
- etlplus/file/vm.py +78 -0
- etlplus/file/wks.py +77 -0
- etlplus/file/xlsm.py +79 -0
- etlplus/file/xpt.py +78 -0
- etlplus/file/zsav.py +77 -0
- etlplus/{validation → ops}/README.md +2 -2
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +78 -94
- etlplus/{load.py → ops/load.py} +73 -93
- etlplus/{run.py → ops/run.py} +140 -110
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +80 -15
- etlplus/{validate.py → ops/validate.py} +19 -9
- etlplus/types.py +2 -2
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/METADATA +91 -60
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/RECORD +56 -35
- etlplus/validation/__init__.py +0 -44
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/WHEEL +0 -0
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/entry_points.txt +0 -0
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.12.10.dist-info → etlplus-0.14.3.dist-info}/top_level.txt +0 -0
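The rename entries above reflect a package-level reorganization: the top-level `extract.py`, `load.py`, `run.py`, `transform.py`, and `validate.py` modules move under `etlplus/ops/`, and `run_helpers.py` is folded into `etlplus/api/` as `utils.py`. Below is a minimal import-migration sketch inferred from those rename entries and from the import changes in the diffs that follow; the exact public re-export surface is not shown in this diff, so treat the paths as assumptions.

```python
# Import-path migration implied by the rename entries above.
# Paths are inferred from this diff's rename mappings, not confirmed re-exports.

# etlplus 0.12.10 (old layout)
# from etlplus.run import run
# from etlplus.run_helpers import paginate_with_client

# etlplus 0.14.3 (new layout)
from etlplus.ops.run import run, run_pipeline  # run.py -> ops/run.py
from etlplus.api import paginate_with_client   # run_helpers.py -> api/utils.py, re-exported via etlplus.api
```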
etlplus/{load.py → ops/load.py}
RENAMED
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.load` module.
+:mod:`etlplus.ops.load` module.

 Helpers to load data into files, databases, and REST APIs.
 """
@@ -12,17 +12,16 @@ from pathlib import Path
 from typing import Any
 from typing import cast

-
-
-from
-from
-from
-from
-from
-from
-from
-from
-from .utils import count_records
+from ..api import HttpMethod
+from ..api.utils import resolve_request
+from ..enums import DataConnectorType
+from ..file import File
+from ..file import FileFormat
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import JSONList
+from ..types import StrPath
+from ..utils import count_records

 # SECTION: INTERNAL FUNCTIONS ============================================== #

@@ -69,7 +68,7 @@ def _parse_json_string(
 # SECTION: FUNCTIONS ======================================================== #


-# --
+# -- Helpers -- #


 def load_data(
@@ -119,58 +118,59 @@ def load_data(
     )


-
-
-
-def load_to_file(
+def load_to_api(
     data: JSONData,
-
-
+    url: str,
+    method: HttpMethod | str,
+    **kwargs: Any,
 ) -> JSONDict:
     """
-
+    Load data to a REST API.

     Parameters
     ----------
     data : JSONData
-        Data to
-
-
-
-
-
+        Data to send as JSON.
+    url : str
+        API endpoint URL.
+    method : HttpMethod | str
+        HTTP method to use.
+    **kwargs : Any
+        Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
+        When omitted, ``timeout`` defaults to 10 seconds.

     Returns
     -------
     JSONDict
-        Result dictionary
+        Result dictionary including response payload or text.
     """
-
-
+    # Apply a conservative timeout to guard against hanging requests.
+    timeout = kwargs.pop('timeout', 10.0)
+    session = kwargs.pop('session', None)
+    request_callable, timeout, http_method = resolve_request(
+        method,
+        session=session,
+        timeout=timeout,
+    )
+    response = request_callable(url, json=data, timeout=timeout, **kwargs)
+    response.raise_for_status()

-    #
-
-
-
-
-    else:
-        fmt = FileFormat.coerce(file_format)
-        records = File(path, fmt).write(data)
-        if fmt is FileFormat.CSV and records == 0:
-            message = 'No data to write'
-        else:
-            message = f'Data loaded to {path}'
+    # Try JSON first, fall back to text.
+    try:
+        payload: Any = response.json()
+    except ValueError:
+        payload = response.text

     return {
         'status': 'success',
-        '
-        '
+        'status_code': response.status_code,
+        'message': f'Data loaded to {url}',
+        'response': payload,
+        'records': count_records(data),
+        'method': http_method.value.upper(),
     }


-# -- Database Loading (Placeholder) -- #
-
-
 def load_to_database(
     data: JSONData,
     connection_string: str,
@@ -206,69 +206,49 @@ def load_to_database(
     }


-
-
-
-def load_to_api(
+def load_to_file(
     data: JSONData,
-
-
-    **kwargs: Any,
+    file_path: StrPath,
+    file_format: FileFormat | str | None = None,
 ) -> JSONDict:
     """
-
+    Persist data to a local file.

     Parameters
     ----------
     data : JSONData
-        Data to
-
-
-
-
-
-        Extra arguments forwarded to ``requests`` (e.g., ``timeout``).
+        Data to write.
+    file_path : StrPath
+        Target file path.
+    file_format : FileFormat | str | None, optional
+        Output format. If omitted (None), the format is inferred from the
+        filename extension.

     Returns
     -------
     JSONDict
-        Result dictionary
-
-    Raises
-    ------
-    TypeError
-        If the session object is not valid.
+        Result dictionary with status and record count.
     """
-
-
-    # Apply a conservative timeout to guard against hanging requests.
-    timeout = kwargs.pop('timeout', 10.0)
-    session = kwargs.pop('session', None)
-    requester = session or requests
-
-    request_callable = getattr(requester, http_method.value, None)
-    if not callable(request_callable):
-        raise TypeError(
-            'Session object must supply a '
-            f'callable "{http_method.value}" method',
-        )
-
-    response = request_callable(url, json=data, timeout=timeout, **kwargs)
-    response.raise_for_status()
+    path = Path(file_path)
+    path.parent.mkdir(parents=True, exist_ok=True)

-    #
-
-
-
-
+    # If no explicit format is provided, let File infer from extension.
+    if file_format is None:
+        records = File(path).write(data)
+        ext = path.suffix.lstrip('.').lower()
+        fmt = FileFormat.coerce(ext) if ext else FileFormat.JSON
+    else:
+        fmt = FileFormat.coerce(file_format)
+        records = File(path, fmt).write(data)
+    if fmt is FileFormat.CSV and records == 0:
+        message = 'No data to write'
+    else:
+        message = f'Data loaded to {path}'

     return {
         'status': 'success',
-        '
-        '
-        'response': payload,
-        'records': count_records(data),
-        'method': http_method.value.upper(),
+        'message': message,
+        'records': records,
     }

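For reference, a minimal usage sketch of the reworked loaders, based only on the signatures and result keys shown in the diff above. The import path, sample payload, endpoint URL, and specific argument values are illustrative assumptions; extra keyword arguments such as ``headers`` are simply forwarded to ``requests``.

```python
# Usage sketch based on the load_to_file/load_to_api signatures in the diff
# above. Import path, sample data, and URL are illustrative assumptions.
from etlplus.ops.load import load_to_api, load_to_file

records = [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]

# File target: with file_format=None the format is inferred from the extension.
file_result = load_to_file(records, 'out/records.json')
print(file_result['message'], file_result['records'])

# API target: timeout defaults to 10 seconds; extra kwargs are passed to requests.
api_result = load_to_api(
    records,
    'https://api.example.com/items',  # placeholder endpoint
    method='post',                    # HttpMethod | str; exact coercion is assumed
    headers={'Authorization': 'Bearer <token>'},
)
print(api_result['status_code'], api_result['records'])
```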
etlplus/{run.py → ops/run.py}
RENAMED
@@ -1,5 +1,5 @@
 """
-:mod:`etlplus.run` module.
+:mod:`etlplus.ops.run` module.

 A module for running ETL jobs defined in YAML configurations.
 """
@@ -9,126 +9,78 @@ from __future__ import annotations
 from collections.abc import Mapping
 from typing import Any
 from typing import Final
-from typing import TypedDict
 from typing import cast
 from urllib.parse import urlsplit
 from urllib.parse import urlunsplit

-import
-
-from
-from
-from
-from
-from
-from
-from
+from ..api import EndpointClient  # noqa: F401 (re-exported for tests)
+from ..api import HttpMethod
+from ..api import PaginationConfigMap
+from ..api import RequestOptions
+from ..api import compose_api_request_env
+from ..api import compose_api_target_env
+from ..api import paginate_with_client
+from ..config import load_pipeline_config
+from ..enums import DataConnectorType
+from ..file import FileFormat
+from ..types import JSONData
+from ..types import JSONDict
+from ..types import PipelineConfig
+from ..types import StrPath
+from ..types import Timeout
+from ..utils import print_json
 from .extract import extract
 from .load import load
-from .run_helpers import compose_api_request_env
-from .run_helpers import compose_api_target_env
-from .run_helpers import paginate_with_client
 from .transform import transform
-from .
-from .types import Timeout
-from .utils import print_json
+from .utils import maybe_validate
 from .validate import validate
-from .validation.utils import maybe_validate

 # SECTION: EXPORTS ========================================================== #


-__all__ = [
-
-
-
-
+__all__ = [
+    # Functions
+    'run',
+    'run_pipeline',
+]

-class BaseApiHttpEnv(TypedDict, total=False):
-    """
-    Common HTTP request environment for API interactions.
-
-    Fields shared by both source-side and target-side API operations.
-    """

-
-    url: Url | None
-    headers: dict[str, str]
-    timeout: Timeout
-
-    # Session
-    session: requests.Session | None
-
-
-class ApiRequestEnv(BaseApiHttpEnv, total=False):
-    """
-    Composed request environment for API sources.
+# SECTION: CONSTANTS ======================================================== #

-    Returned by ``compose_api_request_env`` (run_helpers) and consumed by the
-    API extract branch. Values are fully merged with endpoint/API defaults and
-    job-level overrides, preserving the original precedence and behavior.
-    """

-
-    use_endpoints: bool
-    base_url: str | None
-    base_path: str | None
-    endpoints_map: dict[str, str] | None
-    endpoint_key: str | None
+DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'

-    # Request
-    params: dict[str, Any]
-    pagination: PaginationConfigMap | None
-    sleep_seconds: float

-
-    retry: RetryPolicy | None
-    retry_network_errors: bool
+# SECTION: INTERNAL FUNCTIONS =============================================== #


-
-
-
-
-    Returned by ``compose_api_target_env`` (run_helpers) and consumed by the
-    API load branch. Values are merged from the target object, optional
-    API/endpoint reference, and job-level overrides, preserving original
-    precedence and behavior.
-
-    Notes
-    -----
-    - Precedence for inherited values matches original logic:
-      overrides -> target -> API profile defaults.
-    - Target composition does not include pagination/rate-limit/retry since
-      loads are single-request operations; only headers/timeout/session
-      apply.
+def _resolve_validation_config(
+    job_obj: Any,
+    cfg: Any,
+) -> tuple[bool, dict[str, Any], str, str]:
     """
+    Resolve validation settings for a job with safe defaults.

-
-
-
-
-
-
-    Minimal session configuration schema accepted by this runner.
+    Parameters
+    ----------
+    job_obj : Any
+        Job configuration object.
+    cfg : Any
+        Pipeline configuration object with validations.

-
+    Returns
+    -------
+    tuple[bool, dict[str, Any], str, str]
+        Tuple of (enabled, rules, severity, phase).
     """
+    val_ref = job_obj.validate
+    if val_ref is None:
+        return False, {}, 'error', 'before_transform'

-
-
-
-
-    cert: Any  # str or (cert, key)
-    proxies: Mapping[str, Any]
-    cookies: Mapping[str, Any]
-    trust_env: bool
-
-
-# SECTION: CONSTANTS ======================================================== #
-
-
-DEFAULT_CONFIG_PATH: Final[str] = 'in/pipeline.yml'
+    rules = cfg.validations.get(val_ref.ruleset, {})
+    severity = (val_ref.severity or 'error').lower()
+    phase = (val_ref.phase or 'before_transform').lower()
+    return True, rules, severity, phase


 # SECTION: FUNCTIONS ======================================================== #
@@ -207,7 +159,7 @@ def run(
         and env.get('endpoint_key')
     ):
         # Construct client using module-level EndpointClient so tests
-        # can monkeypatch this class on etlplus.run.
+        # can monkeypatch this class on etlplus.ops.run.
        ClientClass = EndpointClient  # noqa: N806
        client = ClientClass(
            base_url=cast(str, env['base_url']),
@@ -263,19 +215,10 @@ def run(
         # keep explicit guard for defensive programming.
         raise ValueError(f'Unsupported source type: {stype_raw}')

-
-
-
-
-        # Type narrowing for static checkers
-        assert val_ref is not None
-        rules = cfg.validations.get(val_ref.ruleset, {})
-        severity = (val_ref.severity or 'error').lower()
-        phase = (val_ref.phase or 'before_transform').lower()
-    else:
-        rules = {}
-        severity = 'error'
-        phase = 'before_transform'
+    enabled_validation, rules, severity, phase = _resolve_validation_config(
+        job_obj,
+        cfg,
+    )

     # Pre-transform validation (if configured).
     data = maybe_validate(
@@ -361,3 +304,90 @@ def run(
     # Return the terminal load result directly; callers (e.g., CLI) can wrap
     # it in their own envelope when needed.
     return cast(JSONDict, result)
+
+
+def run_pipeline(
+    *,
+    source_type: DataConnectorType | str | None = None,
+    source: StrPath | JSONData | None = None,
+    operations: PipelineConfig | None = None,
+    target_type: DataConnectorType | str | None = None,
+    target: StrPath | None = None,
+    file_format: FileFormat | str | None = None,
+    method: HttpMethod | str | None = None,
+    **kwargs: Any,
+) -> JSONData:
+    """
+    Run a single extract-transform-load flow without a YAML config.
+
+    Parameters
+    ----------
+    source_type : DataConnectorType | str | None, optional
+        Connector type for extraction. When ``None``, ``source`` is assumed
+        to be pre-loaded data and extraction is skipped.
+    source : StrPath | JSONData | None, optional
+        Data source for extraction or the pre-loaded payload when
+        ``source_type`` is ``None``.
+    operations : PipelineConfig | None, optional
+        Transform configuration passed to :func:`etlplus.ops.transform`.
+    target_type : DataConnectorType | str | None, optional
+        Connector type for loading. When ``None``, load is skipped and the
+        transformed data is returned.
+    target : StrPath | None, optional
+        Target for loading (file path, connection string, or API URL).
+    file_format : FileFormat | str | None, optional
+        File format for file sources/targets (forwarded to extract/load).
+    method : HttpMethod | str | None, optional
+        HTTP method for API loads (forwarded to :func:`etlplus.ops.load`).
+    **kwargs : Any
+        Extra keyword arguments forwarded to extract/load for API options
+        (headers, timeout, session, etc.).
+
+    Returns
+    -------
+    JSONData
+        Transformed data or the load result payload.
+
+    Raises
+    ------
+    TypeError
+        Raised when extracted data is not a dict or list of dicts and no
+        target is specified.
+    ValueError
+        Raised when required source/target inputs are missing.
+    """
+    if source_type is None:
+        if source is None:
+            raise ValueError('source or source_type is required')
+        data = source
+    else:
+        if source is None:
+            raise ValueError('source is required when source_type is set')
+        data = extract(
+            source_type,
+            cast(StrPath, source),
+            file_format=file_format,
+            **kwargs,
+        )
+
+    if operations:
+        data = transform(data, operations)
+
+    if target_type is None:
+        if not isinstance(data, (dict, list)):
+            raise TypeError(
+                f'Expected data to be dict or list of dicts, '
+                f'got {type(data).__name__}',
+            )
+        return data
+    if target is None:
+        raise ValueError('target is required when target_type is set')
+
+    return load(
+        data,
+        target_type,
+        target,
+        file_format=file_format,
+        method=method,
+        **kwargs,
+    )