etlplus 0.16.0__py3-none-any.whl → 0.16.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +24 -2
- etlplus/__init__.py +2 -0
- etlplus/api/__init__.py +14 -14
- etlplus/api/auth.py +9 -6
- etlplus/api/config.py +6 -6
- etlplus/api/endpoint_client.py +16 -16
- etlplus/api/enums.py +2 -2
- etlplus/api/errors.py +4 -4
- etlplus/api/pagination/__init__.py +6 -6
- etlplus/api/pagination/config.py +11 -9
- etlplus/api/rate_limiting/__init__.py +2 -2
- etlplus/api/rate_limiting/config.py +10 -10
- etlplus/api/rate_limiting/rate_limiter.py +2 -2
- etlplus/api/request_manager.py +4 -4
- etlplus/api/retry_manager.py +6 -6
- etlplus/api/transport.py +10 -10
- etlplus/api/types.py +47 -26
- etlplus/api/utils.py +49 -49
- etlplus/cli/README.md +9 -7
- etlplus/cli/commands.py +22 -22
- etlplus/cli/handlers.py +12 -13
- etlplus/cli/main.py +1 -1
- etlplus/{workflow/pipeline.py → config.py} +54 -91
- etlplus/connector/__init__.py +6 -6
- etlplus/connector/api.py +7 -7
- etlplus/connector/database.py +3 -3
- etlplus/connector/file.py +3 -3
- etlplus/connector/types.py +2 -2
- etlplus/database/README.md +7 -7
- etlplus/enums.py +35 -167
- etlplus/file/README.md +7 -5
- etlplus/file/accdb.py +2 -1
- etlplus/file/arrow.py +2 -1
- etlplus/file/bson.py +2 -1
- etlplus/file/cbor.py +2 -1
- etlplus/file/cfg.py +1 -1
- etlplus/file/conf.py +1 -1
- etlplus/file/dat.py +1 -1
- etlplus/file/dta.py +1 -1
- etlplus/file/duckdb.py +2 -1
- etlplus/file/enums.py +1 -1
- etlplus/file/fwf.py +2 -1
- etlplus/file/hbs.py +2 -1
- etlplus/file/hdf5.py +2 -1
- etlplus/file/ini.py +2 -1
- etlplus/file/ion.py +1 -1
- etlplus/file/jinja2.py +2 -1
- etlplus/file/log.py +1 -1
- etlplus/file/mat.py +1 -1
- etlplus/file/mdb.py +2 -1
- etlplus/file/msgpack.py +2 -1
- etlplus/file/mustache.py +2 -1
- etlplus/file/nc.py +1 -1
- etlplus/file/numbers.py +2 -1
- etlplus/file/ods.py +2 -1
- etlplus/file/pb.py +2 -1
- etlplus/file/pbf.py +2 -1
- etlplus/file/properties.py +2 -1
- etlplus/file/proto.py +2 -1
- etlplus/file/psv.py +2 -1
- etlplus/file/rda.py +2 -1
- etlplus/file/rds.py +1 -1
- etlplus/file/sas7bdat.py +2 -1
- etlplus/file/sav.py +1 -1
- etlplus/file/sqlite.py +2 -1
- etlplus/file/sylk.py +2 -1
- etlplus/file/tab.py +2 -1
- etlplus/file/toml.py +2 -1
- etlplus/file/vm.py +2 -1
- etlplus/file/wks.py +2 -1
- etlplus/file/xls.py +1 -1
- etlplus/file/xlsm.py +2 -2
- etlplus/file/xpt.py +2 -1
- etlplus/file/zsav.py +2 -1
- etlplus/ops/README.md +10 -9
- etlplus/ops/__init__.py +1 -0
- etlplus/ops/enums.py +173 -0
- etlplus/ops/extract.py +209 -22
- etlplus/ops/load.py +140 -34
- etlplus/ops/run.py +88 -103
- etlplus/ops/transform.py +46 -27
- etlplus/ops/types.py +147 -0
- etlplus/ops/utils.py +5 -5
- etlplus/ops/validate.py +13 -13
- etlplus/templates/README.md +11 -9
- etlplus/types.py +5 -102
- etlplus/workflow/README.md +0 -24
- etlplus/workflow/__init__.py +2 -4
- etlplus/workflow/dag.py +23 -1
- etlplus/workflow/jobs.py +15 -28
- etlplus/workflow/profile.py +4 -2
- {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/METADATA +32 -28
- etlplus-0.16.7.dist-info/RECORD +143 -0
- etlplus-0.16.0.dist-info/RECORD +0 -141
- {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/WHEEL +0 -0
- {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/entry_points.txt +0 -0
- {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.16.0.dist-info → etlplus-0.16.7.dist-info}/top_level.txt +0 -0
etlplus/file/sqlite.py
CHANGED
etlplus/file/sylk.py
CHANGED
etlplus/file/tab.py
CHANGED
etlplus/file/toml.py
CHANGED
etlplus/file/vm.py
CHANGED
etlplus/file/wks.py
CHANGED
etlplus/file/xls.py
CHANGED
etlplus/file/xlsm.py
CHANGED
etlplus/file/xpt.py
CHANGED
etlplus/file/zsav.py
CHANGED
etlplus/ops/README.md
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
|
-
# etlplus.ops
|
|
1
|
+
# `etlplus.ops` Subpackage
|
|
2
2
|
|
|
3
|
-
Documentation for the `etlplus.
|
|
3
|
+
Documentation for the `etlplus.ops` subpackage: core ETL primitives used by the CLI and pipeline
|
|
4
|
+
runner.
|
|
4
5
|
|
|
5
|
-
-
|
|
6
|
-
-
|
|
7
|
-
-
|
|
6
|
+
- Extract data from files, APIs, and databases (database extract is a placeholder today)
|
|
7
|
+
- Validate JSON-like data with schema-style rules
|
|
8
|
+
- Transform records (filter, map, select, sort, aggregate)
|
|
9
|
+
- Load data into files and APIs (database load is a placeholder today)
|
|
8
10
|
|
|
9
11
|
Back to project overview: see the top-level [README](../../README.md).
|
|
10
12
|
|
|
11
|
-
- [etlplus.ops
|
|
13
|
+
- [`etlplus.ops` Subpackage](#etlplusops-subpackage)
|
|
12
14
|
- [Validation Features](#validation-features)
|
|
13
15
|
- [Defining Validation Rules](#defining-validation-rules)
|
|
14
16
|
- [Example: Validating Data](#example-validating-data)
|
|
@@ -19,7 +21,6 @@ Back to project overview: see the top-level [README](../../README.md).
|
|
|
19
21
|
- Type checking (string, number, boolean, etc.)
|
|
20
22
|
- Required/optional fields
|
|
21
23
|
- Enum and pattern validation
|
|
22
|
-
- Custom rule support
|
|
23
24
|
|
|
24
25
|
## Defining Validation Rules
|
|
25
26
|
|
|
@@ -35,7 +36,7 @@ rules = {
|
|
|
35
36
|
## Example: Validating Data
|
|
36
37
|
|
|
37
38
|
```python
|
|
38
|
-
from etlplus.
|
|
39
|
+
from etlplus.ops import validate
|
|
39
40
|
|
|
40
41
|
result = validate({"name": "Alice", "age": 30}, rules)
|
|
41
42
|
if result["valid"]:
|
|
@@ -47,4 +48,4 @@ else:
|
|
|
47
48
|
## See Also
|
|
48
49
|
|
|
49
50
|
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
50
|
-
- Validation utilities in [
|
|
51
|
+
- Validation utilities in [validate.py](validate.py)
|
etlplus/ops/__init__.py
CHANGED
etlplus/ops/enums.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.ops.enums` module.
|
|
3
|
+
|
|
4
|
+
Operation-specific enums and helpers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import operator as _op
|
|
10
|
+
from statistics import fmean
|
|
11
|
+
|
|
12
|
+
from ..enums import CoercibleStrEnum
|
|
13
|
+
from ..types import StrStrMap
|
|
14
|
+
from .types import AggregateFunc
|
|
15
|
+
from .types import OperatorFunc
|
|
16
|
+
|
|
17
|
+
# SECTION: EXPORTS ========================================================= #
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
# Enums
|
|
22
|
+
'AggregateName',
|
|
23
|
+
'OperatorName',
|
|
24
|
+
'PipelineStep',
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# SECTION: ENUMS ============================================================ #
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class AggregateName(CoercibleStrEnum):
|
|
32
|
+
"""Supported aggregations with helpers."""
|
|
33
|
+
|
|
34
|
+
# -- Constants -- #
|
|
35
|
+
|
|
36
|
+
AVG = 'avg'
|
|
37
|
+
COUNT = 'count'
|
|
38
|
+
MAX = 'max'
|
|
39
|
+
MIN = 'min'
|
|
40
|
+
SUM = 'sum'
|
|
41
|
+
|
|
42
|
+
# -- Getters -- #
|
|
43
|
+
|
|
44
|
+
@property
|
|
45
|
+
def func(self) -> AggregateFunc:
|
|
46
|
+
"""
|
|
47
|
+
Get the aggregation function for this aggregation type.
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
AggregateFunc
|
|
52
|
+
The aggregation function corresponding to this aggregation type.
|
|
53
|
+
"""
|
|
54
|
+
if self is AggregateName.COUNT:
|
|
55
|
+
return lambda xs, n: n
|
|
56
|
+
if self is AggregateName.MAX:
|
|
57
|
+
return lambda xs, n: (max(xs) if xs else None)
|
|
58
|
+
if self is AggregateName.MIN:
|
|
59
|
+
return lambda xs, n: (min(xs) if xs else None)
|
|
60
|
+
if self is AggregateName.SUM:
|
|
61
|
+
return lambda xs, n: sum(xs)
|
|
62
|
+
|
|
63
|
+
# AVG
|
|
64
|
+
return lambda xs, n: (fmean(xs) if xs else 0.0)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class OperatorName(CoercibleStrEnum):
|
|
68
|
+
"""Supported comparison operators with helpers."""
|
|
69
|
+
|
|
70
|
+
# -- Constants -- #
|
|
71
|
+
|
|
72
|
+
EQ = 'eq'
|
|
73
|
+
NE = 'ne'
|
|
74
|
+
GT = 'gt'
|
|
75
|
+
GTE = 'gte'
|
|
76
|
+
LT = 'lt'
|
|
77
|
+
LTE = 'lte'
|
|
78
|
+
IN = 'in'
|
|
79
|
+
CONTAINS = 'contains'
|
|
80
|
+
|
|
81
|
+
# -- Getters -- #
|
|
82
|
+
|
|
83
|
+
@property
|
|
84
|
+
def func(self) -> OperatorFunc:
|
|
85
|
+
"""
|
|
86
|
+
Get the comparison function for this operator.
|
|
87
|
+
|
|
88
|
+
Returns
|
|
89
|
+
-------
|
|
90
|
+
OperatorFunc
|
|
91
|
+
The comparison function corresponding to this operator.
|
|
92
|
+
"""
|
|
93
|
+
match self:
|
|
94
|
+
case OperatorName.EQ:
|
|
95
|
+
return _op.eq
|
|
96
|
+
case OperatorName.NE:
|
|
97
|
+
return _op.ne
|
|
98
|
+
case OperatorName.GT:
|
|
99
|
+
return _op.gt
|
|
100
|
+
case OperatorName.GTE:
|
|
101
|
+
return _op.ge
|
|
102
|
+
case OperatorName.LT:
|
|
103
|
+
return _op.lt
|
|
104
|
+
case OperatorName.LTE:
|
|
105
|
+
return _op.le
|
|
106
|
+
case OperatorName.IN:
|
|
107
|
+
return lambda a, b: a in b
|
|
108
|
+
case OperatorName.CONTAINS:
|
|
109
|
+
return lambda a, b: b in a
|
|
110
|
+
|
|
111
|
+
# -- Class Methods -- #
|
|
112
|
+
|
|
113
|
+
@classmethod
|
|
114
|
+
def aliases(cls) -> StrStrMap:
|
|
115
|
+
"""
|
|
116
|
+
Return a mapping of common aliases for each enum member.
|
|
117
|
+
|
|
118
|
+
Returns
|
|
119
|
+
-------
|
|
120
|
+
StrStrMap
|
|
121
|
+
A mapping of alias strings to their corresponding enum member values.
|
|
122
|
+
"""
|
|
123
|
+
return {
|
|
124
|
+
'==': 'eq',
|
|
125
|
+
'=': 'eq',
|
|
126
|
+
'!=': 'ne',
|
|
127
|
+
'<>': 'ne',
|
|
128
|
+
'>=': 'gte',
|
|
129
|
+
'≥': 'gte',
|
|
130
|
+
'<=': 'lte',
|
|
131
|
+
'≤': 'lte',
|
|
132
|
+
'>': 'gt',
|
|
133
|
+
'<': 'lt',
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class PipelineStep(CoercibleStrEnum):
|
|
138
|
+
"""Pipeline step names as an enum for internal orchestration."""
|
|
139
|
+
|
|
140
|
+
# -- Constants -- #
|
|
141
|
+
|
|
142
|
+
FILTER = 'filter'
|
|
143
|
+
MAP = 'map'
|
|
144
|
+
SELECT = 'select'
|
|
145
|
+
SORT = 'sort'
|
|
146
|
+
AGGREGATE = 'aggregate'
|
|
147
|
+
|
|
148
|
+
# -- Getters -- #
|
|
149
|
+
|
|
150
|
+
@property
|
|
151
|
+
def order(self) -> int:
|
|
152
|
+
"""
|
|
153
|
+
Get the execution order of this pipeline step.
|
|
154
|
+
|
|
155
|
+
Returns
|
|
156
|
+
-------
|
|
157
|
+
int
|
|
158
|
+
The execution order of this pipeline step.
|
|
159
|
+
"""
|
|
160
|
+
return _PIPELINE_ORDER_INDEX[self]
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# SECTION: INTERNAL CONSTANTS ============================================== #
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# Precomputed order index for PipelineStep; avoids recomputing on each access.
|
|
167
|
+
_PIPELINE_ORDER_INDEX: dict[PipelineStep, int] = {
|
|
168
|
+
PipelineStep.FILTER: 0,
|
|
169
|
+
PipelineStep.MAP: 1,
|
|
170
|
+
PipelineStep.SELECT: 2,
|
|
171
|
+
PipelineStep.SORT: 3,
|
|
172
|
+
PipelineStep.AGGREGATE: 4,
|
|
173
|
+
}
|
etlplus/ops/extract.py
CHANGED
|
@@ -6,11 +6,19 @@ Helpers to extract data from files, databases, and REST APIs.
|
|
|
6
6
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
|
+
from collections.abc import Mapping
|
|
9
10
|
from pathlib import Path
|
|
10
11
|
from typing import Any
|
|
11
12
|
from typing import cast
|
|
13
|
+
from urllib.parse import urlsplit
|
|
14
|
+
from urllib.parse import urlunsplit
|
|
12
15
|
|
|
16
|
+
from ..api import EndpointClient
|
|
13
17
|
from ..api import HttpMethod
|
|
18
|
+
from ..api import PaginationConfigDict
|
|
19
|
+
from ..api import RequestOptions
|
|
20
|
+
from ..api import compose_api_request_env
|
|
21
|
+
from ..api import paginate_with_client
|
|
14
22
|
from ..api.utils import resolve_request
|
|
15
23
|
from ..connector import DataConnectorType
|
|
16
24
|
from ..file import File
|
|
@@ -19,6 +27,7 @@ from ..types import JSONData
|
|
|
19
27
|
from ..types import JSONDict
|
|
20
28
|
from ..types import JSONList
|
|
21
29
|
from ..types import StrPath
|
|
30
|
+
from ..types import Timeout
|
|
22
31
|
|
|
23
32
|
# SECTION: EXPORTS ========================================================== #
|
|
24
33
|
|
|
@@ -32,50 +41,164 @@ __all__ = [
|
|
|
32
41
|
]
|
|
33
42
|
|
|
34
43
|
|
|
35
|
-
# SECTION: FUNCTIONS
|
|
44
|
+
# SECTION: INTERNAL FUNCTIONS =============================================== #
|
|
36
45
|
|
|
37
46
|
|
|
38
|
-
def
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
47
|
+
def _build_client(
|
|
48
|
+
*,
|
|
49
|
+
base_url: str,
|
|
50
|
+
base_path: str | None,
|
|
51
|
+
endpoints: dict[str, str],
|
|
52
|
+
retry: Any,
|
|
53
|
+
retry_network_errors: bool,
|
|
54
|
+
session: Any,
|
|
55
|
+
) -> EndpointClient:
|
|
56
|
+
"""
|
|
57
|
+
Construct an API client with shared defaults.
|
|
58
|
+
|
|
59
|
+
Parameters
|
|
60
|
+
----------
|
|
61
|
+
base_url : str
|
|
62
|
+
API base URL.
|
|
63
|
+
base_path : str | None
|
|
64
|
+
Base path to prepend for endpoints.
|
|
65
|
+
endpoints : dict[str, str]
|
|
66
|
+
Endpoint name to path mappings.
|
|
67
|
+
retry : Any
|
|
68
|
+
Retry policy configuration.
|
|
69
|
+
retry_network_errors : bool
|
|
70
|
+
Whether to retry on network errors.
|
|
71
|
+
session : Any
|
|
72
|
+
Optional requests session.
|
|
73
|
+
|
|
74
|
+
Returns
|
|
75
|
+
-------
|
|
76
|
+
EndpointClient
|
|
77
|
+
Configured endpoint client instance.
|
|
78
|
+
"""
|
|
79
|
+
ClientClass = EndpointClient # noqa: N806
|
|
80
|
+
return ClientClass(
|
|
81
|
+
base_url=base_url,
|
|
82
|
+
base_path=base_path,
|
|
83
|
+
endpoints=endpoints,
|
|
84
|
+
retry=retry,
|
|
85
|
+
retry_network_errors=retry_network_errors,
|
|
86
|
+
session=session,
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _extract_from_api_env(
|
|
91
|
+
env: Mapping[str, Any],
|
|
92
|
+
*,
|
|
93
|
+
use_client: bool,
|
|
42
94
|
) -> JSONData:
|
|
43
95
|
"""
|
|
44
|
-
Extract data from a
|
|
96
|
+
Extract API data from a normalized request environment.
|
|
45
97
|
|
|
46
98
|
Parameters
|
|
47
99
|
----------
|
|
48
|
-
|
|
49
|
-
API
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
**kwargs : Any
|
|
53
|
-
Extra arguments forwarded to the underlying ``requests`` call
|
|
54
|
-
(for example, ``timeout``). To use a pre-configured
|
|
55
|
-
:class:`requests.Session`, provide it via ``session``.
|
|
56
|
-
When omitted, ``timeout`` defaults to 10 seconds.
|
|
100
|
+
env : Mapping[str, Any]
|
|
101
|
+
Normalized environment describing API request parameters.
|
|
102
|
+
use_client : bool
|
|
103
|
+
Whether to use the endpoint client/pagination machinery.
|
|
57
104
|
|
|
58
105
|
Returns
|
|
59
106
|
-------
|
|
60
107
|
JSONData
|
|
61
|
-
|
|
108
|
+
Extracted payload.
|
|
62
109
|
|
|
63
110
|
Raises
|
|
64
111
|
------
|
|
65
|
-
|
|
66
|
-
If
|
|
67
|
-
method (for example, ``get``).
|
|
112
|
+
ValueError
|
|
113
|
+
If required parameters are missing.
|
|
68
114
|
"""
|
|
69
|
-
|
|
70
|
-
|
|
115
|
+
if (
|
|
116
|
+
use_client
|
|
117
|
+
and env.get('use_endpoints')
|
|
118
|
+
and env.get('base_url')
|
|
119
|
+
and env.get('endpoints_map')
|
|
120
|
+
and env.get('endpoint_key')
|
|
121
|
+
):
|
|
122
|
+
client = _build_client(
|
|
123
|
+
base_url=cast(str, env.get('base_url')),
|
|
124
|
+
base_path=cast(str | None, env.get('base_path')),
|
|
125
|
+
endpoints=cast(dict[str, str], env.get('endpoints_map', {})),
|
|
126
|
+
retry=env.get('retry'),
|
|
127
|
+
retry_network_errors=bool(env.get('retry_network_errors', False)),
|
|
128
|
+
session=env.get('session'),
|
|
129
|
+
)
|
|
130
|
+
return paginate_with_client(
|
|
131
|
+
client,
|
|
132
|
+
cast(str, env.get('endpoint_key')),
|
|
133
|
+
env.get('params'),
|
|
134
|
+
env.get('headers'),
|
|
135
|
+
env.get('timeout'),
|
|
136
|
+
env.get('pagination'),
|
|
137
|
+
cast(float | None, env.get('sleep_seconds')),
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
url = env.get('url')
|
|
141
|
+
if not url:
|
|
142
|
+
raise ValueError('API source missing URL')
|
|
143
|
+
|
|
144
|
+
if use_client:
|
|
145
|
+
parts = urlsplit(cast(str, url))
|
|
146
|
+
base = urlunsplit((parts.scheme, parts.netloc, '', '', ''))
|
|
147
|
+
client = _build_client(
|
|
148
|
+
base_url=base,
|
|
149
|
+
base_path=None,
|
|
150
|
+
endpoints={},
|
|
151
|
+
retry=env.get('retry'),
|
|
152
|
+
retry_network_errors=bool(env.get('retry_network_errors', False)),
|
|
153
|
+
session=env.get('session'),
|
|
154
|
+
)
|
|
155
|
+
request_options = RequestOptions(
|
|
156
|
+
params=cast(Mapping[str, Any] | None, env.get('params')),
|
|
157
|
+
headers=cast(Mapping[str, str] | None, env.get('headers')),
|
|
158
|
+
timeout=cast(Timeout | None, env.get('timeout')),
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
return client.paginate_url(
|
|
162
|
+
cast(str, url),
|
|
163
|
+
cast(PaginationConfigDict | None, env.get('pagination')),
|
|
164
|
+
request=request_options,
|
|
165
|
+
sleep_seconds=cast(float, env.get('sleep_seconds', 0.0)),
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
method = env.get('method', HttpMethod.GET)
|
|
169
|
+
timeout = env.get('timeout', None)
|
|
170
|
+
session = env.get('session', None)
|
|
171
|
+
request_kwargs = dict(env.get('request_kwargs') or {})
|
|
71
172
|
request_callable, timeout, _ = resolve_request(
|
|
72
173
|
method,
|
|
73
174
|
session=session,
|
|
74
175
|
timeout=timeout,
|
|
75
176
|
)
|
|
76
|
-
response = request_callable(
|
|
177
|
+
response = request_callable(
|
|
178
|
+
cast(str, url),
|
|
179
|
+
timeout=timeout,
|
|
180
|
+
**request_kwargs,
|
|
181
|
+
)
|
|
77
182
|
response.raise_for_status()
|
|
183
|
+
return _parse_api_response(response)
|
|
184
|
+
|
|
78
185
|
|
|
186
|
+
def _parse_api_response(
|
|
187
|
+
response: Any,
|
|
188
|
+
) -> JSONData:
|
|
189
|
+
"""
|
|
190
|
+
Parse API responses into a consistent JSON payload.
|
|
191
|
+
|
|
192
|
+
Parameters
|
|
193
|
+
----------
|
|
194
|
+
response : Any
|
|
195
|
+
HTTP response object exposing ``headers``, ``json()``, and ``text``.
|
|
196
|
+
|
|
197
|
+
Returns
|
|
198
|
+
-------
|
|
199
|
+
JSONData
|
|
200
|
+
Parsed JSON payload, or a fallback object with raw text.
|
|
201
|
+
"""
|
|
79
202
|
content_type = response.headers.get('content-type', '').lower()
|
|
80
203
|
if 'application/json' in content_type:
|
|
81
204
|
try:
|
|
@@ -99,6 +222,70 @@ def extract_from_api(
|
|
|
99
222
|
return {'content': response.text, 'content_type': content_type}
|
|
100
223
|
|
|
101
224
|
|
|
225
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def extract_from_api(
|
|
229
|
+
url: str,
|
|
230
|
+
method: HttpMethod | str = HttpMethod.GET,
|
|
231
|
+
**kwargs: Any,
|
|
232
|
+
) -> JSONData:
|
|
233
|
+
"""
|
|
234
|
+
Extract data from a REST API.
|
|
235
|
+
|
|
236
|
+
Parameters
|
|
237
|
+
----------
|
|
238
|
+
url : str
|
|
239
|
+
API endpoint URL.
|
|
240
|
+
method : HttpMethod | str, optional
|
|
241
|
+
HTTP method to use. Defaults to ``GET``.
|
|
242
|
+
**kwargs : Any
|
|
243
|
+
Extra arguments forwarded to the underlying ``requests`` call
|
|
244
|
+
(for example, ``timeout``). To use a pre-configured
|
|
245
|
+
:class:`requests.Session`, provide it via ``session``.
|
|
246
|
+
When omitted, ``timeout`` defaults to 10 seconds.
|
|
247
|
+
|
|
248
|
+
Returns
|
|
249
|
+
-------
|
|
250
|
+
JSONData
|
|
251
|
+
Parsed JSON payload, or a fallback object with raw text.
|
|
252
|
+
"""
|
|
253
|
+
env = {
|
|
254
|
+
'url': url,
|
|
255
|
+
'method': method,
|
|
256
|
+
'timeout': kwargs.pop('timeout', None),
|
|
257
|
+
'session': kwargs.pop('session', None),
|
|
258
|
+
'request_kwargs': kwargs,
|
|
259
|
+
}
|
|
260
|
+
return _extract_from_api_env(env, use_client=False)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def extract_from_api_source(
|
|
264
|
+
cfg: Any,
|
|
265
|
+
source_obj: Any,
|
|
266
|
+
overrides: dict[str, Any],
|
|
267
|
+
) -> JSONData:
|
|
268
|
+
"""
|
|
269
|
+
Extract data from a REST API source connector.
|
|
270
|
+
|
|
271
|
+
Parameters
|
|
272
|
+
----------
|
|
273
|
+
cfg : Any
|
|
274
|
+
Pipeline configuration.
|
|
275
|
+
source_obj : Any
|
|
276
|
+
Connector configuration.
|
|
277
|
+
overrides : dict[str, Any]
|
|
278
|
+
Extract-time overrides.
|
|
279
|
+
|
|
280
|
+
Returns
|
|
281
|
+
-------
|
|
282
|
+
JSONData
|
|
283
|
+
Extracted payload.
|
|
284
|
+
"""
|
|
285
|
+
env = compose_api_request_env(cfg, source_obj, overrides)
|
|
286
|
+
return _extract_from_api_env(env, use_client=True)
|
|
287
|
+
|
|
288
|
+
|
|
102
289
|
def extract_from_database(
|
|
103
290
|
connection_string: str,
|
|
104
291
|
) -> JSONList:
|