etlplus-0.15.5-py3-none-any.whl → etlplus-0.16.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/api/types.py +32 -11
- etlplus/cli/constants.py +1 -1
- etlplus/connector/__init__.py +43 -0
- etlplus/connector/api.py +161 -0
- etlplus/connector/connector.py +26 -0
- etlplus/connector/core.py +132 -0
- etlplus/connector/database.py +122 -0
- etlplus/connector/enums.py +52 -0
- etlplus/connector/file.py +120 -0
- etlplus/connector/types.py +40 -0
- etlplus/connector/utils.py +122 -0
- etlplus/enums.py +0 -32
- etlplus/ops/extract.py +210 -23
- etlplus/ops/load.py +141 -35
- etlplus/ops/run.py +86 -101
- etlplus/ops/transform.py +30 -11
- etlplus/types.py +3 -2
- etlplus/workflow/__init__.py +2 -11
- etlplus/workflow/dag.py +23 -1
- etlplus/workflow/jobs.py +15 -26
- etlplus/workflow/pipeline.py +39 -56
- etlplus/workflow/profile.py +4 -2
- {etlplus-0.15.5.dist-info → etlplus-0.16.2.dist-info}/METADATA +1 -1
- {etlplus-0.15.5.dist-info → etlplus-0.16.2.dist-info}/RECORD +28 -21
- etlplus/workflow/connector.py +0 -386
- etlplus/workflow/types.py +0 -115
- {etlplus-0.15.5.dist-info → etlplus-0.16.2.dist-info}/WHEEL +0 -0
- {etlplus-0.15.5.dist-info → etlplus-0.16.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.15.5.dist-info → etlplus-0.16.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.15.5.dist-info → etlplus-0.16.2.dist-info}/top_level.txt +0 -0
etlplus/connector/file.py
ADDED
@@ -0,0 +1,120 @@
+"""
+:mod:`etlplus.connector.file` module.
+
+File connector configuration dataclass.
+
+Notes
+-----
+- TypedDicts in this module are intentionally ``total=False`` and are not
+  enforced at runtime.
+- :meth:`*.from_obj` constructors accept :class:`Mapping[str, Any]` and perform
+  tolerant parsing and light casting. This keeps the runtime permissive while
+  improving autocomplete and static analysis for contributors.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from dataclasses import field
+from typing import Any
+from typing import Self
+from typing import TypedDict
+from typing import overload
+
+from ..types import StrAnyMap
+from ..utils import coerce_dict
+from .core import ConnectorBase
+from .enums import DataConnectorType
+from .types import ConnectorType
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    'ConnectorFile',
+    'ConnectorFileConfigMap',
+]
+
+
+# SECTION: TYPED DICTS ====================================================== #
+
+
+class ConnectorFileConfigMap(TypedDict, total=False):
+    """
+    Shape accepted by :meth:`ConnectorFile.from_obj` (all keys optional).
+
+    See Also
+    --------
+    - :meth:`etlplus.connector.file.ConnectorFile.from_obj`
+    """
+
+    name: str
+    type: ConnectorType
+    format: str
+    path: str
+    options: StrAnyMap
+
+
+# SECTION: DATA CLASSES ===================================================== #
+
+
+@dataclass(kw_only=True, slots=True)
+class ConnectorFile(ConnectorBase):
+    """
+    Configuration for a file-based data connector.
+
+    Attributes
+    ----------
+    type : ConnectorType
+        Connector kind, always ``'file'``.
+    format : str | None
+        File format (e.g., ``'json'``, ``'csv'``).
+    path : str | None
+        File path or URI.
+    options : dict[str, Any]
+        Reader/writer format options.
+    """
+
+    # -- Attributes -- #
+
+    type: ConnectorType = DataConnectorType.FILE
+    format: str | None = None
+    path: str | None = None
+    options: dict[str, Any] = field(default_factory=dict)
+
+    # -- Class Methods -- #
+
+    @classmethod
+    @overload
+    def from_obj(cls, obj: ConnectorFileConfigMap) -> Self: ...
+
+    @classmethod
+    @overload
+    def from_obj(cls, obj: StrAnyMap) -> Self: ...
+
+    @classmethod
+    def from_obj(
+        cls,
+        obj: StrAnyMap,
+    ) -> Self:
+        """
+        Parse a mapping into a ``ConnectorFile`` instance.
+
+        Parameters
+        ----------
+        obj : StrAnyMap
+            Mapping with at least ``name``.
+
+        Returns
+        -------
+        Self
+            Parsed connector instance.
+        """
+        name = cls._require_name(obj, kind='File')
+
+        return cls(
+            name=name,
+            format=obj.get('format'),
+            path=obj.get('path'),
+            options=coerce_dict(obj.get('options')),
+        )
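For orientation, a minimal usage sketch of the new dataclass (not taken from the package's own tests); the mapping keys follow ConnectorFileConfigMap, and the name, path, and options values are purely illustrative:

from etlplus.connector.file import ConnectorFile

# Illustrative mapping; only 'name' is required, the other keys are optional.
cfg = {
    'name': 'users_csv',
    'format': 'csv',
    'path': '/data/users.csv',
    'options': {'delimiter': ','},
}

# from_obj() performs the tolerant parsing and light casting described above.
connector = ConnectorFile.from_obj(cfg)
print(connector.format, connector.path)  # csv /data/users.csv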
etlplus/connector/types.py
ADDED
@@ -0,0 +1,40 @@
+"""
+:mod:`etlplus.connector.types` module.
+
+Connector type aliases for :mod:`etlplus.connector`.
+
+Examples
+--------
+>>> from etlplus.connector import Connector
+>>> src: Connector = {
+>>> "type": "file",
+>>> "path": "/data/input.csv",
+>>> }
+>>> tgt: Connector = {
+>>> "type": "database",
+>>> "connection_string": "postgresql://user:pass@localhost/db",
+>>> }
+>>> from etlplus.api import RetryPolicy
+>>> rp: RetryPolicy = {"max_attempts": 3, "backoff": 0.5}
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from .enums import DataConnectorType
+
+# SECTION: EXPORTS ========================================================= #
+
+
+__all__ = [
+    # Type Aliases
+    'ConnectorType',
+]
+
+
+# SECTION: TYPE ALIASES ===================================================== #
+
+
+# Literal type for supported connector kinds (strings or enum members)
+type ConnectorType = DataConnectorType | Literal['api', 'database', 'file']
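A brief, hypothetical illustration of the new ConnectorType alias used in an annotation; the describe helper below is not part of the package:

from etlplus.connector.enums import DataConnectorType
from etlplus.connector.types import ConnectorType


def describe(kind: ConnectorType) -> str:
    # The alias accepts either a DataConnectorType member or one of the
    # literal strings 'api', 'database', 'file'.
    return f'connector kind: {kind}'


print(describe('file'))                      # literal string form
print(describe(DataConnectorType.DATABASE))  # enum member form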
etlplus/connector/utils.py
ADDED
@@ -0,0 +1,122 @@
+"""
+:mod:`etlplus.connector.utils` module.
+
+Shared connector parsing helpers.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+from .api import ConnectorApi
+from .connector import Connector
+from .database import ConnectorDb
+from .enums import DataConnectorType
+from .file import ConnectorFile
+
+# SECTION: EXPORTS ========================================================== #
+
+
+__all__ = [
+    # Functions
+    'parse_connector',
+]
+
+
+# SECTION: INTERNAL FUNCTIONS =============================================== #
+
+
+def _coerce_connector_type(
+    obj: Mapping[str, Any],
+) -> DataConnectorType:
+    """
+    Normalize and validate the connector ``type`` field.
+
+    Parameters
+    ----------
+    obj : Mapping[str, Any]
+        Mapping with a ``type`` entry.
+
+    Returns
+    -------
+    DataConnectorType
+        Normalized connector type enum.
+
+    Raises
+    ------
+    TypeError
+        If ``type`` is missing or unsupported.
+    """
+    if 'type' not in obj:
+        raise TypeError('Connector requires a "type"')
+    try:
+        return DataConnectorType.coerce(obj.get('type'))
+    except ValueError as exc:
+        allowed = ', '.join(DataConnectorType.choices())
+        raise TypeError(
+            f'Unsupported connector type: {obj.get("type")!r}. '
+            f'Expected one of {allowed}.',
+        ) from exc
+
+
+def _load_connector(
+    kind: DataConnectorType,
+) -> type[Connector]:
+    """
+    Resolve the connector class for the requested kind.
+
+    Parameters
+    ----------
+    kind : DataConnectorType
+        Connector kind enum.
+
+    Returns
+    -------
+    type[Connector]
+        Connector class corresponding to *kind*.
+    """
+    match kind:
+        case DataConnectorType.API:
+            return ConnectorApi
+        case DataConnectorType.DATABASE:
+            return ConnectorDb
+        case DataConnectorType.FILE:
+            return ConnectorFile
+
+
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def parse_connector(
+    obj: Mapping[str, Any],
+) -> Connector:
+    """
+    Dispatch to a concrete connector constructor based on ``type``.
+
+    Parameters
+    ----------
+    obj : Mapping[str, Any]
+        Mapping with at least ``name`` and ``type``.
+
+    Returns
+    -------
+    Connector
+        Concrete connector instance.
+
+    Raises
+    ------
+    TypeError
+        If the mapping is invalid or the connector type is unsupported.
+
+    Notes
+    -----
+    Delegates to the tolerant ``from_obj`` constructors for each connector
+    kind. Connector types are normalized via
+    :class:`etlplus.connector.enums.DataConnectorType`, so common aliases
+    (e.g., ``'db'`` or ``'http'``) are accepted.
+    """
+    if not isinstance(obj, Mapping):
+        raise TypeError('Connector configuration must be a mapping.')
+    connector_cls = _load_connector(_coerce_connector_type(obj))
+    return connector_cls.from_obj(obj)
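A sketch of the dispatcher in use, assuming parse_connector is imported straight from etlplus.connector.utils as defined above; the connector name and connection string are illustrative:

from etlplus.connector.utils import parse_connector

# 'db' is one of the aliases DataConnectorType normalizes to 'database'.
target = parse_connector({
    'name': 'warehouse',
    'type': 'db',
    'connection_string': 'postgresql://user:pass@localhost/db',
})
print(type(target).__name__)  # expected: ConnectorDb

# A missing or unsupported 'type' raises TypeError.
try:
    parse_connector({'name': 'broken', 'type': 'ftp'})
except TypeError as err:
    print(err)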
etlplus/enums.py
CHANGED
@@ -22,7 +22,6 @@ __all__ = [
     # Enums
     'AggregateName',
     'CoercibleStrEnum',
-    'DataConnectorType',
     'OperatorName',
     'PipelineStep',
 ]
@@ -168,37 +167,6 @@ class AggregateName(CoercibleStrEnum):
         return lambda xs, n: (fmean(xs) if xs else 0.0)
 
 
-class DataConnectorType(CoercibleStrEnum):
-    """Supported data connector types."""
-
-    # -- Constants -- #
-
-    API = 'api'
-    DATABASE = 'database'
-    FILE = 'file'
-
-    # -- Class Methods -- #
-
-    @classmethod
-    def aliases(cls) -> StrStrMap:
-        """
-        Return a mapping of common aliases for each enum member.
-
-        Returns
-        -------
-        StrStrMap
-            A mapping of alias names to their corresponding enum member names.
-        """
-        return {
-            'http': 'api',
-            'https': 'api',
-            'rest': 'api',
-            'db': 'database',
-            'filesystem': 'file',
-            'fs': 'file',
-        }
-
-
 class OperatorName(CoercibleStrEnum):
     """Supported comparison operators with helpers."""
 
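The enum itself is not dropped from the distribution; it moves into the new subpackage (see etlplus/connector/enums.py in the file list, and the updated import in etlplus/ops/extract.py below). A migration sketch for downstream code, assuming no other re-exports changed:

# etlplus 0.15.5
# from etlplus.enums import DataConnectorType

# etlplus 0.16.2: import from the connector subpackage instead, mirroring the
# `from ..connector import DataConnectorType` line in ops/extract.py, or use
# the module that now defines it:
from etlplus.connector import DataConnectorType
# from etlplus.connector.enums import DataConnectorType

print(list(DataConnectorType))  # API, DATABASE, FILE members are unchanged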
etlplus/ops/extract.py
CHANGED
@@ -6,19 +6,28 @@ Helpers to extract data from files, databases, and REST APIs.
 
 from __future__ import annotations
 
+from collections.abc import Mapping
 from pathlib import Path
 from typing import Any
 from typing import cast
+from urllib.parse import urlsplit
+from urllib.parse import urlunsplit
 
+from ..api import EndpointClient
 from ..api import HttpMethod
+from ..api import PaginationConfigMap
+from ..api import RequestOptions
+from ..api import compose_api_request_env
+from ..api import paginate_with_client
 from ..api.utils import resolve_request
-from ..
+from ..connector import DataConnectorType
 from ..file import File
 from ..file import FileFormat
 from ..types import JSONData
 from ..types import JSONDict
 from ..types import JSONList
 from ..types import StrPath
+from ..types import Timeout
 
 # SECTION: EXPORTS ========================================================== #
 
@@ -32,50 +41,164 @@ __all__ = [
 ]
 
 
-# SECTION: FUNCTIONS
+# SECTION: INTERNAL FUNCTIONS =============================================== #
 
 
-def
-
-
-
+def _build_client(
+    *,
+    base_url: str,
+    base_path: str | None,
+    endpoints: dict[str, str],
+    retry: Any,
+    retry_network_errors: bool,
+    session: Any,
+) -> EndpointClient:
+    """
+    Construct an API client with shared defaults.
+
+    Parameters
+    ----------
+    base_url : str
+        API base URL.
+    base_path : str | None
+        Base path to prepend for endpoints.
+    endpoints : dict[str, str]
+        Endpoint name to path mappings.
+    retry : Any
+        Retry policy configuration.
+    retry_network_errors : bool
+        Whether to retry on network errors.
+    session : Any
+        Optional requests session.
+
+    Returns
+    -------
+    EndpointClient
+        Configured endpoint client instance.
+    """
+    ClientClass = EndpointClient  # noqa: N806
+    return ClientClass(
+        base_url=base_url,
+        base_path=base_path,
+        endpoints=endpoints,
+        retry=retry,
+        retry_network_errors=retry_network_errors,
+        session=session,
+    )
+
+
+def _extract_from_api_env(
+    env: Mapping[str, Any],
+    *,
+    use_client: bool,
 ) -> JSONData:
     """
-    Extract data from a
+    Extract API data from a normalized request environment.
 
     Parameters
     ----------
-
-        API
-
-
-    **kwargs : Any
-        Extra arguments forwarded to the underlying ``requests`` call
-        (for example, ``timeout``). To use a pre-configured
-        :class:`requests.Session`, provide it via ``session``.
-        When omitted, ``timeout`` defaults to 10 seconds.
+    env : Mapping[str, Any]
+        Normalized environment describing API request parameters.
+    use_client : bool
+        Whether to use the endpoint client/pagination machinery.
 
     Returns
     -------
     JSONData
-
+        Extracted payload.
 
     Raises
     ------
-
-        If
-        method (for example, ``get``).
+    ValueError
+        If required parameters are missing.
     """
-
-
+    if (
+        use_client
+        and env.get('use_endpoints')
+        and env.get('base_url')
+        and env.get('endpoints_map')
+        and env.get('endpoint_key')
+    ):
+        client = _build_client(
+            base_url=cast(str, env.get('base_url')),
+            base_path=cast(str | None, env.get('base_path')),
+            endpoints=cast(dict[str, str], env.get('endpoints_map', {})),
+            retry=env.get('retry'),
+            retry_network_errors=bool(env.get('retry_network_errors', False)),
+            session=env.get('session'),
+        )
+        return paginate_with_client(
+            client,
+            cast(str, env.get('endpoint_key')),
+            env.get('params'),
+            env.get('headers'),
+            env.get('timeout'),
+            env.get('pagination'),
+            cast(float | None, env.get('sleep_seconds')),
+        )
+
+    url = env.get('url')
+    if not url:
+        raise ValueError('API source missing URL')
+
+    if use_client:
+        parts = urlsplit(cast(str, url))
+        base = urlunsplit((parts.scheme, parts.netloc, '', '', ''))
+        client = _build_client(
+            base_url=base,
+            base_path=None,
+            endpoints={},
+            retry=env.get('retry'),
+            retry_network_errors=bool(env.get('retry_network_errors', False)),
+            session=env.get('session'),
+        )
+        request_options = RequestOptions(
+            params=cast(Mapping[str, Any] | None, env.get('params')),
+            headers=cast(Mapping[str, str] | None, env.get('headers')),
+            timeout=cast(Timeout | None, env.get('timeout')),
+        )
+
+        return client.paginate_url(
+            cast(str, url),
+            cast(PaginationConfigMap | None, env.get('pagination')),
+            request=request_options,
+            sleep_seconds=cast(float, env.get('sleep_seconds', 0.0)),
+        )
+
+    method = env.get('method', HttpMethod.GET)
+    timeout = env.get('timeout', None)
+    session = env.get('session', None)
+    request_kwargs = dict(env.get('request_kwargs') or {})
     request_callable, timeout, _ = resolve_request(
         method,
         session=session,
         timeout=timeout,
     )
-    response = request_callable(
+    response = request_callable(
+        cast(str, url),
+        timeout=timeout,
+        **request_kwargs,
+    )
     response.raise_for_status()
+    return _parse_api_response(response)
+
 
+def _parse_api_response(
+    response: Any,
+) -> JSONData:
+    """
+    Parse API responses into a consistent JSON payload.
+
+    Parameters
+    ----------
+    response : Any
+        HTTP response object exposing ``headers``, ``json()``, and ``text``.
+
+    Returns
+    -------
+    JSONData
+        Parsed JSON payload, or a fallback object with raw text.
+    """
     content_type = response.headers.get('content-type', '').lower()
     if 'application/json' in content_type:
         try:
@@ -99,6 +222,70 @@ def extract_from_api(
         return {'content': response.text, 'content_type': content_type}
 
 
+# SECTION: FUNCTIONS ======================================================== #
+
+
+def extract_from_api(
+    url: str,
+    method: HttpMethod | str = HttpMethod.GET,
+    **kwargs: Any,
+) -> JSONData:
+    """
+    Extract data from a REST API.
+
+    Parameters
+    ----------
+    url : str
+        API endpoint URL.
+    method : HttpMethod | str, optional
+        HTTP method to use. Defaults to ``GET``.
+    **kwargs : Any
+        Extra arguments forwarded to the underlying ``requests`` call
+        (for example, ``timeout``). To use a pre-configured
+        :class:`requests.Session`, provide it via ``session``.
+        When omitted, ``timeout`` defaults to 10 seconds.
+
+    Returns
+    -------
+    JSONData
+        Parsed JSON payload, or a fallback object with raw text.
+    """
+    env = {
+        'url': url,
+        'method': method,
+        'timeout': kwargs.pop('timeout', None),
+        'session': kwargs.pop('session', None),
+        'request_kwargs': kwargs,
+    }
+    return _extract_from_api_env(env, use_client=False)
+
+
+def extract_from_api_source(
+    cfg: Any,
+    source_obj: Any,
+    overrides: dict[str, Any],
+) -> JSONData:
+    """
+    Extract data from a REST API source connector.
+
+    Parameters
+    ----------
+    cfg : Any
+        Pipeline configuration.
+    source_obj : Any
+        Connector configuration.
+    overrides : dict[str, Any]
+        Extract-time overrides.
+
+    Returns
+    -------
+    JSONData
+        Extracted payload.
+    """
+    env = compose_api_request_env(cfg, source_obj, overrides)
+    return _extract_from_api_env(env, use_client=True)
+
+
 def extract_from_database(
     connection_string: str,
 ) -> JSONList: