etlplus 0.15.5__py3-none-any.whl → 0.16.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ """
2
+ :mod:`etlplus.connector.file` module.
3
+
4
+ File connector configuration dataclass.
5
+
6
+ Notes
7
+ -----
8
+ - TypedDicts in this module are intentionally ``total=False`` and are not
9
+ enforced at runtime.
10
+ - :meth:`*.from_obj` constructors accept :class:`Mapping[str, Any]` and perform
11
+ tolerant parsing and light casting. This keeps the runtime permissive while
12
+ improving autocomplete and static analysis for contributors.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass
18
+ from dataclasses import field
19
+ from typing import Any
20
+ from typing import Self
21
+ from typing import TypedDict
22
+ from typing import overload
23
+
24
+ from ..types import StrAnyMap
25
+ from ..utils import coerce_dict
26
+ from .core import ConnectorBase
27
+ from .enums import DataConnectorType
28
+ from .types import ConnectorType
29
+
30
+ # SECTION: EXPORTS ========================================================== #
31
+
32
+
33
+ __all__ = [
34
+ 'ConnectorFile',
35
+ 'ConnectorFileConfigMap',
36
+ ]
37
+
38
+
39
+ # SECTION: TYPED DICTS ====================================================== #
40
+
41
+
42
class ConnectorFileConfigMap(TypedDict, total=False):
    """
    Shape accepted by :meth:`ConnectorFile.from_obj` (all keys optional).

    The mapping is a static-typing aid only: ``total=False`` makes every key
    optional for type checkers, and nothing here is validated at runtime.

    See Also
    --------
    - :meth:`etlplus.connector.file.ConnectorFile.from_obj`
    """

    # Connector name; ``from_obj`` resolves it through ``_require_name``.
    name: str
    # Connector kind. ``ConnectorFile.from_obj`` does not read this key; the
    # dataclass default is used instead.
    type: ConnectorType
    # File format name, passed through to ``ConnectorFile.format`` unchanged.
    format: str
    # File location, passed through to ``ConnectorFile.path`` unchanged.
    path: str
    # Format options; ``from_obj`` runs the value through ``coerce_dict``.
    options: StrAnyMap
56
+
57
+
58
+ # SECTION: DATA CLASSES ===================================================== #
59
+
60
+
61
@dataclass(kw_only=True, slots=True)
class ConnectorFile(ConnectorBase):
    """
    File-backed data connector configuration.

    Attributes
    ----------
    type : ConnectorType
        Connector kind; defaults to ``DataConnectorType.FILE``.
    format : str | None
        Name of the file format (for example ``'json'`` or ``'csv'``).
    path : str | None
        Location of the file, given as a path or URI.
    options : dict[str, Any]
        Reader/writer format options; each instance gets its own ``dict``.
    """

    # -- Attributes -- #

    type: ConnectorType = DataConnectorType.FILE
    format: str | None = None
    path: str | None = None
    options: dict[str, Any] = field(default_factory=dict)

    # -- Class Methods -- #

    @classmethod
    @overload
    def from_obj(cls, obj: ConnectorFileConfigMap) -> Self: ...

    @classmethod
    @overload
    def from_obj(cls, obj: StrAnyMap) -> Self: ...

    @classmethod
    def from_obj(
        cls,
        obj: StrAnyMap,
    ) -> Self:
        """
        Build a ``ConnectorFile`` from a configuration mapping.

        Parameters
        ----------
        obj : StrAnyMap
            Mapping that must provide ``name``; ``format``, ``path`` and
            ``options`` are optional. Any ``type`` entry is not consulted.

        Returns
        -------
        Self
            Connector populated from *obj*.
        """
        # The name is resolved first so that a mapping without a usable name
        # is rejected by ``_require_name`` before anything else is read.
        return cls(
            name=cls._require_name(obj, kind='File'),
            format=obj.get('format'),
            path=obj.get('path'),
            options=coerce_dict(obj.get('options')),
        )
@@ -0,0 +1,40 @@
1
+ """
2
+ :mod:`etlplus.connector.types` module.
3
+
4
+ Connector type aliases for :mod:`etlplus.connector`.
5
+
6
+ Examples
7
+ --------
8
+ >>> from etlplus.connector import Connector
9
+ >>> src: Connector = {
10
+ ...     "type": "file",
11
+ ...     "path": "/data/input.csv",
12
+ ... }
13
+ >>> tgt: Connector = {
14
+ ...     "type": "database",
15
+ ...     "connection_string": "postgresql://user:pass@localhost/db",
16
+ ... }
17
+ >>> from etlplus.api import RetryPolicy
18
+ >>> rp: RetryPolicy = {"max_attempts": 3, "backoff": 0.5}
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from typing import Literal
24
+
25
+ from .enums import DataConnectorType
26
+
27
+ # SECTION: EXPORTS ========================================================= #
28
+
29
+
30
+ __all__ = [
31
+ # Type Aliases
32
+ 'ConnectorType',
33
+ ]
34
+
35
+
36
+ # SECTION: TYPE ALIASES ===================================================== #
37
+
38
+
39
+ # Supported connector kinds: enum members or the literal strings below
40
+ type ConnectorType = DataConnectorType | Literal['api', 'database', 'file']
@@ -0,0 +1,122 @@
1
+ """
2
+ :mod:`etlplus.connector.utils` module.
3
+
4
+ Shared connector parsing helpers.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections.abc import Mapping
10
+ from typing import Any
11
+
12
+ from .api import ConnectorApi
13
+ from .connector import Connector
14
+ from .database import ConnectorDb
15
+ from .enums import DataConnectorType
16
+ from .file import ConnectorFile
17
+
18
+ # SECTION: EXPORTS ========================================================== #
19
+
20
+
21
+ __all__ = [
22
+ # Functions
23
+ 'parse_connector',
24
+ ]
25
+
26
+
27
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
28
+
29
+
30
def _coerce_connector_type(
    obj: Mapping[str, Any],
) -> DataConnectorType:
    """
    Resolve the ``type`` entry of *obj* to a connector-type enum member.

    Parameters
    ----------
    obj : Mapping[str, Any]
        Connector configuration expected to contain a ``type`` key.

    Returns
    -------
    DataConnectorType
        Enum member produced by :meth:`DataConnectorType.coerce`.

    Raises
    ------
    TypeError
        If *obj* has no ``type`` key, or if coercion rejects its value.
    """
    if 'type' not in obj:
        raise TypeError('Connector requires a "type"')

    raw_kind = obj.get('type')
    try:
        resolved = DataConnectorType.coerce(raw_kind)
    except ValueError as exc:
        # Re-raise as TypeError, keeping the original ValueError as the cause
        # and listing the accepted choices in the message.
        choices = ', '.join(DataConnectorType.choices())
        message = (
            f'Unsupported connector type: {raw_kind!r}. '
            f'Expected one of {choices}.'
        )
        raise TypeError(message) from exc
    return resolved
61
+
62
+
63
+ def _load_connector(
64
+ kind: DataConnectorType,
65
+ ) -> type[Connector]:
66
+ """
67
+ Resolve the connector class for the requested kind.
68
+
69
+ Parameters
70
+ ----------
71
+ kind : DataConnectorType
72
+ Connector kind enum.
73
+
74
+ Returns
75
+ -------
76
+ type[Connector]
77
+ Connector class corresponding to *kind*.
78
+ """
79
+ match kind:
80
+ case DataConnectorType.API:
81
+ return ConnectorApi
82
+ case DataConnectorType.DATABASE:
83
+ return ConnectorDb
84
+ case DataConnectorType.FILE:
85
+ return ConnectorFile
86
+
87
+
88
+ # SECTION: FUNCTIONS ======================================================== #
89
+
90
+
91
def parse_connector(
    obj: Mapping[str, Any],
) -> Connector:
    """
    Build the concrete connector described by a configuration mapping.

    Parameters
    ----------
    obj : Mapping[str, Any]
        Connector configuration; must contain ``type`` and whatever the
        selected connector's ``from_obj`` constructor requires.

    Returns
    -------
    Connector
        Instance returned by the selected class's ``from_obj``.

    Raises
    ------
    TypeError
        If *obj* is not a mapping, or if its ``type`` is missing or
        unsupported.

    Notes
    -----
    The ``type`` value is normalized through
    :class:`etlplus.connector.enums.DataConnectorType` before dispatch, so
    aliases recognised by that enum (e.g., ``'db'`` or ``'http'``) are
    accepted.
    """
    if isinstance(obj, Mapping):
        kind = _coerce_connector_type(obj)
        return _load_connector(kind).from_obj(obj)
    raise TypeError('Connector configuration must be a mapping.')
etlplus/enums.py CHANGED
@@ -22,7 +22,6 @@ __all__ = [
22
22
  # Enums
23
23
  'AggregateName',
24
24
  'CoercibleStrEnum',
25
- 'DataConnectorType',
26
25
  'OperatorName',
27
26
  'PipelineStep',
28
27
  ]
@@ -168,37 +167,6 @@ class AggregateName(CoercibleStrEnum):
168
167
  return lambda xs, n: (fmean(xs) if xs else 0.0)
169
168
 
170
169
 
171
- class DataConnectorType(CoercibleStrEnum):
172
- """Supported data connector types."""
173
-
174
- # -- Constants -- #
175
-
176
- API = 'api'
177
- DATABASE = 'database'
178
- FILE = 'file'
179
-
180
- # -- Class Methods -- #
181
-
182
- @classmethod
183
- def aliases(cls) -> StrStrMap:
184
- """
185
- Return a mapping of common aliases for each enum member.
186
-
187
- Returns
188
- -------
189
- StrStrMap
190
- A mapping of alias names to their corresponding enum member names.
191
- """
192
- return {
193
- 'http': 'api',
194
- 'https': 'api',
195
- 'rest': 'api',
196
- 'db': 'database',
197
- 'filesystem': 'file',
198
- 'fs': 'file',
199
- }
200
-
201
-
202
170
  class OperatorName(CoercibleStrEnum):
203
171
  """Supported comparison operators with helpers."""
204
172
 
etlplus/ops/extract.py CHANGED
@@ -6,19 +6,28 @@ Helpers to extract data from files, databases, and REST APIs.
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
+ from collections.abc import Mapping
9
10
  from pathlib import Path
10
11
  from typing import Any
11
12
  from typing import cast
13
+ from urllib.parse import urlsplit
14
+ from urllib.parse import urlunsplit
12
15
 
16
+ from ..api import EndpointClient
13
17
  from ..api import HttpMethod
18
+ from ..api import PaginationConfigMap
19
+ from ..api import RequestOptions
20
+ from ..api import compose_api_request_env
21
+ from ..api import paginate_with_client
14
22
  from ..api.utils import resolve_request
15
- from ..enums import DataConnectorType
23
+ from ..connector import DataConnectorType
16
24
  from ..file import File
17
25
  from ..file import FileFormat
18
26
  from ..types import JSONData
19
27
  from ..types import JSONDict
20
28
  from ..types import JSONList
21
29
  from ..types import StrPath
30
+ from ..types import Timeout
22
31
 
23
32
  # SECTION: EXPORTS ========================================================== #
24
33
 
@@ -32,50 +41,164 @@ __all__ = [
32
41
  ]
33
42
 
34
43
 
35
- # SECTION: FUNCTIONS ======================================================== #
44
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
36
45
 
37
46
 
38
- def extract_from_api(
39
- url: str,
40
- method: HttpMethod | str = HttpMethod.GET,
41
- **kwargs: Any,
47
def _build_client(
    *,
    base_url: str,
    base_path: str | None,
    endpoints: dict[str, str],
    retry: Any,
    retry_network_errors: bool,
    session: Any,
) -> EndpointClient:
    """
    Create an :class:`EndpointClient` from the given settings.

    Every argument is forwarded unchanged as the keyword argument of the
    same name.

    Parameters
    ----------
    base_url : str
        Base URL of the API.
    base_path : str | None
        Path prefix applied to endpoints, if any.
    endpoints : dict[str, str]
        Mapping of endpoint names to paths.
    retry : Any
        Retry policy configuration.
    retry_network_errors : bool
        Whether network errors should be retried.
    session : Any
        Optional requests session.

    Returns
    -------
    EndpointClient
        Newly constructed client.
    """
    client_settings: dict[str, Any] = {
        'base_url': base_url,
        'base_path': base_path,
        'endpoints': endpoints,
        'retry': retry,
        'retry_network_errors': retry_network_errors,
        'session': session,
    }
    return EndpointClient(**client_settings)
88
+
89
+
90
def _extract_from_api_env(
    env: Mapping[str, Any],
    *,
    use_client: bool,
) -> JSONData:
    """
    Extract API data from a normalized request environment.

    One of three strategies is used:

    1. *Named endpoint*: when ``use_client`` is true and *env* holds truthy
       ``use_endpoints``, ``base_url``, ``endpoints_map`` and
       ``endpoint_key`` entries, a client is built from them and the call is
       delegated to ``paginate_with_client``.
    2. *URL through a client*: when ``use_client`` is true but strategy 1
       does not apply, a client rooted at the scheme and host of
       ``env['url']`` is built and its ``paginate_url`` method is called.
    3. *Plain request*: when ``use_client`` is false, one request is issued
       through the callable returned by ``resolve_request`` and the response
       is handed to ``_parse_api_response``.

    Parameters
    ----------
    env : Mapping[str, Any]
        Normalized environment describing API request parameters.
    use_client : bool
        Whether to use the endpoint client/pagination machinery.

    Returns
    -------
    JSONData
        Extracted payload.

    Raises
    ------
    ValueError
        If strategy 1 does not apply and *env* has no truthy ``url``.

    Notes
    -----
    ``env['method']`` and ``env['request_kwargs']`` are read only by the
    plain-request strategy. Errors raised by ``response.raise_for_status()``
    are not caught here.
    """
    if (
        use_client
        and env.get('use_endpoints')
        and env.get('base_url')
        and env.get('endpoints_map')
        and env.get('endpoint_key')
    ):
        client = _build_client(
            base_url=cast(str, env.get('base_url')),
            base_path=cast(str | None, env.get('base_path')),
            endpoints=cast(dict[str, str], env.get('endpoints_map', {})),
            retry=env.get('retry'),
            retry_network_errors=bool(env.get('retry_network_errors', False)),
            session=env.get('session'),
        )
        return paginate_with_client(
            client,
            cast(str, env.get('endpoint_key')),
            env.get('params'),
            env.get('headers'),
            env.get('timeout'),
            env.get('pagination'),
            cast(float | None, env.get('sleep_seconds')),
        )

    url = env.get('url')
    if not url:
        raise ValueError('API source missing URL')
    target = cast(str, url)

    if use_client:
        # Only scheme and host seed the client; the full URL is passed to
        # ``paginate_url`` below.
        parts = urlsplit(target)
        base = urlunsplit((parts.scheme, parts.netloc, '', '', ''))
        client = _build_client(
            base_url=base,
            base_path=None,
            endpoints={},
            retry=env.get('retry'),
            retry_network_errors=bool(env.get('retry_network_errors', False)),
            session=env.get('session'),
        )
        request_options = RequestOptions(
            params=cast(Mapping[str, Any] | None, env.get('params')),
            headers=cast(Mapping[str, str] | None, env.get('headers')),
            timeout=cast(Timeout | None, env.get('timeout')),
        )

        # ``env.get(key, 0.0)`` only falls back when the key is absent, so an
        # explicit ``None`` entry is normalized to 0.0 here as well.
        sleep_seconds = env.get('sleep_seconds')
        return client.paginate_url(
            target,
            cast(PaginationConfigMap | None, env.get('pagination')),
            request=request_options,
            sleep_seconds=(
                0.0 if sleep_seconds is None else cast(float, sleep_seconds)
            ),
        )

    method = env.get('method', HttpMethod.GET)
    timeout = env.get('timeout', None)
    session = env.get('session', None)
    # Copy so the caller's mapping is never mutated by the request call.
    request_kwargs = dict(env.get('request_kwargs') or {})
    request_callable, timeout, _ = resolve_request(
        method,
        session=session,
        timeout=timeout,
    )
    response = request_callable(
        target,
        timeout=timeout,
        **request_kwargs,
    )
    response.raise_for_status()
    return _parse_api_response(response)
184
+
78
185
 
186
+ def _parse_api_response(
187
+ response: Any,
188
+ ) -> JSONData:
189
+ """
190
+ Parse API responses into a consistent JSON payload.
191
+
192
+ Parameters
193
+ ----------
194
+ response : Any
195
+ HTTP response object exposing ``headers``, ``json()``, and ``text``.
196
+
197
+ Returns
198
+ -------
199
+ JSONData
200
+ Parsed JSON payload, or a fallback object with raw text.
201
+ """
79
202
  content_type = response.headers.get('content-type', '').lower()
80
203
  if 'application/json' in content_type:
81
204
  try:
@@ -99,6 +222,70 @@ def extract_from_api(
99
222
  return {'content': response.text, 'content_type': content_type}
100
223
 
101
224
 
225
+ # SECTION: FUNCTIONS ======================================================== #
226
+
227
+
228
def extract_from_api(
    url: str,
    method: HttpMethod | str = HttpMethod.GET,
    **kwargs: Any,
) -> JSONData:
    """
    Fetch data from a REST API endpoint with a single request.

    Parameters
    ----------
    url : str
        URL of the API endpoint.
    method : HttpMethod | str, optional
        HTTP method to use. Defaults to ``GET``.
    **kwargs : Any
        Additional arguments for the underlying request call. ``timeout``
        and ``session`` are taken out and handled separately; everything
        else is forwarded as request keyword arguments.

    Returns
    -------
    JSONData
        Payload returned by the shared API extraction helper.
    """
    # Pull the two specially handled options out first, in the same order as
    # they appear in the environment, so ``kwargs`` holds only pass-through
    # request arguments by the time it is stored.
    timeout = kwargs.pop('timeout', None)
    session = kwargs.pop('session', None)
    return _extract_from_api_env(
        {
            'url': url,
            'method': method,
            'timeout': timeout,
            'session': session,
            'request_kwargs': kwargs,
        },
        use_client=False,
    )
261
+
262
+
263
def extract_from_api_source(
    cfg: Any,
    source_obj: Any,
    overrides: dict[str, Any],
) -> JSONData:
    """
    Fetch data for a REST API source connector.

    The three arguments are combined into a request environment by
    ``compose_api_request_env``, which is then executed with the endpoint
    client enabled.

    Parameters
    ----------
    cfg : Any
        Pipeline configuration.
    source_obj : Any
        Connector configuration.
    overrides : dict[str, Any]
        Extract-time overrides.

    Returns
    -------
    JSONData
        Extracted payload.
    """
    return _extract_from_api_env(
        compose_api_request_env(cfg, source_obj, overrides),
        use_client=True,
    )
287
+
288
+
102
289
  def extract_from_database(
103
290
  connection_string: str,
104
291
  ) -> JSONList: