etlplus 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +43 -0
- etlplus/__main__.py +22 -0
- etlplus/__version__.py +14 -0
- etlplus/api/README.md +237 -0
- etlplus/api/__init__.py +136 -0
- etlplus/api/auth.py +432 -0
- etlplus/api/config.py +633 -0
- etlplus/api/endpoint_client.py +885 -0
- etlplus/api/errors.py +170 -0
- etlplus/api/pagination/__init__.py +47 -0
- etlplus/api/pagination/client.py +188 -0
- etlplus/api/pagination/config.py +440 -0
- etlplus/api/pagination/paginator.py +775 -0
- etlplus/api/rate_limiting/__init__.py +38 -0
- etlplus/api/rate_limiting/config.py +343 -0
- etlplus/api/rate_limiting/rate_limiter.py +266 -0
- etlplus/api/request_manager.py +589 -0
- etlplus/api/retry_manager.py +430 -0
- etlplus/api/transport.py +325 -0
- etlplus/api/types.py +172 -0
- etlplus/cli/__init__.py +15 -0
- etlplus/cli/app.py +1367 -0
- etlplus/cli/handlers.py +775 -0
- etlplus/cli/main.py +616 -0
- etlplus/config/__init__.py +56 -0
- etlplus/config/connector.py +372 -0
- etlplus/config/jobs.py +311 -0
- etlplus/config/pipeline.py +339 -0
- etlplus/config/profile.py +78 -0
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/ddl.py +197 -0
- etlplus/enums.py +414 -0
- etlplus/extract.py +218 -0
- etlplus/file.py +657 -0
- etlplus/load.py +336 -0
- etlplus/mixins.py +62 -0
- etlplus/py.typed +0 -0
- etlplus/run.py +368 -0
- etlplus/run_helpers.py +843 -0
- etlplus/templates/__init__.py +5 -0
- etlplus/templates/ddl.sql.j2 +128 -0
- etlplus/templates/view.sql.j2 +69 -0
- etlplus/transform.py +1049 -0
- etlplus/types.py +227 -0
- etlplus/utils.py +638 -0
- etlplus/validate.py +493 -0
- etlplus/validation/__init__.py +44 -0
- etlplus/validation/utils.py +389 -0
- etlplus-0.5.4.dist-info/METADATA +616 -0
- etlplus-0.5.4.dist-info/RECORD +55 -0
- etlplus-0.5.4.dist-info/WHEEL +5 -0
- etlplus-0.5.4.dist-info/entry_points.txt +2 -0
- etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
- etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/api/errors.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.api.errors` module.
|
|
3
|
+
|
|
4
|
+
Exception types with rich context for debugging REST API failures.
|
|
5
|
+
|
|
6
|
+
Summary
|
|
7
|
+
-------
|
|
8
|
+
Provides subclasses for request errors (``ApiRequestError``), auth failures
|
|
9
|
+
(``ApiAuthError``), and pagination errors with page context
|
|
10
|
+
(``PaginationError``).
|
|
11
|
+
|
|
12
|
+
Examples
|
|
13
|
+
--------
|
|
14
|
+
>>> try:
|
|
15
|
+
... client.paginate("list", pagination={"type": "page", "page_size": 50})
|
|
16
|
+
... except ApiAuthError as e:
|
|
17
|
+
... print("auth failed", e.status)
|
|
18
|
+
... except PaginationError as e:
|
|
19
|
+
... print("page:", e.page, "attempts:", e.attempts)
|
|
20
|
+
... except ApiRequestError as e:
|
|
21
|
+
... print("request failed", e.url)
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from typing import TYPE_CHECKING
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
import requests # type: ignore[import]
|
|
31
|
+
|
|
32
|
+
if TYPE_CHECKING: # pragma: no cover - typing only
|
|
33
|
+
from .retry_manager import RetryPolicy
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# SECTION: EXPORTS ========================================================== #
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
__all__ = ['ApiAuthError', 'ApiRequestError', 'PaginationError']
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# SECTION: CLASSES ========================================================== #
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(slots=True, kw_only=True)
class ApiRequestError(requests.RequestException):
    """
    Base error for API request failures with rich context.

    Parameters
    ----------
    url : str
        Absolute URL that was requested.
    status : int | None, optional
        HTTP status code when available.
    attempts : int, optional
        Number of attempts performed (defaults to ``1``).
    retried : bool, optional
        Whether any retry attempts were made.
    retry_policy : RetryPolicy | None, optional
        The retry policy in effect, if any.
    cause : Exception | None, optional
        Original underlying exception.

    Attributes
    ----------
    url : str
        Absolute URL that was requested.
    status : int | None
        HTTP status code when available.
    attempts : int
        Number of attempts performed.
    retried : bool
        Whether any retry attempts were made.
    retry_policy : RetryPolicy | None
        The retry policy in effect, if any.
    cause : Exception | None
        Original underlying exception.

    Examples
    --------
    >>> try:
    ...     raise ApiRequestError(url="https://api.example.com/x", status=500)
    ... except ApiRequestError as e:
    ...     print(e.status, e.attempts)
    500 1

    Notes
    -----
    The :meth:`as_dict` helper returns a structured payload suitable for
    structured logging or telemetry.

    All fields are keyword-only. The base ``requests.RequestException``
    initializer is run after the fields are set, so the ``response`` and
    ``request`` attributes that generic ``requests`` error handlers read are
    present on every instance.
    """

    # -- Attributes -- #

    url: str
    status: int | None = None
    attempts: int = 1
    retried: bool = False
    retry_policy: RetryPolicy | None = None
    cause: Exception | None = None

    # -- Magic Methods (Object Lifecycle) -- #

    def __post_init__(self) -> None:
        """Run the base exception initializer after the fields are set."""
        # The dataclass-generated ``__init__`` does not chain to the base
        # class, so without this call ``response``/``request`` would be
        # missing. Two-argument ``super`` is required: ``slots=True`` rebuilds
        # the class, which breaks zero-argument ``super()`` before Python 3.14.
        super(ApiRequestError, self).__init__()

    # -- Magic Methods (Object Representation) -- #

    def __str__(self) -> str:  # pragma: no cover - formatting only
        """Return a one-line summary of the failed request."""
        base = f'request failed url={self.url!r} status={self.status}'
        meta = f' attempts={self.attempts} retried={self.retried}'

        return f'ApiRequestError({base}{meta})'

    # -- Instance Methods -- #

    def as_dict(self) -> dict[str, Any]:
        """
        Return structured error context for logging or telemetry.

        Returns
        -------
        dict[str, Any]
            Mapping with the keys ``url``, ``status``, ``attempts``,
            ``retried``, ``retry_policy`` and ``cause``. ``cause`` holds the
            ``repr`` of the underlying exception, or ``None`` when no cause
            was recorded; ``retry_policy`` is passed through unchanged.
        """
        return {
            'url': self.url,
            'status': self.status,
            'attempts': self.attempts,
            'retried': self.retried,
            'retry_policy': self.retry_policy,
            # Identity check: an exception type may define ``__bool__`` or
            # ``__len__`` and still be a real cause worth reporting.
            'cause': repr(self.cause) if self.cause is not None else None,
        }
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class ApiAuthError(ApiRequestError):
    """
    Authentication or authorization failure (e.g., 401/403).

    Adds no fields or methods of its own; the distinct type lets callers
    handle credential problems separately from other
    :class:`ApiRequestError` failures.
    """
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@dataclass(slots=True, kw_only=True)
class PaginationError(ApiRequestError):
    """
    Error raised during pagination with page context.

    Parameters
    ----------
    page : int | None, optional
        Page number (1-based) or request count when applicable.
    **kwargs
        Remaining keyword arguments forwarded to ``ApiRequestError``.

    Attributes
    ----------
    page : int | None
        Stored page number.

    Examples
    --------
    >>> err = PaginationError(url="u", status=400, page=3)
    >>> str(err).startswith("PaginationError(")
    True

    Notes
    -----
    Parent methods are reached with two-argument ``super``. ``slots=True``
    makes ``dataclass`` rebuild the class, and zero-argument ``super()``
    inside such a class raises ``TypeError`` on Python versions before 3.14.
    """

    # -- Attributes -- #

    page: int | None = None

    # -- Magic Methods (Object Representation) -- #

    def __str__(self) -> str:  # pragma: no cover - formatting only
        """Return the base error summary wrapped with the page number."""
        base = super(PaginationError, self).__str__()

        return f'PaginationError({base} page={self.page})'

    # -- Instance Methods -- #

    def as_dict(self) -> dict[str, Any]:
        """
        Extend base context with pagination metadata.

        Returns
        -------
        dict[str, Any]
            The mapping produced by ``ApiRequestError.as_dict`` with an
            additional ``page`` key.
        """
        payload = super(PaginationError, self).as_dict()
        payload['page'] = self.page
        return payload
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.api.pagination` package.
|
|
3
|
+
|
|
4
|
+
Pagination configuration and runtime helpers for REST API responses.
|
|
5
|
+
|
|
6
|
+
This package groups configuration shapes, paginator utilities, and a
|
|
7
|
+
client-facing driver for traversing page-, offset-, and cursor-based JSON
|
|
8
|
+
responses.
|
|
9
|
+
|
|
10
|
+
Notes
|
|
11
|
+
-----
|
|
12
|
+
- Pagination defaults are centralized on :class:`EndpointClient` (``page``,
|
|
13
|
+
``per_page``, ``cursor``, ``limit``; start page ``1``; page size ``100``).
|
|
14
|
+
- Prefer :data:`JSONRecords` (list of :data:`JSONDict`) for paginated
|
|
15
|
+
responses; scalar/record aliases are exported for convenience.
|
|
16
|
+
- The underlying :class:`Paginator` is exported for advanced scenarios that
|
|
17
|
+
need to stream pages manually.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from .client import PaginationClient
|
|
23
|
+
from .config import CursorPaginationConfigMap
|
|
24
|
+
from .config import PagePaginationConfigMap
|
|
25
|
+
from .config import PaginationConfig
|
|
26
|
+
from .config import PaginationConfigMap
|
|
27
|
+
from .config import PaginationInput
|
|
28
|
+
from .config import PaginationType
|
|
29
|
+
from .paginator import Paginator
|
|
30
|
+
|
|
31
|
+
# SECTION: EXPORTS ========================================================== #
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
# Classes
|
|
36
|
+
'PaginationClient',
|
|
37
|
+
'Paginator',
|
|
38
|
+
# Data Classes
|
|
39
|
+
'PaginationConfig',
|
|
40
|
+
# Enums
|
|
41
|
+
'PaginationType',
|
|
42
|
+
# Type Aliases
|
|
43
|
+
'CursorPaginationConfigMap',
|
|
44
|
+
'PagePaginationConfigMap',
|
|
45
|
+
'PaginationInput',
|
|
46
|
+
'PaginationConfigMap',
|
|
47
|
+
]
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.api.pagination.client` module.
|
|
3
|
+
|
|
4
|
+
Client-facing pagination driver for REST API responses.
|
|
5
|
+
|
|
6
|
+
This module wires pagination configuration, fetch callbacks, and optional rate
|
|
7
|
+
limiting into :class:`etlplus.api.pagination.Paginator` instances.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from collections.abc import Generator
|
|
13
|
+
from collections.abc import Mapping
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from typing import Any
|
|
16
|
+
from typing import cast
|
|
17
|
+
|
|
18
|
+
from ...types import JSONDict
|
|
19
|
+
from ...types import JSONRecords
|
|
20
|
+
from ..rate_limiting import RateLimiter
|
|
21
|
+
from ..types import FetchPageCallable
|
|
22
|
+
from ..types import RequestOptions
|
|
23
|
+
from ..types import Url
|
|
24
|
+
from .config import PaginationConfig
|
|
25
|
+
from .config import PaginationInput
|
|
26
|
+
from .config import PaginationType
|
|
27
|
+
from .paginator import Paginator
|
|
28
|
+
|
|
29
|
+
# SECTION: EXPORTS ========================================================== #
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
# Classes
|
|
34
|
+
'PaginationClient',
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# SECTION: CLASSES ========================================================== #
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(slots=True, kw_only=True)
class PaginationClient:
    """
    Run :class:`Paginator` traversals from one stored configuration.

    Parameters
    ----------
    pagination : PaginationInput
        Pagination configuration mapping or :class:`PaginationConfig`.
    fetch : FetchPageCallable
        Callback used to fetch a single page.
    rate_limiter : RateLimiter | None, optional
        Optional limiter handed to the ``Paginator`` built for each run.

    Attributes
    ----------
    pagination : PaginationInput
        Pagination configuration as supplied by the caller.
    fetch : FetchPageCallable
        Stored fetch callback; called directly for non-paginated requests
        and passed to ``Paginator`` otherwise.
    rate_limiter : RateLimiter | None
        Limiter forwarded to ``Paginator.from_config`` when configured.
    """

    # -- Attributes -- #

    pagination: PaginationInput
    fetch: FetchPageCallable
    rate_limiter: RateLimiter | None = None

    # -- Properties -- #

    @property
    def is_paginated(self) -> bool:
        """Whether :attr:`pagination_type` resolved to a pagination type."""
        detected = self.pagination_type
        return detected is not None

    @property
    def pagination_type(self) -> PaginationType | None:
        """Pagination type of the stored configuration, or ``None``."""
        config = self.pagination
        if not isinstance(config, PaginationConfig):
            # Anything that is not a ``PaginationConfig`` is handed to the
            # paginator's own type detection.
            return Paginator.detect_type(
                cast(Mapping[str, Any] | None, config),
                default=None,
            )
        return config.type

    # -- Instance Methods -- #

    def collect(
        self,
        url: Url,
        *,
        request: RequestOptions | None = None,
    ) -> JSONRecords:
        """
        Gather every record produced by :meth:`iterate` into a list.

        Parameters
        ----------
        url : Url
            Base URL to fetch pages from.
        request : RequestOptions | None, optional
            Request metadata forwarded unchanged to :meth:`iterate`.

        Returns
        -------
        JSONRecords
            List of JSON records.
        """
        records: JSONRecords = []
        records.extend(self.iterate(url, request=request))
        return records

    def iterate(
        self,
        url: Url,
        *,
        request: RequestOptions | None = None,
    ) -> Generator[JSONDict]:
        """
        Lazily produce records using the configured pagination strategy.

        Parameters
        ----------
        url : Url
            Base URL to fetch pages from.
        request : RequestOptions | None, optional
            Request metadata for this invocation; a fresh
            ``RequestOptions()`` is used when omitted.

        Yields
        ------
        Generator[JSONDict]
            Iterator over JSON records from one or more pages.
        """
        options = request or RequestOptions()

        if self.is_paginated:
            driver = Paginator.from_config(
                cast(PaginationInput, self.pagination),
                fetch=self.fetch,
                rate_limiter=self.rate_limiter,
            )
            yield from driver.paginate_iter(url, request=options)
        else:
            # No recognised pagination type: issue exactly one fetch.
            yield from self._iterate_single_page(url, options)

    # -- Internal Instance Methods -- #

    def _iterate_single_page(
        self,
        url: Url,
        request: RequestOptions,
    ) -> Generator[JSONDict]:
        """
        Fetch once and yield the records of that single response.

        Parameters
        ----------
        url : Url
            URL passed to the fetch callback.
        request : RequestOptions
            Request metadata to forward to the fetch callback.

        Yields
        ------
        Generator[JSONDict]
            JSON records from the response.
        """
        config = self.pagination
        path_keys = ('records_path', 'fallback_path')

        records_path: str | None
        fallback_path: str | None
        if isinstance(config, Mapping):
            mapping = cast(Mapping[str, Any], config)
            records_path, fallback_path = (
                cast(str | None, mapping.get(key)) for key in path_keys
            )
        else:
            # Attribute lookup with a ``None`` default also covers a
            # ``None`` configuration.
            records_path, fallback_path = (
                getattr(config, key, None) for key in path_keys
            )

        # The third positional argument of the fetch callback is ``None``
        # for this single, non-paginated request.
        response = self.fetch(url, request, None)
        yield from Paginator.coalesce_records(
            response,
            records_path,
            fallback_path,
        )
|