etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/api/errors.py ADDED
@@ -0,0 +1,170 @@
1
+ """
2
+ :mod:`etlplus.api.errors` module.
3
+
4
+ Exception types with rich context for debugging REST API failures.
5
+
6
+ Summary
7
+ -------
8
+ Provides subclasses for request errors (``ApiRequestError``), auth failures
9
+ (``ApiAuthError``), and pagination errors with page context
10
+ (``PaginationError``).
11
+
12
+ Examples
13
+ --------
14
+ >>> try:
15
+ ... client.paginate("list", pagination={"type": "page", "page_size": 50})
16
+ ... except ApiAuthError as e:
17
+ ... print("auth failed", e.status)
18
+ ... except PaginationError as e:
19
+ ... print("page:", e.page, "attempts:", e.attempts)
20
+ ... except ApiRequestError as e:
21
+ ... print("request failed", e.url)
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ from dataclasses import dataclass
27
+ from typing import TYPE_CHECKING
28
+ from typing import Any
29
+
30
+ import requests # type: ignore[import]
31
+
32
+ if TYPE_CHECKING: # pragma: no cover - typing only
33
+ from .retry_manager import RetryPolicy
34
+
35
+
36
+ # SECTION: EXPORTS ========================================================== #
37
+
38
+
39
+ __all__ = ['ApiAuthError', 'ApiRequestError', 'PaginationError']
40
+
41
+
42
+ # SECTION: CLASSES ========================================================== #
43
+
44
+
45
@dataclass(slots=True, kw_only=True)
class ApiRequestError(requests.RequestException):
    """
    Base error for API request failures with rich context.

    Parameters
    ----------
    url : str
        Absolute URL that was requested.
    status : int | None, optional
        HTTP status code when available.
    attempts : int, optional
        Number of attempts performed (defaults to ``1``).
    retried : bool, optional
        Whether any retry attempts were made.
    retry_policy : RetryPolicy | None, optional
        The retry policy in effect, if any.
    cause : Exception | None, optional
        Original underlying exception.

    Attributes
    ----------
    url : str
        Absolute URL that was requested.
    status : int | None
        HTTP status code when available.
    attempts : int
        Number of attempts performed.
    retried : bool
        Whether any retry attempts were made.
    retry_policy : RetryPolicy | None
        The retry policy in effect, if any.
    cause : Exception | None
        Original underlying exception.

    Examples
    --------
    >>> try:
    ...     raise ApiRequestError(url="https://api.example.com/x", status=500)
    ... except ApiRequestError as e:
    ...     print(e.status, e.attempts)
    500 1

    Notes
    -----
    The :meth:`as_dict` helper returns a structured payload suitable for
    structured logging or telemetry.

    All fields are keyword-only. The base ``requests.RequestException``
    initializer is run from :meth:`__post_init__` without arguments, so the
    attributes it assigns (``response`` and ``request``) exist on every
    instance and are ``None``.
    """

    # -- Attributes -- #

    url: str
    status: int | None = None
    attempts: int = 1
    retried: bool = False
    retry_policy: RetryPolicy | None = None
    cause: Exception | None = None

    # -- Magic Methods (Object Lifecycle) -- #

    def __post_init__(self) -> None:
        """Run the ``requests.RequestException`` initializer."""
        # The dataclass-generated ``__init__`` replaces the inherited one, so
        # the base initializer would otherwise never run and handlers written
        # against ``requests.RequestException`` that read ``e.response`` or
        # ``e.request`` would hit ``AttributeError``.
        # The base class is named explicitly instead of using zero-argument
        # ``super()``: ``slots=True`` makes the decorator build a new class,
        # and the implicit ``__class__`` cell may still point at the old one.
        requests.RequestException.__init__(self)

    # -- Magic Methods (Object Representation) -- #

    def __str__(self) -> str:  # pragma: no cover - formatting only
        base = f'request failed url={self.url!r} status={self.status}'
        meta = f' attempts={self.attempts} retried={self.retried}'

        return f'ApiRequestError({base}{meta})'

    # -- Instance Methods -- #

    def as_dict(self) -> dict[str, Any]:
        """
        Return structured error context for logging or telemetry.

        Returns
        -------
        dict[str, Any]
            Mapping with the keys ``url``, ``status``, ``attempts``,
            ``retried``, ``retry_policy`` and ``cause``. ``retry_policy`` is
            passed through unchanged; ``cause`` is the ``repr`` of the
            underlying exception, or ``None`` when no cause was recorded.
        """
        return {
            'url': self.url,
            'status': self.status,
            'attempts': self.attempts,
            'retried': self.retried,
            'retry_policy': self.retry_policy,
            # Identity check rather than truthiness: an exception type may
            # define ``__bool__``/``__len__`` and still be a real cause.
            'cause': repr(self.cause) if self.cause is not None else None,
        }
123
+
124
+
125
class ApiAuthError(ApiRequestError):
    """
    Authentication or authorization failure (e.g., HTTP 401 or 403).

    Declares no fields or methods of its own; construction, string
    formatting, and ``as_dict`` are all inherited from ``ApiRequestError``.
    """
127
+
128
+
129
@dataclass(slots=True, kw_only=True)
class PaginationError(ApiRequestError):
    """
    Error raised during pagination with page context.

    Parameters
    ----------
    page : int | None, optional
        Page number (1-based) or request count when applicable.

    Attributes
    ----------
    page : int | None
        Stored page number.

    Examples
    --------
    >>> err = PaginationError(url="u", status=400, page=3)
    >>> str(err).startswith("PaginationError(")
    True

    Notes
    -----
    The keyword-only fields declared on ``ApiRequestError`` (``url``,
    ``status``, ``attempts``, ``retried``, ``retry_policy``, ``cause``) are
    inherited and accepted by the constructor alongside ``page``.
    """

    # -- Attributes -- #

    page: int | None = None

    # -- Magic Methods (Object Representation) -- #

    def __str__(self) -> str:  # pragma: no cover - formatting only
        # Call the base implementation by name. With ``slots=True`` the
        # decorator returns a newly built class, and on CPython releases
        # before 3.14 zero-argument ``super()`` inside such a class still
        # refers to the discarded original and raises ``TypeError``.
        base = ApiRequestError.__str__(self)

        return f'PaginationError({base} page={self.page})'

    # -- Instance Methods -- #

    def as_dict(self) -> dict[str, Any]:
        """
        Extend base context with pagination metadata.

        Returns
        -------
        dict[str, Any]
            The mapping produced by ``ApiRequestError.as_dict`` plus a
            ``page`` key holding :attr:`page`.
        """
        # Explicit base call for the same reason as in ``__str__``.
        payload = ApiRequestError.as_dict(self)
        payload['page'] = self.page
        return payload
@@ -0,0 +1,47 @@
1
+ """
2
+ :mod:`etlplus.api.pagination` package.
3
+
4
+ Pagination configuration and runtime helpers for REST API responses.
5
+
6
+ This package groups configuration shapes, paginator utilities, and a
7
+ client-facing driver for traversing page-, offset-, and cursor-based JSON
8
+ responses.
9
+
10
+ Notes
11
+ -----
12
+ - Pagination defaults are centralized on :class:`EndpointClient` (``page``,
13
+ ``per_page``, ``cursor``, ``limit``; start page ``1``; page size ``100``).
14
+ - Prefer :data:`JSONRecords` (list of :data:`JSONDict`) for paginated
15
+ responses; scalar/record aliases are exported for convenience.
16
+ - The underlying :class:`Paginator` is exported for advanced scenarios that
17
+ need to stream pages manually.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from .client import PaginationClient
23
+ from .config import CursorPaginationConfigMap
24
+ from .config import PagePaginationConfigMap
25
+ from .config import PaginationConfig
26
+ from .config import PaginationConfigMap
27
+ from .config import PaginationInput
28
+ from .config import PaginationType
29
+ from .paginator import Paginator
30
+
31
+ # SECTION: EXPORTS ========================================================== #
32
+
33
+
34
+ __all__ = [
35
+ # Classes
36
+ 'PaginationClient',
37
+ 'Paginator',
38
+ # Data Classes
39
+ 'PaginationConfig',
40
+ # Enums
41
+ 'PaginationType',
42
+ # Type Aliases
43
+ 'CursorPaginationConfigMap',
44
+ 'PagePaginationConfigMap',
45
+ 'PaginationInput',
46
+ 'PaginationConfigMap',
47
+ ]
@@ -0,0 +1,188 @@
1
+ """
2
+ :mod:`etlplus.api.pagination.client` module.
3
+
4
+ Client-facing pagination driver for REST API responses.
5
+
6
+ This module wires pagination configuration, fetch callbacks, and optional rate
7
+ limiting into :class:`etlplus.api.pagination.Paginator` instances.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from collections.abc import Generator
13
+ from collections.abc import Mapping
14
+ from dataclasses import dataclass
15
+ from typing import Any
16
+ from typing import cast
17
+
18
+ from ...types import JSONDict
19
+ from ...types import JSONRecords
20
+ from ..rate_limiting import RateLimiter
21
+ from ..types import FetchPageCallable
22
+ from ..types import RequestOptions
23
+ from ..types import Url
24
+ from .config import PaginationConfig
25
+ from .config import PaginationInput
26
+ from .config import PaginationType
27
+ from .paginator import Paginator
28
+
29
+ # SECTION: EXPORTS ========================================================== #
30
+
31
+
32
+ __all__ = [
33
+ # Classes
34
+ 'PaginationClient',
35
+ ]
36
+
37
+
38
+ # SECTION: CLASSES ========================================================== #
39
+
40
+
41
@dataclass(slots=True, kw_only=True)
class PaginationClient:
    """
    Stream records from an endpoint, paginating when configured to.

    When a pagination type can be resolved from ``pagination``, a
    :class:`Paginator` is built from the stored settings and drives the
    requests. Otherwise the fetch callback is invoked once and the records
    are extracted from that single response.

    Parameters
    ----------
    pagination : PaginationInput
        Pagination configuration mapping or :class:`PaginationConfig`.
    fetch : FetchPageCallable
        Callback used to fetch a single page.
    rate_limiter : RateLimiter | None, optional
        Optional limiter handed to the :class:`Paginator`.

    Attributes
    ----------
    pagination : PaginationInput
        Pagination configuration exactly as supplied.
    fetch : FetchPageCallable
        Stored fetch callback.
    rate_limiter : RateLimiter | None
        Stored limiter, or ``None`` when not configured.
    """

    # -- Attributes -- #

    pagination: PaginationInput
    fetch: FetchPageCallable
    rate_limiter: RateLimiter | None = None

    # -- Properties -- #

    @property
    def is_paginated(self) -> bool:
        """Whether :attr:`pagination_type` resolves to a type."""
        resolved = self.pagination_type
        return resolved is not None

    @property
    def pagination_type(self) -> PaginationType | None:
        """Pagination type for the stored configuration, if any."""
        config = self.pagination
        if not isinstance(config, PaginationConfig):
            # Anything other than a ``PaginationConfig`` is delegated to the
            # paginator's own detection, with ``None`` as the fallback.
            mapping = cast(Mapping[str, Any] | None, config)
            return Paginator.detect_type(mapping, default=None)
        return config.type

    # -- Instance Methods -- #

    def collect(
        self,
        url: Url,
        *,
        request: RequestOptions | None = None,
    ) -> JSONRecords:
        """
        Gather every record produced by :meth:`iterate` into a list.

        Parameters
        ----------
        url : Url
            Base URL to fetch pages from.
        request : RequestOptions | None, optional
            Request metadata forwarded unchanged to :meth:`iterate`.

        Returns
        -------
        JSONRecords
            List of JSON records.
        """
        records: JSONRecords = []
        records.extend(self.iterate(url, request=request))
        return records

    def iterate(
        self,
        url: Url,
        *,
        request: RequestOptions | None = None,
    ) -> Generator[JSONDict]:
        """
        Lazily produce records, one at a time.

        Parameters
        ----------
        url : Url
            Base URL to fetch pages from.
        request : RequestOptions | None, optional
            Request metadata for this invocation. A falsy value is replaced
            by a fresh ``RequestOptions()``.

        Yields
        ------
        JSONDict
            Records from one or more pages.
        """
        options = request if request else RequestOptions()

        if self.is_paginated:
            pager = Paginator.from_config(
                cast(PaginationInput, self.pagination),
                fetch=self.fetch,
                rate_limiter=self.rate_limiter,
            )
            yield from pager.paginate_iter(url, request=options)
        else:
            yield from self._iterate_single_page(url, options)

    # -- Internal Instance Methods -- #

    def _iterate_single_page(
        self,
        url: Url,
        request: RequestOptions,
    ) -> Generator[JSONDict]:
        """
        Fetch once and yield the records found in that response.

        Parameters
        ----------
        url : Url
            URL passed to the fetch callback.
        request : RequestOptions
            Request metadata passed to the fetch callback.

        Yields
        ------
        JSONDict
            Records extracted from the single response.
        """
        config = self.pagination
        records_path: str | None
        fallback_path: str | None
        if not isinstance(config, Mapping):
            # Non-mapping configuration (including ``None``): read the path
            # settings as attributes, defaulting to ``None`` when absent.
            records_path = getattr(config, 'records_path', None)
            fallback_path = getattr(config, 'fallback_path', None)
        else:
            mapping = cast(Mapping[str, Any], config)
            records_path = cast(str | None, mapping.get('records_path'))
            fallback_path = cast(str | None, mapping.get('fallback_path'))

        # The callback is invoked with ``None`` as its third argument.
        payload = self.fetch(url, request, None)
        yield from Paginator.coalesce_records(
            payload,
            records_path,
            fallback_path,
        )