etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/__init__.py ADDED
@@ -0,0 +1,43 @@
1
+ """
2
+ :mod:`etlplus` package.
3
+
4
+ Top-level facade for the ETLPlus toolkit.
5
+
6
+ Importing :mod:`etlplus` exposes the handful of coarse-grained helpers most
7
+ users care about: ``extract``, ``transform``, ``load``, ``validate``, and
8
+ ``run``. Each helper delegates to the richer modules under ``etlplus.*`` while
9
+ presenting a compact public API surface.
10
+
11
+ Examples
12
+ --------
13
+ >>> from etlplus import extract, transform
14
+ >>> raw = extract('file', 'input.json')
15
+ >>> curated = transform(raw, {'select': ['id', 'name']})
16
+
17
+ See Also
18
+ --------
19
+ - :mod:`etlplus.cli` for the command-line interface
20
+ - :mod:`etlplus.run` for orchestrating pipeline jobs
21
+ """
22
+
23
+ from .__version__ import __version__
24
+
25
+ __author__ = 'ETLPlus Team'
26
+
27
+ from .extract import extract
28
+ from .load import load
29
+ from .run import run
30
+ from .transform import transform
31
+ from .validate import validate
32
+
33
+ # SECTION: EXPORTS ========================================================== #
34
+
35
+
36
+ __all__ = [
37
+ '__version__',
38
+ 'extract',
39
+ 'load',
40
+ 'run',
41
+ 'transform',
42
+ 'validate',
43
+ ]
etlplus/__main__.py ADDED
@@ -0,0 +1,22 @@
1
+ """
2
+ :mod:`etlplus.__main__` module.
3
+
4
+ Thin wrapper supporting `python -m etlplus` by delegating to the CLI
5
+ entrypoint.
6
+ """
7
+
8
+ from .cli import main
9
+
10
+ # SECTION: INTERNAL FUNCTIONS =============================================== #
11
+
12
+
13
+ def _run() -> int:
14
+ """Run the CLI entrypoint and return its exit status."""
15
+ return main()
16
+
17
+
18
+ # SECTION: MAIN EXECUTION =================================================== #
19
+
20
+
21
+ if __name__ == '__main__': # pragma: no cover - exercised via CLI
22
+ raise SystemExit(_run())
etlplus/__version__.py ADDED
@@ -0,0 +1,14 @@
1
+ """
2
+ :mod:`etlplus.__version__` module.
3
+
4
+ Expose the installed ETLPlus version.
5
+ """
6
+
7
+ from importlib import metadata as _metadata
8
+
9
+ try:
10
+ __version__ = _metadata.version('etlplus')
11
+ except _metadata.PackageNotFoundError:
12
+ # Local editable installs without metadata fall back to an obvious
13
+ # placeholder.
14
+ __version__ = '0.0.0'
etlplus/api/README.md ADDED
@@ -0,0 +1,237 @@
1
+ # etlplus.api module.
2
+
3
+ Focused documentation for the `etlplus.api` subpackage: a lightweight HTTP client and helpers for
4
+ paginated REST endpoints.
5
+
6
+ - Provides a small `EndpointClient` for calling JSON APIs
7
+ - Supports page-, offset-, and cursor-based pagination via `PaginationConfig`
8
+ - Simple bearer-auth credentials via `EndpointCredentialsBearer`
9
+ - Convenience helpers to extract records from nested JSON payloads
10
+ - Returns the shared `JSONRecords` alias (a list of `JSONDict`) for paginated responses, matching
11
+ the rest of the library.
12
+
13
+ Back to project overview: see the top-level [README](../../README.md).
14
+
15
+ ## Installation
16
+
17
+ `etlplus.api` ships as part of the `etlplus` package. Install the package as usual:
18
+
19
+ ```bash
20
+ pip install etlplus
21
+ # or for development
22
+ pip install -e ".[dev]"
23
+ ```
24
+
25
+ ## Quickstart
26
+
27
+ ```python
28
+ import requests
29
+ from etlplus.api import (
30
+ EndpointClient,
31
+ EndpointCredentialsBearer,
32
+ JSONRecords, PaginationConfig,
33
+ )
34
+
35
+ auth = EndpointCredentialsBearer(
36
+ token_url="https://auth.example.com/oauth2/token",
37
+ client_id="CLIENT_ID",
38
+ client_secret="CLIENT_SECRET",
39
+ scope="read:items",
40
+ )
41
+
42
+ session = requests.Session()
43
+ session.auth = auth
44
+
45
+ client = EndpointClient(
46
+ base_url="https://api.example.com/v1",
47
+ endpoints={
48
+ "list": "/items", # you can add more named endpoints here
49
+ },
50
+ retry={"max_attempts": 4, "backoff": 0.5},
51
+ retry_network_errors=True,
52
+ session=session,
53
+ )
54
+
55
+ # Page-based pagination
56
+ pg: PaginationConfig = {"type": "page", "page_size": 100}
57
+ rows: JSONRecords = client.paginate("list", pagination=pg)
58
+ for row in rows:
59
+ print(row)
60
+ ```
61
+
62
+ ### Overriding rate limits per call
63
+
64
+ When a client is constructed with ``rate_limit`` metadata you can still tweak the pacing for
65
+ individual calls by passing ``rate_limit_overrides`` to ``paginate``/``paginate_iter``. The
66
+ overrides share the same shape as the base configuration and take precedence over the client
67
+ defaults.
68
+
69
+ ```python
70
+ client = EndpointClient(
71
+ base_url="https://api.example.com/v1",
72
+ endpoints={"list": "/items"},
73
+ rate_limit={"max_per_sec": 2}, # ~0.5s between calls when unspecified
74
+ )
75
+
76
+ rows = client.paginate(
77
+ "list",
78
+ pagination={"type": "page", "page_size": 100},
79
+ rate_limit_overrides={"sleep_seconds": 0.1}, # per-call override
80
+ )
81
+ ```
82
+
83
+ Precedence is ``overrides.sleep_seconds`` > ``overrides.max_per_sec`` > the same keys from
84
+ ``client.rate_limit``. When no override is supplied the base settings are used.
85
+
86
+ ## Choosing `records_path` and `cursor_path`
87
+
88
+ If the API responds like this:
89
+
90
+ ```json
91
+ {
92
+ "data": {
93
+ "items": [{"id": 1}, {"id": 2}],
94
+ "nextCursor": "abc123"
95
+ }
96
+ }
97
+ ```
98
+
99
+ - `records_path` should be `data.items`
100
+ - `cursor_path` should be `data.nextCursor`
101
+
102
+ If the response is a list at the top level, you can omit `records_path`.
103
+
104
+ ## Cursor-based pagination example
105
+
106
+ ```python
107
+ from etlplus.api import EndpointClient, PaginationConfig, JSONRecords
108
+
109
+ client = EndpointClient(
110
+ base_url="https://api.example.com/v1",
111
+ endpoints={"list": "/items"},
112
+ )
113
+
114
+ pg: PaginationConfig = {
115
+ "type": "cursor",
116
+ # Where records live in the JSON payload (dot path or top-level key)
117
+ "records_path": "data.items",
118
+ # Query parameter name that carries the cursor
119
+ "cursor_param": "cursor",
120
+ # Dot path in the response JSON that holds the next cursor value
121
+ "cursor_path": "data.nextCursor",
122
+ # Optional: limit per page
123
+ "page_size": 100,
124
+ # Optional: start from a specific cursor value
125
+ # "start_cursor": "abc123",
126
+ }
127
+
128
+ rows: JSONRecords = client.paginate("list", pagination=pg)
129
+ for row in rows:
130
+ process(row)
131
+ ```
132
+
133
+ ## Offset-based pagination example
134
+
135
+ ```python
136
+ from etlplus.api import EndpointClient, PaginationConfig
137
+
138
+ client = EndpointClient(
139
+ base_url="https://api.example.com/v1",
140
+ endpoints={"list": "/items"},
141
+ )
142
+
143
+ pg: PaginationConfig = {
144
+ "type": "offset",
145
+ # Key holding the offset value on each request
146
+ "page_param": "offset",
147
+ # Key holding the page size (limit) on each request
148
+ "size_param": "limit",
149
+ # Starting offset (0 is common for offset-based APIs)
150
+ "start_page": 0,
151
+ # Number of records per page
152
+ "page_size": 100,
153
+ # Optional: where records live in the JSON payload
154
+ # "records_path": "data.items",
155
+ # Optional caps
156
+ # "max_records": 1000,
157
+ }
158
+
159
+ rows = client.paginate("list", pagination=pg)
160
+ for row in rows:
161
+ process(row)
162
+ ```
163
+
164
+ ## Authentication
165
+
166
+ Use bearer tokens with `EndpointCredentialsBearer` (OAuth2 client credentials flow). Attach it to a
167
+ `requests.Session` and pass that session to the client:
168
+
169
+ ```python
170
+ import requests
171
+ from etlplus.api import EndpointClient, EndpointCredentialsBearer
172
+
173
+ auth = EndpointCredentialsBearer(
174
+ token_url="https://auth.example.com/oauth2/token",
175
+ client_id="CLIENT_ID",
176
+ client_secret="CLIENT_SECRET",
177
+ scope="read:items",
178
+ )
179
+
180
+ session = requests.Session()
181
+ session.auth = auth
182
+
183
+ client = EndpointClient(
184
+ base_url="https://api.example.com/v1",
185
+ endpoints={"list": "/items"},
186
+ session=session,
187
+ )
188
+ ```
189
+
190
+ `EndpointCredentialsBearer` refreshes tokens automatically, applies a 15-second default timeout
191
+ (`DEFAULT_TOKEN_TIMEOUT`), and omits the optional `scope` field when not provided so identity
192
+ providers can fall back to their own defaults. If you already possess a static token, attach it to a
193
+ `requests.Session` manually rather than instantiating `EndpointCredentialsBearer`.
194
+
195
+ ## Errors and rate limiting
196
+
197
+ - Errors: `ApiRequestError`, `ApiAuthError`, and `PaginationError` (in `etlplus/api/errors.py`)
198
+ include an `as_dict()` helper for structured logs.
199
+ - Rate limiting: `RateLimiter` and its `resolve_sleep_seconds` helper (in
200
+ `etlplus/api/rate_limiting/rate_limiter.py`) derive fixed sleeps or `max_per_sec` windows. The paginator now
201
+ builds a `RateLimiter` whenever the effective delay comes from
202
+ `rate_limit`/`rate_limit_overrides`, so each page fetch sleeps before making another HTTP call.
203
+ Passing `rate_limit_overrides` to `paginate*` lets you momentarily speed up or slow down a single
204
+ request without mutating the client-wide defaults.
205
+
206
+ ## Types and transport
207
+
208
+ - Types: pagination config helpers live in `etlplus/api/pagination/`; retry helpers (including
209
+ `RetryPolicy`) live in `etlplus/api/retry_manager.py`; rate-limit helpers live in
210
+ `etlplus/api/rate_limiting/`. These are all re-exported from `etlplus.api` for convenience.
211
+ - Transport/session: `etlplus/api/transport.py` contains the HTTP adapter helpers and
212
+ `etlplus/api/request_manager.py` wraps `requests` sessions plus retry orchestration. Advanced
213
+ users may consult those modules to adapt behavior.
214
+
215
+ ## Supporting modules
216
+
217
+ - `etlplus.api.types` collects friendly aliases such as `Headers`, `Params`, `Url`, and
218
+ `RateLimitOverrides` (whose values accept numeric override inputs) so endpoint helpers share the
219
+ same type vocabulary.
220
+ - `etlplus.utils` exposes lightweight helpers used across the project, including CLI-friendly
221
+ functions like `json_type`/`print_json` plus numeric coercion utilities (`to_float`,
222
+ `to_positive_int`, etc.).
223
+
224
+ ## Minimal contract
225
+
226
+ - Inputs
227
+ - `base_url: str`, `endpoints: dict[str, str]`
228
+ - optional `credentials`
229
+ - `pagination: PaginationConfig` for `paginate()`
230
+ - Outputs
231
+ - `paginate(name, ...)` returns `JSONRecords`, a list of JSON-like rows
232
+ - Errors
233
+ - Network/HTTP errors raise exceptions; consult `errors.py`
234
+
235
+ ## See also
236
+
237
+ - Top-level CLI and library usage in the main [README](../../README.md)
etlplus/api/__init__.py ADDED
@@ -0,0 +1,136 @@
1
+ """
2
+ :mod:`etlplus.api` package.
3
+
4
+ High-level helpers for building REST API clients with pagination, retry,
5
+ rate limiting, and transport configuration.
6
+
7
+ Summary
8
+ -------
9
+ Use :class:`etlplus.api.EndpointClient` to register relative endpoint paths
10
+ under a base URL and paginate responses. The client can apply rate limits
11
+ between requests and perform exponential-backoff retries with full jitter.
12
+
13
+ Examples
14
+ --------
15
+ Page-based pagination
16
+ ^^^^^^^^^^^^^^^^^^^^^
17
+ >>> from etlplus.api import EndpointClient
18
+ >>> client = EndpointClient(
19
+ ... base_url="https://api.example.com/v1",
20
+ ... endpoints={"list_users": "/users"},
21
+ ... )
22
+ >>> page_cfg = {
23
+ ... "type": "page", # or "offset"
24
+ ... "records_path": "data.items", # dotted path into payload
25
+ ... "page_param": "page",
26
+ ... "size_param": "per_page",
27
+ ... "start_page": 1,
28
+ ... "page_size": 100,
29
+ ... }
30
+ >>> rows = client.paginate(
31
+ ... "list_users",
32
+ ... query_parameters={"active": "true"},
33
+ ... pagination=page_cfg,
34
+ ... )
35
+
36
+ Retries and network errors
37
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
38
+ >>> client = EndpointClient(
39
+ ... base_url="https://api.example.com/v1",
40
+ ... endpoints={"list": "/items"},
41
+ ... retry={"max_attempts": 5, "backoff": 0.5, "retry_on": [429, 503]},
42
+ ... retry_network_errors=True,
43
+ ... )
44
+ >>> items = client.paginate(
45
+ ... "list", pagination={"type": "page", "page_size": 50}
46
+ ... )
47
+
48
+ Absolute URLs
49
+ ^^^^^^^^^^^^^
50
+ Use :meth:`EndpointClient.paginate_url` for an already composed absolute URL.
51
+ It accepts the same pagination config and returns either the raw JSON object
52
+ (no pagination) or a list of record dicts aggregated across pages.
53
+
54
+ Notes
55
+ -----
56
+ - ``EndpointClient.endpoints`` is read-only at runtime.
57
+ - Pagination defaults are centralized on the client (``page``, ``per_page``,
58
+ ``cursor``, ``limit``; start page ``1``; page size ``100``).
59
+ - Retries are opt-in via the ``retry`` parameter; backoff uses jitter.
60
+ - Use ``retry_network_errors=True`` to also retry timeouts/connection errors.
61
+ - Prefer :data:`JSONRecords` (list of :data:`JSONDict`) for paginated
62
+ responses; scalar/record aliases are exported for convenience.
63
+ - The underlying :class:`Paginator` is exported for advanced scenarios that
64
+ need to stream pages manually.
65
+
66
+ See Also
67
+ --------
68
+ - :mod:`etlplus.api.rate_limiting` for rate-limit helpers and config shapes
69
+ - :mod:`etlplus.api.pagination` for pagination helpers and config shapes
70
+ - :mod:`etlplus.api.retry_manager` for retry policies
71
+ - :mod:`etlplus.api.transport` for HTTPAdapter helpers
72
+ """
73
+
74
+ from __future__ import annotations
75
+
76
+ from .auth import EndpointCredentialsBearer
77
+ from .config import ApiConfig
78
+ from .config import ApiProfileConfig
79
+ from .config import EndpointConfig
80
+ from .endpoint_client import EndpointClient
81
+ from .pagination import CursorPaginationConfigMap
82
+ from .pagination import PagePaginationConfigMap
83
+ from .pagination import PaginationClient
84
+ from .pagination import PaginationConfig
85
+ from .pagination import PaginationConfigMap
86
+ from .pagination import PaginationType
87
+ from .pagination import Paginator
88
+ from .rate_limiting import RateLimitConfig
89
+ from .rate_limiting import RateLimitConfigMap
90
+ from .rate_limiting import RateLimiter
91
+ from .retry_manager import RetryManager
92
+ from .retry_manager import RetryPolicy
93
+ from .retry_manager import RetryStrategy
94
+ from .transport import HTTPAdapterMountConfig
95
+ from .transport import HTTPAdapterRetryConfig
96
+ from .transport import build_http_adapter
97
+ from .types import Headers
98
+ from .types import Params
99
+ from .types import RequestOptions
100
+ from .types import Url
101
+
102
+ # SECTION: EXPORTS ========================================================== #
103
+
104
+
105
+ __all__ = [
106
+ # Classes
107
+ 'EndpointClient',
108
+ 'EndpointCredentialsBearer',
109
+ 'Paginator',
110
+ 'RateLimiter',
111
+ 'RetryManager',
112
+ # Data Classes
113
+ 'ApiConfig',
114
+ 'ApiProfileConfig',
115
+ 'EndpointConfig',
116
+ 'PaginationClient',
117
+ 'PaginationConfig',
118
+ 'RateLimitConfig',
119
+ 'RequestOptions',
120
+ 'RetryStrategy',
121
+ # Enums
122
+ 'PaginationType',
123
+ # Functions
124
+ 'build_http_adapter',
125
+ # Type Aliases
126
+ 'CursorPaginationConfigMap',
127
+ 'Headers',
128
+ 'HTTPAdapterMountConfig',
129
+ 'HTTPAdapterRetryConfig',
130
+ 'PagePaginationConfigMap',
131
+ 'PaginationConfigMap',
132
+ 'Params',
133
+ 'RateLimitConfigMap',
134
+ 'RetryPolicy',
135
+ 'Url',
136
+ ]