etlplus 0.9.1__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/README.md +37 -0
- etlplus/__init__.py +1 -26
- etlplus/api/README.md +51 -3
- etlplus/api/__init__.py +10 -0
- etlplus/api/config.py +39 -28
- etlplus/api/endpoint_client.py +3 -3
- etlplus/api/enums.py +51 -0
- etlplus/api/pagination/client.py +1 -1
- etlplus/api/rate_limiting/config.py +13 -1
- etlplus/api/rate_limiting/rate_limiter.py +8 -11
- etlplus/api/request_manager.py +11 -6
- etlplus/api/transport.py +14 -2
- etlplus/api/types.py +96 -6
- etlplus/{run_helpers.py → api/utils.py} +209 -153
- etlplus/cli/README.md +40 -0
- etlplus/cli/commands.py +76 -43
- etlplus/cli/constants.py +1 -1
- etlplus/cli/handlers.py +40 -12
- etlplus/cli/io.py +2 -2
- etlplus/cli/main.py +1 -1
- etlplus/cli/state.py +4 -7
- etlplus/database/README.md +48 -0
- etlplus/database/ddl.py +1 -1
- etlplus/database/engine.py +19 -3
- etlplus/database/orm.py +2 -0
- etlplus/database/schema.py +1 -1
- etlplus/enums.py +1 -157
- etlplus/file/README.md +105 -0
- etlplus/file/__init__.py +25 -0
- etlplus/file/_imports.py +141 -0
- etlplus/file/_io.py +160 -0
- etlplus/file/accdb.py +78 -0
- etlplus/file/arrow.py +78 -0
- etlplus/file/avro.py +176 -0
- etlplus/file/bson.py +77 -0
- etlplus/file/cbor.py +78 -0
- etlplus/file/cfg.py +79 -0
- etlplus/file/conf.py +80 -0
- etlplus/file/core.py +322 -0
- etlplus/file/csv.py +79 -0
- etlplus/file/dat.py +78 -0
- etlplus/file/dta.py +77 -0
- etlplus/file/duckdb.py +78 -0
- etlplus/file/enums.py +343 -0
- etlplus/file/feather.py +111 -0
- etlplus/file/fwf.py +77 -0
- etlplus/file/gz.py +123 -0
- etlplus/file/hbs.py +78 -0
- etlplus/file/hdf5.py +78 -0
- etlplus/file/ini.py +79 -0
- etlplus/file/ion.py +78 -0
- etlplus/file/jinja2.py +78 -0
- etlplus/file/json.py +98 -0
- etlplus/file/log.py +78 -0
- etlplus/file/mat.py +78 -0
- etlplus/file/mdb.py +78 -0
- etlplus/file/msgpack.py +78 -0
- etlplus/file/mustache.py +78 -0
- etlplus/file/nc.py +78 -0
- etlplus/file/ndjson.py +108 -0
- etlplus/file/numbers.py +75 -0
- etlplus/file/ods.py +79 -0
- etlplus/file/orc.py +111 -0
- etlplus/file/parquet.py +113 -0
- etlplus/file/pb.py +78 -0
- etlplus/file/pbf.py +77 -0
- etlplus/file/properties.py +78 -0
- etlplus/file/proto.py +77 -0
- etlplus/file/psv.py +79 -0
- etlplus/file/rda.py +78 -0
- etlplus/file/rds.py +78 -0
- etlplus/file/sas7bdat.py +78 -0
- etlplus/file/sav.py +77 -0
- etlplus/file/sqlite.py +78 -0
- etlplus/file/stub.py +84 -0
- etlplus/file/sylk.py +77 -0
- etlplus/file/tab.py +81 -0
- etlplus/file/toml.py +78 -0
- etlplus/file/tsv.py +80 -0
- etlplus/file/txt.py +102 -0
- etlplus/file/vm.py +78 -0
- etlplus/file/wks.py +77 -0
- etlplus/file/xls.py +88 -0
- etlplus/file/xlsm.py +79 -0
- etlplus/file/xlsx.py +99 -0
- etlplus/file/xml.py +185 -0
- etlplus/file/xpt.py +78 -0
- etlplus/file/yaml.py +95 -0
- etlplus/file/zip.py +175 -0
- etlplus/file/zsav.py +77 -0
- etlplus/ops/README.md +50 -0
- etlplus/ops/__init__.py +61 -0
- etlplus/{extract.py → ops/extract.py} +81 -99
- etlplus/{load.py → ops/load.py} +78 -101
- etlplus/{run.py → ops/run.py} +159 -127
- etlplus/{transform.py → ops/transform.py} +75 -68
- etlplus/{validation → ops}/utils.py +53 -17
- etlplus/{validate.py → ops/validate.py} +22 -12
- etlplus/templates/README.md +46 -0
- etlplus/types.py +5 -4
- etlplus/utils.py +136 -2
- etlplus/workflow/README.md +52 -0
- etlplus/{config → workflow}/__init__.py +10 -23
- etlplus/{config → workflow}/connector.py +58 -44
- etlplus/workflow/dag.py +105 -0
- etlplus/{config → workflow}/jobs.py +105 -32
- etlplus/{config → workflow}/pipeline.py +59 -51
- etlplus/{config → workflow}/profile.py +8 -5
- etlplus/workflow/types.py +115 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/METADATA +210 -17
- etlplus-0.9.2.dist-info/RECORD +134 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/WHEEL +1 -1
- etlplus/config/types.py +0 -204
- etlplus/config/utils.py +0 -120
- etlplus/file.py +0 -657
- etlplus/validation/__init__.py +0 -44
- etlplus-0.9.1.dist-info/RECORD +0 -65
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/entry_points.txt +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/licenses/LICENSE +0 -0
- {etlplus-0.9.1.dist-info → etlplus-0.9.2.dist-info}/top_level.txt +0 -0
etlplus/README.md
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# `etlplus` Package
|
|
2
|
+
|
|
3
|
+
The `etlplus` package provides a unified Python API and CLI for ETL operations: extraction,
|
|
4
|
+
validation, transformation, and loading of data from files, APIs, and databases.
|
|
5
|
+
|
|
6
|
+
- Top-level entry points for extract, validate, transform, and load
|
|
7
|
+
- Utilities for pipeline orchestration and helpers
|
|
8
|
+
- Exposes all subpackages for advanced usage
|
|
9
|
+
|
|
10
|
+
Back to project overview: see the top-level [README](../README.md).
|
|
11
|
+
|
|
12
|
+
## Subpackages
|
|
13
|
+
|
|
14
|
+
- [etlplus.api](api/README.md): Lightweight HTTP client and paginated REST helpers
|
|
15
|
+
- [etlplus.file](file/README.md): Unified file format support and helpers
|
|
16
|
+
- [etlplus.cli](cli/README.md): Command-line interface definitions for `etlplus`
|
|
17
|
+
- [etlplus.database](database/README.md): Database engine, schema, and ORM helpers
|
|
18
|
+
- [etlplus.templates](templates/README.md): SQL and DDL template helpers
|
|
19
|
+
- [etlplus.ops](ops/README.md): Extraction, validation, transformation, and loading operations
|
|
20
|
+
- [etlplus.workflow](workflow/README.md): Helpers for data connectors, pipelines, jobs, and
|
|
21
|
+
profiles
|
|
22
|
+
|
|
23
|
+
## Quickstart
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from etlplus.ops import extract, validate, transform, load
|
|
27
|
+
|
|
28
|
+
data = extract("file", "input.csv")
|
|
29
|
+
filtered = transform(data, {"filter": {"field": "age", "op": "gt", "value": 25}})
|
|
30
|
+
assert validate(filtered, {"age": {"type": "number", "min": 0}})["valid"]
|
|
31
|
+
load(filtered, "file", "output.json", file_format="json")
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## See Also
|
|
35
|
+
|
|
36
|
+
- [Top-level project README](../README.md)
|
|
37
|
+
- [API reference](../docs/README.md)
|
etlplus/__init__.py
CHANGED
|
@@ -2,42 +2,17 @@
|
|
|
2
2
|
:mod:`etlplus` package.
|
|
3
3
|
|
|
4
4
|
Top-level facade for the ETLPlus toolkit.
|
|
5
|
-
|
|
6
|
-
Importing :mod:`etlplus` exposes the handful of coarse-grained helpers most
|
|
7
|
-
users care about: ``extract``, ``transform``, ``load``, ``validate``, and
|
|
8
|
-
``run``. Each helper delegates to the richer modules under ``etlplus.*`` while
|
|
9
|
-
presenting a compact public API surface.
|
|
10
|
-
|
|
11
|
-
Examples
|
|
12
|
-
--------
|
|
13
|
-
>>> from etlplus import extract, transform
|
|
14
|
-
>>> raw = extract('file', 'input.json')
|
|
15
|
-
>>> curated = transform(raw, {'select': ['id', 'name']})
|
|
16
|
-
|
|
17
|
-
See Also
|
|
18
|
-
--------
|
|
19
|
-
- :mod:`etlplus.cli` for the command-line interface
|
|
20
|
-
- :mod:`etlplus.run` for orchestrating pipeline jobs
|
|
21
5
|
"""
|
|
22
6
|
|
|
23
7
|
from .__version__ import __version__
|
|
24
8
|
|
|
25
9
|
__author__ = 'ETLPlus Team'
|
|
26
10
|
|
|
27
|
-
from .extract import extract
|
|
28
|
-
from .load import load
|
|
29
|
-
from .run import run
|
|
30
|
-
from .transform import transform
|
|
31
|
-
from .validate import validate
|
|
32
11
|
|
|
33
12
|
# SECTION: EXPORTS ========================================================== #
|
|
34
13
|
|
|
35
14
|
|
|
36
15
|
__all__ = [
|
|
16
|
+
'__author__',
|
|
37
17
|
'__version__',
|
|
38
|
-
'extract',
|
|
39
|
-
'load',
|
|
40
|
-
'run',
|
|
41
|
-
'transform',
|
|
42
|
-
'validate',
|
|
43
18
|
]
|
etlplus/api/README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
# etlplus.api
|
|
1
|
+
# `etlplus.api` Subpackage
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
Documentation for the `etlplus.api` subpackage: a lightweight HTTP client and helpers for paginated
|
|
4
|
+
REST endpoints.
|
|
5
5
|
|
|
6
6
|
- Provides a small `EndpointClient` for calling JSON APIs
|
|
7
7
|
- Supports page-, offset-, and cursor-based pagination via `PaginationConfig`
|
|
@@ -12,6 +12,21 @@ paginated REST endpoints.
|
|
|
12
12
|
|
|
13
13
|
Back to project overview: see the top-level [README](../../README.md).
|
|
14
14
|
|
|
15
|
+
- [`etlplus.api` Subpackage](#etlplusapi-subpackage)
|
|
16
|
+
- [Installation](#installation)
|
|
17
|
+
- [Quickstart](#quickstart)
|
|
18
|
+
- [Overriding Rate Limits Per Call](#overriding-rate-limits-per-call)
|
|
19
|
+
- [Choosing `records_path` and `cursor_path`](#choosing-records_path-and-cursor_path)
|
|
20
|
+
- [Cursor-Based Pagination Example](#cursor-based-pagination-example)
|
|
21
|
+
- [Offset-based pagination example](#offset-based-pagination-example)
|
|
22
|
+
- [Authentication](#authentication)
|
|
23
|
+
- [Errors and Rate Limiting](#errors-and-rate-limiting)
|
|
24
|
+
- [Types and Transport](#types-and-transport)
|
|
25
|
+
- [Config Schemas](#config-schemas)
|
|
26
|
+
- [Supporting Modules](#supporting-modules)
|
|
27
|
+
- [Minimal Contract](#minimal-contract)
|
|
28
|
+
- [See also](#see-also)
|
|
29
|
+
|
|
15
30
|
## Installation
|
|
16
31
|
|
|
17
32
|
`etlplus.api` ships as part of the `etlplus` package. Install the package as usual:
|
|
@@ -211,6 +226,36 @@ providers can fall back to their own defaults. If you already possess a static t
|
|
|
211
226
|
`etlplus/api/request_manager.py` wraps `requests` sessions plus retry orchestration. Advanced
|
|
212
227
|
users may consult those modules to adapt behavior.
|
|
213
228
|
|
|
229
|
+
## Config Schemas
|
|
230
|
+
|
|
231
|
+
`etlplus.api.types` defines TypedDict-based configuration shapes for API profiles and endpoints.
|
|
232
|
+
Runtime parsing remains permissive in `etlplus.api.config`, but these types improve IDE
|
|
233
|
+
autocomplete and static analysis.
|
|
234
|
+
|
|
235
|
+
Exported types:
|
|
236
|
+
|
|
237
|
+
- `ApiConfigMap`: top-level API config shape
|
|
238
|
+
- `ApiProfileConfigMap`: per-profile API config shape
|
|
239
|
+
- `ApiProfileDefaultsMap`: defaults block within a profile
|
|
240
|
+
- `EndpointMap`: endpoint config shape
|
|
241
|
+
|
|
242
|
+
Example:
|
|
243
|
+
|
|
244
|
+
```python
|
|
245
|
+
from etlplus.api import ApiConfigMap
|
|
246
|
+
|
|
247
|
+
api_cfg: ApiConfigMap = {
|
|
248
|
+
"base_url": "https://example.test",
|
|
249
|
+
"headers": {"Authorization": "Bearer token"},
|
|
250
|
+
"endpoints": {
|
|
251
|
+
"users": {
|
|
252
|
+
"path": "/users",
|
|
253
|
+
"method": "GET",
|
|
254
|
+
},
|
|
255
|
+
},
|
|
256
|
+
}
|
|
257
|
+
```
|
|
258
|
+
|
|
214
259
|
## Supporting Modules
|
|
215
260
|
|
|
216
261
|
- `etlplus.api.types` collects friendly aliases such as `Headers`, `Params`, `Url`, and
|
|
@@ -233,3 +278,6 @@ providers can fall back to their own defaults. If you already possess a static t
|
|
|
233
278
|
## See also
|
|
234
279
|
|
|
235
280
|
- Top-level CLI and library usage in the main [README](../../README.md)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
[def]: #installation
|
etlplus/api/__init__.py
CHANGED
|
@@ -78,6 +78,7 @@ from .config import ApiConfig
|
|
|
78
78
|
from .config import ApiProfileConfig
|
|
79
79
|
from .config import EndpointConfig
|
|
80
80
|
from .endpoint_client import EndpointClient
|
|
81
|
+
from .enums import HttpMethod
|
|
81
82
|
from .pagination import CursorPaginationConfigMap
|
|
82
83
|
from .pagination import PagePaginationConfigMap
|
|
83
84
|
from .pagination import PaginationClient
|
|
@@ -98,6 +99,10 @@ from .types import Headers
|
|
|
98
99
|
from .types import Params
|
|
99
100
|
from .types import RequestOptions
|
|
100
101
|
from .types import Url
|
|
102
|
+
from .utils import compose_api_request_env
|
|
103
|
+
from .utils import compose_api_target_env
|
|
104
|
+
from .utils import paginate_with_client
|
|
105
|
+
from .utils import resolve_request
|
|
101
106
|
|
|
102
107
|
# SECTION: EXPORTS ========================================================== #
|
|
103
108
|
|
|
@@ -119,9 +124,14 @@ __all__ = [
|
|
|
119
124
|
'RequestOptions',
|
|
120
125
|
'RetryStrategy',
|
|
121
126
|
# Enums
|
|
127
|
+
'HttpMethod',
|
|
122
128
|
'PaginationType',
|
|
123
129
|
# Functions
|
|
124
130
|
'build_http_adapter',
|
|
131
|
+
'compose_api_request_env',
|
|
132
|
+
'compose_api_target_env',
|
|
133
|
+
'paginate_with_client',
|
|
134
|
+
'resolve_request',
|
|
125
135
|
# Type Aliases
|
|
126
136
|
'CursorPaginationConfigMap',
|
|
127
137
|
'Headers',
|
etlplus/api/config.py
CHANGED
|
@@ -3,11 +3,6 @@
|
|
|
3
3
|
|
|
4
4
|
Configuration dataclasses for REST API services, profiles, and endpoints.
|
|
5
5
|
|
|
6
|
-
These models used to live under :mod:`etlplus.config`, but they belong in the
|
|
7
|
-
API layer because they compose runtime types such as
|
|
8
|
-
:class:`etlplus.api.EndpointClient`, :class:`etlplus.api.PaginationConfig`, and
|
|
9
|
-
:class:`etlplus.api.RateLimitConfig`.
|
|
10
|
-
|
|
11
6
|
Notes
|
|
12
7
|
-----
|
|
13
8
|
- TypedDict references remain editor hints only; :meth:`from_obj` accepts
|
|
@@ -18,6 +13,7 @@ Notes
|
|
|
18
13
|
|
|
19
14
|
from __future__ import annotations
|
|
20
15
|
|
|
16
|
+
from collections.abc import Callable
|
|
21
17
|
from collections.abc import Mapping
|
|
22
18
|
from dataclasses import dataclass
|
|
23
19
|
from dataclasses import field
|
|
@@ -29,20 +25,20 @@ from typing import overload
|
|
|
29
25
|
from urllib.parse import urlsplit
|
|
30
26
|
from urllib.parse import urlunsplit
|
|
31
27
|
|
|
32
|
-
from ..enums import HttpMethod
|
|
33
28
|
from ..types import StrAnyMap
|
|
34
29
|
from ..types import StrStrMap
|
|
35
30
|
from ..utils import cast_str_dict
|
|
36
31
|
from ..utils import coerce_dict
|
|
37
32
|
from ..utils import maybe_mapping
|
|
38
33
|
from .endpoint_client import EndpointClient
|
|
34
|
+
from .enums import HttpMethod
|
|
39
35
|
from .pagination import PaginationConfig
|
|
40
36
|
from .rate_limiting import RateLimitConfig
|
|
41
37
|
|
|
42
38
|
if TYPE_CHECKING:
|
|
43
|
-
from
|
|
44
|
-
from
|
|
45
|
-
from
|
|
39
|
+
from .types import ApiConfigMap
|
|
40
|
+
from .types import ApiProfileConfigMap
|
|
41
|
+
from .types import EndpointMap
|
|
46
42
|
|
|
47
43
|
|
|
48
44
|
# SECTION: EXPORTS ========================================================== #
|
|
@@ -106,6 +102,33 @@ def _effective_service_defaults(
|
|
|
106
102
|
return fallback_base, fallback_headers
|
|
107
103
|
|
|
108
104
|
|
|
105
|
+
def _freeze_mapping(
|
|
106
|
+
mapping: Mapping[Any, Any],
|
|
107
|
+
*,
|
|
108
|
+
key_cast: Callable[[Any], Any] | None = None,
|
|
109
|
+
) -> MappingProxyType:
|
|
110
|
+
"""
|
|
111
|
+
Return an immutable copy of a mapping, optionally normalizing keys.
|
|
112
|
+
|
|
113
|
+
Parameters
|
|
114
|
+
----------
|
|
115
|
+
mapping : Mapping[Any, Any]
|
|
116
|
+
Source mapping to freeze.
|
|
117
|
+
key_cast : Callable[[Any], Any] | None, optional
|
|
118
|
+
Optional key coercion applied to each key.
|
|
119
|
+
|
|
120
|
+
Returns
|
|
121
|
+
-------
|
|
122
|
+
MappingProxyType
|
|
123
|
+
Read-only mapping proxy with normalized keys.
|
|
124
|
+
"""
|
|
125
|
+
if key_cast is None:
|
|
126
|
+
data = dict(mapping)
|
|
127
|
+
else:
|
|
128
|
+
data = {key_cast(key): value for key, value in mapping.items()}
|
|
129
|
+
return MappingProxyType(data)
|
|
130
|
+
|
|
131
|
+
|
|
109
132
|
def _normalize_method(
|
|
110
133
|
value: Any,
|
|
111
134
|
) -> Any | None:
|
|
@@ -232,16 +255,8 @@ class ApiProfileConfig:
|
|
|
232
255
|
# -- Magic Methods (Object Lifecycle) -- #
|
|
233
256
|
|
|
234
257
|
def __post_init__(self) -> None:
|
|
235
|
-
object.__setattr__(
|
|
236
|
-
|
|
237
|
-
'headers',
|
|
238
|
-
MappingProxyType(dict(self.headers)),
|
|
239
|
-
)
|
|
240
|
-
object.__setattr__(
|
|
241
|
-
self,
|
|
242
|
-
'auth',
|
|
243
|
-
MappingProxyType(dict(self.auth)),
|
|
244
|
-
)
|
|
258
|
+
object.__setattr__(self, 'headers', _freeze_mapping(self.headers))
|
|
259
|
+
object.__setattr__(self, 'auth', _freeze_mapping(self.auth))
|
|
245
260
|
|
|
246
261
|
# -- Class Methods -- #
|
|
247
262
|
|
|
@@ -340,20 +355,16 @@ class ApiConfig:
|
|
|
340
355
|
# -- Magic Methods (Object Lifecycle) -- #
|
|
341
356
|
|
|
342
357
|
def __post_init__(self) -> None:
|
|
343
|
-
object.__setattr__(
|
|
344
|
-
self,
|
|
345
|
-
'headers',
|
|
346
|
-
MappingProxyType(dict(self.headers)),
|
|
347
|
-
)
|
|
358
|
+
object.__setattr__(self, 'headers', _freeze_mapping(self.headers))
|
|
348
359
|
object.__setattr__(
|
|
349
360
|
self,
|
|
350
361
|
'endpoints',
|
|
351
|
-
|
|
362
|
+
_freeze_mapping(self.endpoints, key_cast=str),
|
|
352
363
|
)
|
|
353
364
|
object.__setattr__(
|
|
354
365
|
self,
|
|
355
366
|
'profiles',
|
|
356
|
-
|
|
367
|
+
_freeze_mapping(self.profiles, key_cast=str),
|
|
357
368
|
)
|
|
358
369
|
|
|
359
370
|
# -- Internal Instance Methods -- #
|
|
@@ -545,12 +556,12 @@ class EndpointConfig:
|
|
|
545
556
|
object.__setattr__(
|
|
546
557
|
self,
|
|
547
558
|
'path_params',
|
|
548
|
-
|
|
559
|
+
_freeze_mapping(self.path_params),
|
|
549
560
|
)
|
|
550
561
|
object.__setattr__(
|
|
551
562
|
self,
|
|
552
563
|
'query_params',
|
|
553
|
-
|
|
564
|
+
_freeze_mapping(self.query_params),
|
|
554
565
|
)
|
|
555
566
|
|
|
556
567
|
# -- Class Methods -- #
|
etlplus/api/endpoint_client.py
CHANGED
|
@@ -455,7 +455,7 @@ class EndpointClient:
|
|
|
455
455
|
-------
|
|
456
456
|
JSONData
|
|
457
457
|
Parsed JSON payload or fallback structure matching
|
|
458
|
-
:func:`etlplus.extract.extract_from_api` semantics.
|
|
458
|
+
:func:`etlplus.ops.extract.extract_from_api` semantics.
|
|
459
459
|
"""
|
|
460
460
|
return self._request_manager.get(url, **kwargs)
|
|
461
461
|
|
|
@@ -479,7 +479,7 @@ class EndpointClient:
|
|
|
479
479
|
-------
|
|
480
480
|
JSONData
|
|
481
481
|
Parsed JSON payload or fallback structure matching
|
|
482
|
-
:func:`etlplus.extract.extract_from_api` semantics.
|
|
482
|
+
:func:`etlplus.ops.extract.extract_from_api` semantics.
|
|
483
483
|
"""
|
|
484
484
|
return self._request_manager.post(url, **kwargs)
|
|
485
485
|
|
|
@@ -506,7 +506,7 @@ class EndpointClient:
|
|
|
506
506
|
-------
|
|
507
507
|
JSONData
|
|
508
508
|
Parsed JSON payload or fallback structure matching
|
|
509
|
-
:func:`etlplus.extract.extract_from_api` semantics.
|
|
509
|
+
:func:`etlplus.ops.extract.extract_from_api` semantics.
|
|
510
510
|
"""
|
|
511
511
|
return self._request_manager.request(method, url, **kwargs)
|
|
512
512
|
|
etlplus/api/enums.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.api.enums` module.
|
|
3
|
+
|
|
4
|
+
REST API-aligned enums and helpers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from ..enums import CoercibleStrEnum
|
|
10
|
+
|
|
11
|
+
# SECTION: EXPORTS ========================================================= #
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
# Enums
|
|
16
|
+
'HttpMethod',
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# SECTION: ENUMS ============================================================ #
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class HttpMethod(CoercibleStrEnum):
|
|
24
|
+
"""Supported HTTP verbs; see :attr:`allows_body` for those that carry a body."""
|
|
25
|
+
|
|
26
|
+
# -- Constants -- #
|
|
27
|
+
|
|
28
|
+
CONNECT = 'connect'
|
|
29
|
+
DELETE = 'delete'
|
|
30
|
+
GET = 'get'
|
|
31
|
+
HEAD = 'head'
|
|
32
|
+
OPTIONS = 'options'
|
|
33
|
+
PATCH = 'patch'
|
|
34
|
+
POST = 'post'
|
|
35
|
+
PUT = 'put'
|
|
36
|
+
TRACE = 'trace'
|
|
37
|
+
|
|
38
|
+
# -- Getters -- #
|
|
39
|
+
|
|
40
|
+
@property
|
|
41
|
+
def allows_body(self) -> bool:
|
|
42
|
+
"""
|
|
43
|
+
Whether the method typically allows a request body.
|
|
44
|
+
|
|
45
|
+
Notes
|
|
46
|
+
-----
|
|
47
|
+
- RFCs do not strictly forbid bodies on some other methods (e.g.,
|
|
48
|
+
``DELETE``), but many servers/clients do not expect them. We mark
|
|
49
|
+
``POST``, ``PUT``, and ``PATCH`` as True.
|
|
50
|
+
"""
|
|
51
|
+
return self in {HttpMethod.POST, HttpMethod.PUT, HttpMethod.PATCH}
|
etlplus/api/pagination/client.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
:mod:`etlplus.api.rate_limiting.
|
|
2
|
+
:mod:`etlplus.api.rate_limiting.config` module.
|
|
3
3
|
|
|
4
4
|
Rate limiting configuration primitives.
|
|
5
5
|
|
|
@@ -268,6 +268,18 @@ class RateLimitConfig(BoundsWarningsMixin):
|
|
|
268
268
|
) -> Self:
|
|
269
269
|
"""
|
|
270
270
|
Normalize rate-limit config and overrides into a single instance.
|
|
271
|
+
|
|
272
|
+
Parameters
|
|
273
|
+
----------
|
|
274
|
+
rate_limit : StrAnyMap | RateLimitConfig | None, optional
|
|
275
|
+
Base rate-limit configuration to normalize.
|
|
276
|
+
overrides : RateLimitOverrides, optional
|
|
277
|
+
Override values that take precedence over ``rate_limit``.
|
|
278
|
+
|
|
279
|
+
Returns
|
|
280
|
+
-------
|
|
281
|
+
Self
|
|
282
|
+
Normalized rate-limit configuration.
|
|
271
283
|
"""
|
|
272
284
|
normalized = _coerce_rate_limit_map(rate_limit)
|
|
273
285
|
cfg = _merge_rate_limit(normalized, overrides)
|
|
@@ -20,6 +20,7 @@ from __future__ import annotations
|
|
|
20
20
|
|
|
21
21
|
import time
|
|
22
22
|
from dataclasses import dataclass
|
|
23
|
+
from typing import Self
|
|
23
24
|
|
|
24
25
|
from ...utils import to_float
|
|
25
26
|
from ...utils import to_positive_float
|
|
@@ -143,13 +144,13 @@ class RateLimiter:
|
|
|
143
144
|
# -- Class Methods -- #
|
|
144
145
|
|
|
145
146
|
@classmethod
|
|
146
|
-
def disabled(cls) ->
|
|
147
|
+
def disabled(cls) -> Self:
|
|
147
148
|
"""
|
|
148
149
|
Create a limiter that never sleeps.
|
|
149
150
|
|
|
150
151
|
Returns
|
|
151
152
|
-------
|
|
152
|
-
|
|
153
|
+
Self
|
|
153
154
|
Instance with rate limiting disabled.
|
|
154
155
|
"""
|
|
155
156
|
return cls(sleep_seconds=0.0)
|
|
@@ -158,7 +159,7 @@ class RateLimiter:
|
|
|
158
159
|
def fixed(
|
|
159
160
|
cls,
|
|
160
161
|
seconds: float,
|
|
161
|
-
) ->
|
|
162
|
+
) -> Self:
|
|
162
163
|
"""
|
|
163
164
|
Create a limiter with a fixed non-negative delay.
|
|
164
165
|
|
|
@@ -170,7 +171,7 @@ class RateLimiter:
|
|
|
170
171
|
|
|
171
172
|
Returns
|
|
172
173
|
-------
|
|
173
|
-
|
|
174
|
+
Self
|
|
174
175
|
Instance with the specified delay.
|
|
175
176
|
"""
|
|
176
177
|
value = to_float(seconds, 0.0, minimum=0.0) or 0.0
|
|
@@ -181,7 +182,7 @@ class RateLimiter:
|
|
|
181
182
|
def from_config(
|
|
182
183
|
cls,
|
|
183
184
|
cfg: RateLimitInput,
|
|
184
|
-
) ->
|
|
185
|
+
) -> Self:
|
|
185
186
|
"""
|
|
186
187
|
Build a :class:`RateLimiter` from a configuration mapping.
|
|
187
188
|
|
|
@@ -201,12 +202,10 @@ class RateLimiter:
|
|
|
201
202
|
|
|
202
203
|
Returns
|
|
203
204
|
-------
|
|
204
|
-
|
|
205
|
+
Self
|
|
205
206
|
Instance with normalized ``sleep_seconds`` and ``max_per_sec``.
|
|
206
207
|
"""
|
|
207
208
|
config = RateLimitConfig.from_inputs(rate_limit=cfg)
|
|
208
|
-
if config is None:
|
|
209
|
-
return cls.disabled()
|
|
210
209
|
|
|
211
210
|
# RateLimiter.__post_init__ will normalize and enforce invariants.
|
|
212
211
|
return cls(**config.as_mapping())
|
|
@@ -261,6 +260,4 @@ class RateLimiter:
|
|
|
261
260
|
rate_limit=rate_limit,
|
|
262
261
|
overrides=overrides,
|
|
263
262
|
)
|
|
264
|
-
|
|
265
|
-
return 0.0
|
|
266
|
-
return float(config.sleep_seconds)
|
|
263
|
+
return float(config.sleep_seconds) if config.sleep_seconds else 0.0
|
etlplus/api/request_manager.py
CHANGED
|
@@ -14,6 +14,7 @@ from collections.abc import Sequence
|
|
|
14
14
|
from dataclasses import dataclass
|
|
15
15
|
from dataclasses import field
|
|
16
16
|
from functools import partial
|
|
17
|
+
from types import TracebackType
|
|
17
18
|
from typing import Any
|
|
18
19
|
from typing import cast
|
|
19
20
|
|
|
@@ -137,7 +138,7 @@ class RequestManager:
|
|
|
137
138
|
self,
|
|
138
139
|
exc_type: type[BaseException] | None,
|
|
139
140
|
exc: BaseException | None,
|
|
140
|
-
tb:
|
|
141
|
+
tb: TracebackType | None,
|
|
141
142
|
) -> None:
|
|
142
143
|
"""
|
|
143
144
|
Exit the runtime context and close owned sessions.
|
|
@@ -148,7 +149,7 @@ class RequestManager:
|
|
|
148
149
|
Exception type if raised, else ``None``.
|
|
149
150
|
exc : BaseException | None
|
|
150
151
|
Exception instance if raised, else ``None``.
|
|
151
|
-
tb :
|
|
152
|
+
tb : TracebackType | None
|
|
152
153
|
Traceback if an exception was raised, else ``None``.
|
|
153
154
|
"""
|
|
154
155
|
if self._ctx_session is None:
|
|
@@ -275,7 +276,7 @@ class RequestManager:
|
|
|
275
276
|
|
|
276
277
|
try:
|
|
277
278
|
policy = self.retry
|
|
278
|
-
if
|
|
279
|
+
if policy is None:
|
|
279
280
|
try:
|
|
280
281
|
return fetch(url, **call_kwargs)
|
|
281
282
|
except requests.RequestException as exc: # pragma: no cover
|
|
@@ -438,9 +439,13 @@ class RequestManager:
|
|
|
438
439
|
if isinstance(payload, dict):
|
|
439
440
|
return cast(JSONDict, payload)
|
|
440
441
|
if isinstance(payload, list):
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
442
|
+
out: list[JSONDict] = []
|
|
443
|
+
for item in payload:
|
|
444
|
+
if isinstance(item, dict):
|
|
445
|
+
out.append(cast(JSONDict, item))
|
|
446
|
+
else:
|
|
447
|
+
out.append({'value': item})
|
|
448
|
+
return cast(JSONData, out)
|
|
444
449
|
return {'value': payload}
|
|
445
450
|
return {
|
|
446
451
|
'content': response.text,
|
etlplus/api/transport.py
CHANGED
|
@@ -191,7 +191,19 @@ def _build_retry_value(
|
|
|
191
191
|
def _normalize_retry_kwargs(
|
|
192
192
|
retries_cfg: Mapping[str, Any],
|
|
193
193
|
) -> dict[str, Any]:
|
|
194
|
-
"""
|
|
194
|
+
"""
|
|
195
|
+
Filter and normalize urllib3 ``Retry`` kwargs from a mapping.
|
|
196
|
+
|
|
197
|
+
Parameters
|
|
198
|
+
----------
|
|
199
|
+
retries_cfg : Mapping[str, Any]
|
|
200
|
+
Raw retry configuration mapping.
|
|
201
|
+
|
|
202
|
+
Returns
|
|
203
|
+
-------
|
|
204
|
+
dict[str, Any]
|
|
205
|
+
Filtered and normalized keyword arguments for ``Retry``.
|
|
206
|
+
"""
|
|
195
207
|
allowed_keys = {
|
|
196
208
|
'total',
|
|
197
209
|
'connect',
|
|
@@ -239,7 +251,7 @@ def _resolve_max_retries(
|
|
|
239
251
|
"""
|
|
240
252
|
match retries_cfg:
|
|
241
253
|
case int():
|
|
242
|
-
return retries_cfg
|
|
254
|
+
return to_maximum_int(retries_cfg, 0)
|
|
243
255
|
case Mapping():
|
|
244
256
|
try:
|
|
245
257
|
return _build_retry_value(retries_cfg)
|