etlplus 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +43 -0
- etlplus/__main__.py +22 -0
- etlplus/__version__.py +14 -0
- etlplus/api/README.md +237 -0
- etlplus/api/__init__.py +136 -0
- etlplus/api/auth.py +432 -0
- etlplus/api/config.py +633 -0
- etlplus/api/endpoint_client.py +885 -0
- etlplus/api/errors.py +170 -0
- etlplus/api/pagination/__init__.py +47 -0
- etlplus/api/pagination/client.py +188 -0
- etlplus/api/pagination/config.py +440 -0
- etlplus/api/pagination/paginator.py +775 -0
- etlplus/api/rate_limiting/__init__.py +38 -0
- etlplus/api/rate_limiting/config.py +343 -0
- etlplus/api/rate_limiting/rate_limiter.py +266 -0
- etlplus/api/request_manager.py +589 -0
- etlplus/api/retry_manager.py +430 -0
- etlplus/api/transport.py +325 -0
- etlplus/api/types.py +172 -0
- etlplus/cli/__init__.py +15 -0
- etlplus/cli/app.py +1367 -0
- etlplus/cli/handlers.py +775 -0
- etlplus/cli/main.py +616 -0
- etlplus/config/__init__.py +56 -0
- etlplus/config/connector.py +372 -0
- etlplus/config/jobs.py +311 -0
- etlplus/config/pipeline.py +339 -0
- etlplus/config/profile.py +78 -0
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/ddl.py +197 -0
- etlplus/enums.py +414 -0
- etlplus/extract.py +218 -0
- etlplus/file.py +657 -0
- etlplus/load.py +336 -0
- etlplus/mixins.py +62 -0
- etlplus/py.typed +0 -0
- etlplus/run.py +368 -0
- etlplus/run_helpers.py +843 -0
- etlplus/templates/__init__.py +5 -0
- etlplus/templates/ddl.sql.j2 +128 -0
- etlplus/templates/view.sql.j2 +69 -0
- etlplus/transform.py +1049 -0
- etlplus/types.py +227 -0
- etlplus/utils.py +638 -0
- etlplus/validate.py +493 -0
- etlplus/validation/__init__.py +44 -0
- etlplus/validation/utils.py +389 -0
- etlplus-0.5.4.dist-info/METADATA +616 -0
- etlplus-0.5.4.dist-info/RECORD +55 -0
- etlplus-0.5.4.dist-info/WHEEL +5 -0
- etlplus-0.5.4.dist-info/entry_points.txt +2 -0
- etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
- etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/run_helpers.py
ADDED
|
@@ -0,0 +1,843 @@
|
|
|
1
|
+
"""
|
|
2
|
+
:mod:`etlplus.run_helpers` module.
|
|
3
|
+
|
|
4
|
+
Helper functions and small utilities used by ``etlplus.run`` to compose API
|
|
5
|
+
request/load environments, pagination configs, session objects, and endpoint
|
|
6
|
+
clients. Extracted to keep ``run.py`` focused on orchestration while enabling
|
|
7
|
+
reuse and testability.
|
|
8
|
+
|
|
9
|
+
Public (re-export safe) helpers:
|
|
10
|
+
- build_pagination_cfg(pagination, overrides)
|
|
11
|
+
- build_session(cfg)
|
|
12
|
+
- compose_api_request_env(cfg, source_obj, ex_opts)
|
|
13
|
+
- compose_api_target_env(cfg, target_obj, overrides)
|
|
14
|
+
- build_endpoint_client(base_url, base_path, endpoints, env)
|
|
15
|
+
- compute_rl_sleep_seconds(rate_limit, overrides)
|
|
16
|
+
- paginate_with_client(client, endpoint_key, params, headers,
|
|
17
|
+
timeout, pagination, sleep_seconds)
|
|
18
|
+
|
|
19
|
+
Notes
|
|
20
|
+
-----
|
|
21
|
+
These helpers intentionally accept permissive ``Any``/``Mapping`` inputs to
|
|
22
|
+
avoid tight coupling with config dataclasses while keeping runtime flexible.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import inspect
|
|
28
|
+
from collections.abc import Mapping
|
|
29
|
+
from typing import Any
|
|
30
|
+
from typing import TypedDict
|
|
31
|
+
from typing import cast
|
|
32
|
+
|
|
33
|
+
import requests # type: ignore[import]
|
|
34
|
+
|
|
35
|
+
from .api import ApiConfig
|
|
36
|
+
from .api import EndpointClient
|
|
37
|
+
from .api import EndpointConfig
|
|
38
|
+
from .api import Headers
|
|
39
|
+
from .api import PaginationConfig
|
|
40
|
+
from .api import PaginationConfigMap
|
|
41
|
+
from .api import Params
|
|
42
|
+
from .api import RateLimitConfig
|
|
43
|
+
from .api import RateLimitConfigMap
|
|
44
|
+
from .api import RateLimiter
|
|
45
|
+
from .api import RetryPolicy
|
|
46
|
+
from .api import Url
|
|
47
|
+
from .types import Timeout
|
|
48
|
+
|
|
49
|
+
# SECTION: EXPORTS ========================================================== #
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
__all__ = [
    # Public helper functions
    'build_endpoint_client',
    'build_pagination_cfg',
    'build_session',
    'compose_api_request_env',
    'compose_api_target_env',
    'compute_rl_sleep_seconds',
    'paginate_with_client',
    # TypedDict shapes used by the helpers above
    'ApiRequestEnv',
    'ApiTargetEnv',
    'SessionConfig',
]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# SECTION: TYPED DICTS ====================================================== #
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class ApiRequestEnv(TypedDict, total=False):
    """
    Resolved settings for a single API extract request.

    Every key is optional (``total=False``). ``compose_api_request_env``
    returns a mapping of this shape.
    """

    # Request URL; taken from the source or built from the API endpoint.
    url: Url | None
    # Merged request headers.
    headers: dict[str, str]
    # Request timeout taken from the per-call options.
    timeout: Timeout
    # Pre-configured session, or ``None`` when no session config applies.
    session: requests.Session | None
    # True when the source names both an API and an endpoint.
    use_endpoints: bool
    # Base URL of the referenced API (endpoint-client mode only).
    base_url: str | None
    # Effective base path of the referenced API (endpoint-client mode only).
    base_path: str | None
    # Endpoint name -> endpoint path for the referenced API.
    endpoints_map: dict[str, str] | None
    # Name of the endpoint selected by the source.
    endpoint_key: str | None
    # Merged query parameters.
    params: dict[str, Any]
    # Pagination settings produced by ``build_pagination_cfg``.
    pagination: PaginationConfigMap | None
    # Delay between requests derived from the rate-limit settings.
    sleep_seconds: float
    # Retry policy, if any.
    retry: RetryPolicy | None
    # Whether network errors should be retried.
    retry_network_errors: bool
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class ApiTargetEnv(TypedDict, total=False):
    """
    Resolved settings for a single API load (target) request.

    Every key is optional (``total=False``). ``compose_api_target_env``
    returns a mapping of this shape.
    """

    # Destination URL; from overrides, the target, or the API endpoint.
    url: Url | None
    # Merged request headers.
    headers: dict[str, str]
    # Request timeout taken from the overrides, when given.
    timeout: Timeout
    # Pre-configured session, or ``None`` when no session config applies.
    session: requests.Session | None
    # HTTP method name for the load request.
    method: str | None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class SessionConfig(TypedDict, total=False):
    """
    Options understood by ``build_session``.

    Every key is optional (``total=False``); absent keys leave the
    corresponding ``requests.Session`` attribute at its default.
    """

    # Headers merged into the session's default headers.
    headers: Mapping[str, Any]
    # Query parameters assigned to the session.
    params: Mapping[str, Any]
    # Auth object; a two-item list/tuple is stored as a 2-tuple.
    auth: Any
    # Value assigned to the session's ``verify`` attribute.
    verify: bool | str
    # Value assigned to the session's ``cert`` attribute.
    cert: Any
    # Proxy settings merged into the session's proxies.
    proxies: Mapping[str, Any]
    # Cookies added to the session's cookie jar.
    cookies: Mapping[str, Any]
    # Coerced to ``bool`` and assigned to the session's ``trust_env``.
    trust_env: bool
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# SECTION: INTERNAL FUNCTIONS ============================================== #
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# -- API Environment Composition -- #
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _get_api_cfg_and_endpoint(
|
|
120
|
+
cfg: Any,
|
|
121
|
+
api_name: str,
|
|
122
|
+
endpoint_name: str,
|
|
123
|
+
) -> tuple[ApiConfig, EndpointConfig]:
|
|
124
|
+
"""
|
|
125
|
+
Retrieve API configuration and endpoint configuration.
|
|
126
|
+
|
|
127
|
+
Parameters
|
|
128
|
+
----------
|
|
129
|
+
cfg : Any
|
|
130
|
+
The overall configuration object.
|
|
131
|
+
api_name : str
|
|
132
|
+
The name of the API to retrieve.
|
|
133
|
+
endpoint_name : str
|
|
134
|
+
The name of the endpoint to retrieve.
|
|
135
|
+
|
|
136
|
+
Returns
|
|
137
|
+
-------
|
|
138
|
+
tuple[ApiConfig, EndpointConfig]
|
|
139
|
+
The API configuration and endpoint configuration.
|
|
140
|
+
|
|
141
|
+
Raises
|
|
142
|
+
------
|
|
143
|
+
ValueError
|
|
144
|
+
If the API or endpoint is not defined.
|
|
145
|
+
"""
|
|
146
|
+
api_cfg = cfg.apis.get(api_name)
|
|
147
|
+
if not api_cfg:
|
|
148
|
+
raise ValueError(f'API not defined: {api_name}')
|
|
149
|
+
ep = api_cfg.endpoints.get(endpoint_name)
|
|
150
|
+
if not ep:
|
|
151
|
+
raise ValueError(
|
|
152
|
+
f'Endpoint "{endpoint_name}" not defined in API "{api_name}"',
|
|
153
|
+
)
|
|
154
|
+
return api_cfg, ep
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _inherit_http_from_api_endpoint(
    api_cfg: ApiConfig,
    ep: EndpointConfig,
    url: Url | None,
    headers: dict[str, str],
    session_cfg: SessionConfig | None,
    force_url: bool = False,
) -> tuple[Url | None, dict[str, str], SessionConfig | None]:
    """
    Layer caller-supplied HTTP settings over API/endpoint definitions.

    Parameters
    ----------
    api_cfg : ApiConfig
        API definition supplying default headers and the endpoint URL.
    ep : EndpointConfig
        Endpoint definition within *api_cfg*.
    url : Url | None
        Caller URL; kept only when truthy and *force_url* is false.
    headers : dict[str, str]
        Caller headers; these win over ``api_cfg.headers`` on key clashes.
    session_cfg : SessionConfig | None
        Caller session settings, merged last over API and endpoint ones.
    force_url : bool, optional
        When true, always replace *url* with the endpoint URL.

    Returns
    -------
    tuple[Url | None, dict[str, str], SessionConfig | None]
        The resolved URL, a new merged headers dict, and the merged
        session configuration (``None`` when nothing was configured).
    """
    resolved_url = url
    if force_url or not resolved_url:
        resolved_url = api_cfg.build_endpoint_url(ep)
    # API headers form the base; caller headers override them.
    combined_headers = {**api_cfg.headers, **headers}
    combined_session = _merge_session_cfg_three(api_cfg, ep, session_cfg)
    return resolved_url, combined_headers, combined_session
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _merge_session_cfg_three(
|
|
196
|
+
api_cfg: ApiConfig,
|
|
197
|
+
ep: EndpointConfig,
|
|
198
|
+
source_session_cfg: SessionConfig | None,
|
|
199
|
+
) -> SessionConfig | None:
|
|
200
|
+
"""
|
|
201
|
+
Merge session configurations from API, endpoint, and source.
|
|
202
|
+
|
|
203
|
+
Parameters
|
|
204
|
+
----------
|
|
205
|
+
api_cfg : ApiConfig
|
|
206
|
+
API configuration.
|
|
207
|
+
ep : EndpointConfig
|
|
208
|
+
Endpoint configuration.
|
|
209
|
+
source_session_cfg : SessionConfig | None
|
|
210
|
+
Source session configuration.
|
|
211
|
+
|
|
212
|
+
Returns
|
|
213
|
+
-------
|
|
214
|
+
SessionConfig | None
|
|
215
|
+
Merged session configuration.
|
|
216
|
+
"""
|
|
217
|
+
api_sess = getattr(api_cfg, 'session', None)
|
|
218
|
+
ep_sess = getattr(ep, 'session', None)
|
|
219
|
+
merged: dict[str, Any] = {}
|
|
220
|
+
if isinstance(api_sess, dict):
|
|
221
|
+
merged.update(api_sess)
|
|
222
|
+
if isinstance(ep_sess, dict):
|
|
223
|
+
merged.update(ep_sess)
|
|
224
|
+
if isinstance(source_session_cfg, dict):
|
|
225
|
+
merged.update(source_session_cfg)
|
|
226
|
+
return cast(SessionConfig | None, (merged or None))
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# -- Mapping Helpers -- #
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _copy_mapping(
|
|
233
|
+
mapping: Mapping[str, Any] | None,
|
|
234
|
+
) -> dict[str, Any]:
|
|
235
|
+
"""
|
|
236
|
+
Return a shallow copy of *mapping* or an empty dict.
|
|
237
|
+
|
|
238
|
+
Parameters
|
|
239
|
+
----------
|
|
240
|
+
mapping : Mapping[str, Any] | None
|
|
241
|
+
The mapping to copy.
|
|
242
|
+
|
|
243
|
+
Returns
|
|
244
|
+
-------
|
|
245
|
+
dict[str, Any]
|
|
246
|
+
A shallow copy of the mapping or an empty dict.
|
|
247
|
+
"""
|
|
248
|
+
return dict(mapping) if isinstance(mapping, Mapping) else {}
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _update_mapping(
|
|
252
|
+
target: dict[str, Any],
|
|
253
|
+
extra: Mapping[str, Any] | None,
|
|
254
|
+
) -> None:
|
|
255
|
+
"""
|
|
256
|
+
Update *target* with *extra* when provided.
|
|
257
|
+
|
|
258
|
+
Parameters
|
|
259
|
+
----------
|
|
260
|
+
target : dict[str, Any]
|
|
261
|
+
The target mapping to update.
|
|
262
|
+
extra : Mapping[str, Any] | None
|
|
263
|
+
The extra mapping to update the target with.
|
|
264
|
+
"""
|
|
265
|
+
if isinstance(extra, Mapping):
|
|
266
|
+
target.update(extra)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# -- Session -- #
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _build_session_optional(
    cfg: SessionConfig | None,
) -> requests.Session | None:
    """
    Build a session only when *cfg* is a ``dict``.

    Parameters
    ----------
    cfg : SessionConfig | None
        Session settings to hand to ``build_session``.

    Returns
    -------
    requests.Session | None
        The session built by ``build_session``, or ``None`` when *cfg* is
        not a ``dict``.
    """
    return build_session(cfg) if isinstance(cfg, dict) else None
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
# SECTION: FUNCTIONS ======================================================== #
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
# -- API Environment Composition -- #
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def build_endpoint_client(
    *,
    base_url: str,
    base_path: str | None,
    endpoints: dict[str, str],
    env: Mapping[str, Any],
) -> EndpointClient:
    """
    Instantiate an endpoint client from a composed request environment.

    Parameters
    ----------
    base_url : str
        Base URL passed to the client.
    base_path : str | None
        Base path passed to the client.
    endpoints : dict[str, str]
        Endpoint name to path mapping passed to the client.
    env : Mapping[str, Any]
        Environment mapping; its ``retry``, ``retry_network_errors``, and
        ``session`` entries are forwarded to the client.

    Returns
    -------
    EndpointClient
        The constructed client instance.
    """
    # Prefer ``EndpointClient`` as exposed on the ``run`` module so that
    # tests monkeypatching ``etlplus.run.EndpointClient`` take effect here.
    client_cls = EndpointClient
    try:
        from . import run as run_mod  # local import to avoid cycles

        client_cls = getattr(run_mod, 'EndpointClient', EndpointClient)
    except (ImportError, AttributeError):  # pragma: no cover - fallback path
        # Keep the class imported at module level.
        pass

    client_kwargs: dict[str, Any] = {
        'base_url': base_url,
        'base_path': base_path,
        'endpoints': endpoints,
        'retry': env.get('retry'),
        'retry_network_errors': bool(env.get('retry_network_errors', False)),
        'session': env.get('session'),
    }
    return client_cls(**client_kwargs)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def compose_api_request_env(
    cfg: Any,
    source_obj: Any,
    ex_opts: Mapping[str, Any] | None,
) -> ApiRequestEnv:
    """
    Resolve the settings needed to issue an API extract request.

    Settings are layered. Attributes of *source_obj* come first. When the
    source names both an ``api`` and an ``endpoint``, the matching API and
    endpoint definitions fill in defaults and the URL is always taken from
    the endpoint. Entries in *ex_opts* are applied last as overrides.

    Parameters
    ----------
    cfg : Any
        Configuration object exposing an ``apis`` mapping.
    source_obj : Any
        Source definition; optional attributes are read with ``getattr``.
    ex_opts : Mapping[str, Any] | None
        Per-call overrides (``query_params``, ``headers``, ``timeout``,
        ``pagination``, ``rate_limit``, ``retry``,
        ``retry_network_errors``, ``session``).

    Returns
    -------
    ApiRequestEnv
        The composed request environment.

    Raises
    ------
    ValueError
        If the source names an API or endpoint that is not defined.
    """
    options: Mapping[str, Any] = ex_opts or {}

    # Layer 1: values declared directly on the source object.
    url: Url | None = getattr(source_obj, 'url', None)
    params: dict[str, Any] = _copy_mapping(
        cast(
            'Mapping[str, Any] | None',
            getattr(source_obj, 'query_params', None),
        ),
    )
    headers: dict[str, str] = _copy_mapping(
        cast(
            'Mapping[str, str] | None',
            getattr(source_obj, 'headers', None),
        ),
    )
    pagination = getattr(source_obj, 'pagination', None)
    rate_limit = getattr(source_obj, 'rate_limit', None)
    retry = cast('RetryPolicy | None', getattr(source_obj, 'retry', None))
    retry_network_errors = bool(
        getattr(source_obj, 'retry_network_errors', False),
    )
    session_cfg = cast(
        'SessionConfig | None',
        getattr(source_obj, 'session', None),
    )
    api_name = getattr(source_obj, 'api', None)
    endpoint_name = getattr(source_obj, 'endpoint', None)

    # Endpoint-client details stay at these defaults unless the source
    # references a configured API endpoint.
    client_info: dict[str, Any] = {
        'use_endpoints': False,
        'base_url': None,
        'base_path': None,
        'endpoints_map': None,
        'endpoint_key': None,
    }

    # Layer 2: defaults inherited from the API and endpoint definitions.
    if api_name and endpoint_name:
        api_cfg, ep = _get_api_cfg_and_endpoint(cfg, api_name, endpoint_name)
        url, headers, session_cfg = _inherit_http_from_api_endpoint(
            api_cfg,
            ep,
            url,
            headers,
            session_cfg,
            force_url=True,
        )
        # Endpoint query params are the base; source params override them.
        endpoint_params: dict[str, Any] = _copy_mapping(
            cast(
                'Mapping[str, Any] | None',
                getattr(ep, 'query_params', None),
            ),
        )
        _update_mapping(endpoint_params, params)
        params = endpoint_params
        if not pagination:
            pagination = (
                ep.pagination or api_cfg.effective_pagination_defaults()
            )
        if not rate_limit:
            rate_limit = (
                ep.rate_limit or api_cfg.effective_rate_limit_defaults()
            )
        if not retry:
            retry = cast(
                'RetryPolicy | None',
                getattr(ep, 'retry', None) or getattr(api_cfg, 'retry', None),
            )
        if not retry_network_errors:
            retry_network_errors = bool(
                getattr(ep, 'retry_network_errors', False),
            ) or bool(getattr(api_cfg, 'retry_network_errors', False))
        client_info = {
            'use_endpoints': True,
            'base_url': api_cfg.base_url,
            'base_path': api_cfg.effective_base_path(),
            'endpoints_map': {
                name: spec.path for name, spec in api_cfg.endpoints.items()
            },
            'endpoint_key': endpoint_name,
        }

    # Layer 3: per-call overrides.
    _update_mapping(
        params,
        cast('Mapping[str, Any] | None', options.get('query_params')),
    )
    _update_mapping(
        headers,
        cast('Mapping[str, str] | None', options.get('headers')),
    )
    timeout: Timeout = options.get('timeout')
    pagination_overrides = options.get('pagination', {})
    rate_limit_overrides = options.get('rate_limit', {})
    retry_override = cast('RetryPolicy | None', options.get('retry'))
    network_retry_override = options.get('retry_network_errors')
    session_override = cast('SessionConfig | None', options.get('session'))

    sleep_seconds = (
        compute_rl_sleep_seconds(rate_limit, rate_limit_overrides) or 0.0
    )
    if retry_override is not None:
        retry = retry_override
    if network_retry_override is not None:
        retry_network_errors = bool(network_retry_override)
    if isinstance(session_override, dict):
        layered_session: dict[str, Any] = dict(cast(dict, session_cfg or {}))
        layered_session.update(session_override)
        session_cfg = cast('SessionConfig', layered_session)

    pagination_cfg: PaginationConfigMap | None = build_pagination_cfg(
        pagination,
        pagination_overrides,
    )
    session = _build_session_optional(session_cfg)

    return cast(
        'ApiRequestEnv',
        {
            **client_info,
            'url': url,
            'params': params,
            'headers': headers,
            'timeout': timeout,
            'pagination': pagination_cfg,
            'sleep_seconds': sleep_seconds,
            'retry': retry,
            'retry_network_errors': retry_network_errors,
            'session': session,
        },
    )
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def compose_api_target_env(
    cfg: Any,
    target_obj: Any,
    overrides: Mapping[str, Any] | None,
) -> ApiTargetEnv:
    """
    Resolve the settings needed to issue an API load request.

    Overrides win over attributes of *target_obj*. API and endpoint
    definitions are consulted only when the target names both an ``api``
    and an ``endpoint`` and no URL was resolved from the overrides or the
    target itself.

    Parameters
    ----------
    cfg : Any
        Configuration object exposing an ``apis`` mapping.
    target_obj : Any
        Target definition; optional attributes are read with ``getattr``.
    overrides : Mapping[str, Any] | None
        Per-call overrides (``url``, ``method``, ``headers``, ``timeout``,
        ``session``).

    Returns
    -------
    ApiTargetEnv
        The composed target environment.

    Raises
    ------
    ValueError
        If the API or endpoint lookup is performed and either is not
        defined.
    """
    options: Mapping[str, Any] = overrides or {}

    url = cast(
        'Url | None',
        options.get('url') or getattr(target_obj, 'url', None),
    )
    # ``post`` applies only when the target has no ``method`` attribute.
    method = cast(
        'str | None',
        options.get('method') or getattr(target_obj, 'method', 'post'),
    )

    headers = _copy_mapping(
        cast('Mapping[str, str] | None', getattr(target_obj, 'headers', None)),
    )
    _update_mapping(
        headers,
        cast('Mapping[str, str] | None', options.get('headers')),
    )

    timeout: Timeout = None
    if 'timeout' in options:
        timeout = cast('Timeout', options.get('timeout'))

    session_cfg = cast('SessionConfig | None', options.get('session'))

    api_name = getattr(target_obj, 'api', None)
    endpoint_name = getattr(target_obj, 'endpoint', None)
    needs_endpoint_lookup = bool(api_name and endpoint_name and not url)
    if needs_endpoint_lookup:
        api_cfg, ep = _get_api_cfg_and_endpoint(cfg, api_name, endpoint_name)
        url, headers, session_cfg = _inherit_http_from_api_endpoint(
            api_cfg,
            ep,
            url,
            headers,
            session_cfg,
            force_url=False,
        )

    return {
        'url': url,
        'method': method,
        'headers': headers,
        'timeout': timeout,
        'session': _build_session_optional(session_cfg),
    }
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
# -- Pagination -- #
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def build_pagination_cfg(
|
|
565
|
+
pagination: PaginationConfig | None,
|
|
566
|
+
overrides: Mapping[str, Any] | None,
|
|
567
|
+
) -> PaginationConfigMap | None:
|
|
568
|
+
"""
|
|
569
|
+
Build pagination configuration.
|
|
570
|
+
|
|
571
|
+
Parameters
|
|
572
|
+
----------
|
|
573
|
+
pagination : PaginationConfig | None
|
|
574
|
+
Pagination configuration.
|
|
575
|
+
overrides : Mapping[str, Any] | None
|
|
576
|
+
Override configuration options.
|
|
577
|
+
|
|
578
|
+
Returns
|
|
579
|
+
-------
|
|
580
|
+
PaginationConfigMap | None
|
|
581
|
+
Pagination configuration.
|
|
582
|
+
"""
|
|
583
|
+
ptype: str | None = None
|
|
584
|
+
records_path = None
|
|
585
|
+
max_pages = None
|
|
586
|
+
max_records = None
|
|
587
|
+
if pagination:
|
|
588
|
+
ptype = (getattr(pagination, 'type', '') or '').strip().lower()
|
|
589
|
+
records_path = getattr(pagination, 'records_path', None)
|
|
590
|
+
max_pages = getattr(pagination, 'max_pages', None)
|
|
591
|
+
max_records = getattr(pagination, 'max_records', None)
|
|
592
|
+
if overrides:
|
|
593
|
+
ptype = (overrides.get('type') or ptype or '').strip().lower()
|
|
594
|
+
records_path = overrides.get('records_path', records_path)
|
|
595
|
+
max_pages = overrides.get('max_pages', max_pages)
|
|
596
|
+
max_records = overrides.get('max_records', max_records)
|
|
597
|
+
if not ptype:
|
|
598
|
+
return None
|
|
599
|
+
cfg: dict[str, Any] = {
|
|
600
|
+
'type': ptype,
|
|
601
|
+
'records_path': records_path,
|
|
602
|
+
'max_pages': max_pages,
|
|
603
|
+
'max_records': max_records,
|
|
604
|
+
}
|
|
605
|
+
match ptype:
|
|
606
|
+
case 'page' | 'offset':
|
|
607
|
+
page_param = overrides.get('page_param') if overrides else None
|
|
608
|
+
size_param = overrides.get('size_param') if overrides else None
|
|
609
|
+
start_page = overrides.get('start_page') if overrides else None
|
|
610
|
+
page_size = overrides.get('page_size') if overrides else None
|
|
611
|
+
if pagination:
|
|
612
|
+
page_param = (
|
|
613
|
+
page_param
|
|
614
|
+
or getattr(pagination, 'page_param', None)
|
|
615
|
+
or 'page'
|
|
616
|
+
)
|
|
617
|
+
size_param = (
|
|
618
|
+
size_param
|
|
619
|
+
or getattr(pagination, 'size_param', None)
|
|
620
|
+
or 'per_page'
|
|
621
|
+
)
|
|
622
|
+
start_page = (
|
|
623
|
+
start_page or getattr(pagination, 'start_page', None) or 1
|
|
624
|
+
)
|
|
625
|
+
page_size = (
|
|
626
|
+
page_size or getattr(pagination, 'page_size', None) or 100
|
|
627
|
+
)
|
|
628
|
+
cfg.update(
|
|
629
|
+
{
|
|
630
|
+
'page_param': str(page_param or 'page'),
|
|
631
|
+
'size_param': str(size_param or 'per_page'),
|
|
632
|
+
'start_page': int(start_page or 1),
|
|
633
|
+
'page_size': int(page_size or 100),
|
|
634
|
+
},
|
|
635
|
+
)
|
|
636
|
+
case 'cursor':
|
|
637
|
+
cursor_param = overrides.get('cursor_param') if overrides else None
|
|
638
|
+
cursor_path = overrides.get('cursor_path') if overrides else None
|
|
639
|
+
page_size = overrides.get('page_size') if overrides else None
|
|
640
|
+
start_cursor = None
|
|
641
|
+
if pagination:
|
|
642
|
+
cursor_param = (
|
|
643
|
+
cursor_param
|
|
644
|
+
or getattr(pagination, 'cursor_param', None)
|
|
645
|
+
or 'cursor'
|
|
646
|
+
)
|
|
647
|
+
cursor_path = cursor_path or getattr(
|
|
648
|
+
pagination,
|
|
649
|
+
'cursor_path',
|
|
650
|
+
None,
|
|
651
|
+
)
|
|
652
|
+
page_size = (
|
|
653
|
+
page_size or getattr(pagination, 'page_size', None) or 100
|
|
654
|
+
)
|
|
655
|
+
start_cursor = getattr(pagination, 'start_cursor', None)
|
|
656
|
+
cfg.update(
|
|
657
|
+
{
|
|
658
|
+
'cursor_param': str(cursor_param or 'cursor'),
|
|
659
|
+
'cursor_path': cursor_path,
|
|
660
|
+
'page_size': int(page_size or 100),
|
|
661
|
+
'start_cursor': start_cursor,
|
|
662
|
+
},
|
|
663
|
+
)
|
|
664
|
+
case _:
|
|
665
|
+
pass
|
|
666
|
+
|
|
667
|
+
return cast(PaginationConfigMap, cfg)
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
# -- Pagination Invocation -- #
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def paginate_with_client(
    client: Any,
    endpoint_key: str,
    params: Params | None,
    headers: Headers | None,
    timeout: Timeout,
    pagination: PaginationConfigMap | None,
    sleep_seconds: float | None,
) -> Any:
    """
    Call ``client.paginate`` using the keyword names it actually accepts.

    For ``params``, ``headers``, ``timeout``, and ``sleep_seconds`` the
    underscore-prefixed keyword (e.g. ``_params``) is used when it appears
    in the signature of ``client.paginate``; otherwise the plain name is
    used.

    Parameters
    ----------
    client : Any
        Object exposing a ``paginate`` callable.
    endpoint_key : str
        Endpoint key passed as the first positional argument.
    params : Params | None
        Query parameters to forward.
    headers : Headers | None
        Headers to forward.
    timeout : Timeout
        Timeout to forward.
    pagination : PaginationConfigMap | None
        Pagination settings, always forwarded as ``pagination``.
    sleep_seconds : float | None
        Delay between requests; ``None`` is forwarded as ``0.0``.

    Returns
    -------
    Any
        Whatever ``client.paginate`` returns.
    """
    accepted = inspect.signature(  # type: ignore[arg-type]
        client.paginate,
    ).parameters
    forwarded = (
        ('params', params),
        ('headers', headers),
        ('timeout', timeout),
        ('sleep_seconds', 0.0 if sleep_seconds is None else sleep_seconds),
    )
    call_kwargs: dict[str, Any] = {'pagination': pagination}
    for name, value in forwarded:
        private_name = f'_{name}'
        chosen = private_name if private_name in accepted else name
        call_kwargs[chosen] = value
    return client.paginate(endpoint_key, **call_kwargs)
|
|
728
|
+
|
|
729
|
+
|
|
730
|
+
# -- Rate Limit -- #
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
def compute_rl_sleep_seconds(
    rate_limit: RateLimitConfig | Mapping[str, Any] | None,
    overrides: Mapping[str, Any] | None,
) -> float:
    """
    Resolve the inter-request delay from rate-limit settings.

    The inputs are normalized and the actual computation is delegated to
    ``RateLimiter.resolve_sleep_seconds``.

    Parameters
    ----------
    rate_limit : RateLimitConfig | Mapping[str, Any] | None
        Base settings. A truthy object with a ``sleep_seconds`` attribute
        is reduced to its ``sleep_seconds`` and ``max_per_sec`` values;
        anything else is passed through unchanged.
    overrides : Mapping[str, Any] | None
        Per-call overrides; only the ``sleep_seconds`` and ``max_per_sec``
        keys are forwarded, and only when present.

    Returns
    -------
    float
        The value returned by ``RateLimiter.resolve_sleep_seconds``.
    """
    recognized_keys = ('sleep_seconds', 'max_per_sec')

    base: Mapping[str, Any] | None
    if rate_limit and hasattr(rate_limit, 'sleep_seconds'):
        # Attribute-style config: flatten to the two recognized fields.
        base = {
            key: getattr(rate_limit, key, None) for key in recognized_keys
        }
    else:
        base = cast('Mapping[str, Any] | None', rate_limit)

    selected: dict[str, float | None] = {}
    if overrides:
        selected = {
            key: cast('float | None', overrides.get(key))
            for key in recognized_keys
            if key in overrides
        }

    return RateLimiter.resolve_sleep_seconds(
        rate_limit=cast('RateLimitConfigMap | None', base),
        # An empty selection means "no overrides".
        overrides=cast('RateLimitConfigMap', selected) if selected else None,
    )
|
|
783
|
+
|
|
784
|
+
|
|
785
|
+
# -- Session -- #
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
def build_session(
    cfg: SessionConfig | None,
) -> requests.Session:
    """
    Create a ``requests.Session`` and apply the given settings to it.

    Parameters
    ----------
    cfg : SessionConfig | None
        Session settings. When falsy, an unmodified session is returned.

    Returns
    -------
    requests.Session
        The new session.

    Notes
    -----
    ``headers``, ``params``, ``proxies``, and ``cookies`` are applied only
    when given as ``dict`` instances. Failures while assigning ``params``,
    ``cookies``, or ``trust_env`` are deliberately ignored (best effort).
    """
    session = requests.Session()
    if not cfg:
        return session

    if isinstance(header_cfg := cfg.get('headers'), dict):
        session.headers.update(header_cfg)

    if isinstance(param_cfg := cfg.get('params'), dict):
        try:
            session.params = param_cfg
        except (AttributeError, TypeError):
            # Best effort: the session object may reject the assignment.
            pass

    if (credentials := cfg.get('auth')) is not None:
        # Two-item sequences are stored as a (user, password) tuple.
        is_pair = (
            isinstance(credentials, (list, tuple)) and len(credentials) == 2
        )
        session.auth = (  # type: ignore[assignment]
            (credentials[0], credentials[1]) if is_pair else credentials
        )

    if 'verify' in cfg:
        session.verify = cfg.get('verify')  # type: ignore[assignment]

    if (client_cert := cfg.get('cert')) is not None:
        session.cert = client_cert  # type: ignore[assignment]

    if isinstance(proxy_cfg := cfg.get('proxies'), dict):
        session.proxies.update(proxy_cfg)

    if isinstance(cookie_cfg := cfg.get('cookies'), dict):
        try:
            session.cookies.update(cookie_cfg)
        except (TypeError, ValueError):
            # Best effort: skip cookies that cannot be stored.
            pass

    if 'trust_env' in cfg:
        try:
            session.trust_env = bool(  # type: ignore[attr-defined]
                cfg.get('trust_env'),
            )
        except AttributeError:
            # Best effort: the session object may not allow this attribute.
            pass

    return session
|