etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
etlplus/run_helpers.py ADDED
@@ -0,0 +1,843 @@
1
+ """
2
+ :mod:`etlplus.run_helpers` module.
3
+
4
+ Helper functions and small utilities used by ``etlplus.run`` to compose API
5
+ request/load environments, pagination configs, session objects, and endpoint
6
+ clients. Extracted to keep ``run.py`` focused on orchestration while enabling
7
+ reuse and testability.
8
+
9
+ Public (re-export safe) helpers:
10
+ - build_pagination_cfg(pagination, overrides)
11
+ - build_session(cfg)
12
+ - compose_api_request_env(cfg, source_obj, ex_opts)
13
+ - compose_api_target_env(cfg, target_obj, overrides)
14
+ - build_endpoint_client(*, base_url, base_path, endpoints, env)
15
+ - compute_rl_sleep_seconds(rate_limit, overrides)
16
+ - paginate_with_client(client, endpoint_key, params, headers,
17
+ timeout, pagination, sleep_seconds)
18
+
19
+ Notes
20
+ -----
21
+ These helpers intentionally accept permissive ``Any``/``Mapping`` inputs to
22
+ avoid tight coupling with config dataclasses while keeping runtime flexible.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import inspect
28
+ from collections.abc import Mapping
29
+ from typing import Any
30
+ from typing import TypedDict
31
+ from typing import cast
32
+
33
+ import requests # type: ignore[import]
34
+
35
+ from .api import ApiConfig
36
+ from .api import EndpointClient
37
+ from .api import EndpointConfig
38
+ from .api import Headers
39
+ from .api import PaginationConfig
40
+ from .api import PaginationConfigMap
41
+ from .api import Params
42
+ from .api import RateLimitConfig
43
+ from .api import RateLimitConfigMap
44
+ from .api import RateLimiter
45
+ from .api import RetryPolicy
46
+ from .api import Url
47
+ from .types import Timeout
48
+
49
+ # SECTION: EXPORTS ========================================================== #
50
+
51
+
52
# Names exported by ``from etlplus.run_helpers import *``.
__all__ = [
    # Helper functions (alphabetical)
    'build_endpoint_client',
    'build_pagination_cfg',
    'build_session',
    'compose_api_request_env',
    'compose_api_target_env',
    'compute_rl_sleep_seconds',
    'paginate_with_client',
    # TypedDict shapes produced/consumed by the helpers above
    'ApiRequestEnv',
    'ApiTargetEnv',
    'SessionConfig',
]
66
+
67
+
68
+ # SECTION: TYPED DICTS ====================================================== #
69
+
70
+
71
class ApiRequestEnv(TypedDict, total=False):
    """
    Resolved settings for a single API extract request.

    This is the mapping shape returned by ``compose_api_request_env``.
    Every key is optional (``total=False``).
    """

    # -- Plain HTTP request settings -- #
    url: Url | None
    headers: dict[str, str]
    timeout: Timeout
    session: requests.Session | None

    # -- Endpoint-client routing -- #
    # ``use_endpoints`` is set to True by ``compose_api_request_env`` only
    # when the source names both an API and an endpoint; the four keys after
    # it are populated in that same case and are ``None`` otherwise.
    use_endpoints: bool
    base_url: str | None
    base_path: str | None
    endpoints_map: dict[str, str] | None
    endpoint_key: str | None

    # -- Query, pagination, throttling and retry behaviour -- #
    params: dict[str, Any]
    pagination: PaginationConfigMap | None
    sleep_seconds: float
    retry: RetryPolicy | None
    retry_network_errors: bool
88
+
89
+
90
class ApiTargetEnv(TypedDict, total=False):
    """
    Resolved settings for a single API load (target) request.

    This is the mapping shape returned by ``compose_api_target_env``.
    Every key is optional (``total=False``).
    """

    # Where and how to send the payload
    url: Url | None
    headers: dict[str, str]
    timeout: Timeout
    session: requests.Session | None
    # HTTP verb name; ``compose_api_target_env`` falls back to 'post' when
    # the target object has no ``method`` attribute.
    method: str | None
98
+
99
+
100
class SessionConfig(TypedDict, total=False):
    """
    Options applied to a ``requests.Session`` by ``build_session``.

    Every key is optional (``total=False``); each key names the session
    attribute that ``build_session`` sets or updates.
    """

    # Mapping-valued options merged into / assigned onto the session
    headers: Mapping[str, Any]
    params: Mapping[str, Any]
    # Passed through to ``Session.auth``; a 2-item list/tuple is normalized
    # to a tuple by ``build_session``.
    auth: Any
    verify: bool | str
    cert: Any
    proxies: Mapping[str, Any]
    cookies: Mapping[str, Any]
    trust_env: bool
111
+
112
+
113
+ # SECTION: INTERNAL FUNCTIONS ============================================== #
114
+
115
+
116
+ # -- API Environment Composition -- #
117
+
118
+
119
+ def _get_api_cfg_and_endpoint(
120
+ cfg: Any,
121
+ api_name: str,
122
+ endpoint_name: str,
123
+ ) -> tuple[ApiConfig, EndpointConfig]:
124
+ """
125
+ Retrieve API configuration and endpoint configuration.
126
+
127
+ Parameters
128
+ ----------
129
+ cfg : Any
130
+ The overall configuration object.
131
+ api_name : str
132
+ The name of the API to retrieve.
133
+ endpoint_name : str
134
+ The name of the endpoint to retrieve.
135
+
136
+ Returns
137
+ -------
138
+ tuple[ApiConfig, EndpointConfig]
139
+ The API configuration and endpoint configuration.
140
+
141
+ Raises
142
+ ------
143
+ ValueError
144
+ If the API or endpoint is not defined.
145
+ """
146
+ api_cfg = cfg.apis.get(api_name)
147
+ if not api_cfg:
148
+ raise ValueError(f'API not defined: {api_name}')
149
+ ep = api_cfg.endpoints.get(endpoint_name)
150
+ if not ep:
151
+ raise ValueError(
152
+ f'Endpoint "{endpoint_name}" not defined in API "{api_name}"',
153
+ )
154
+ return api_cfg, ep
155
+
156
+
157
def _inherit_http_from_api_endpoint(
    api_cfg: ApiConfig,
    ep: EndpointConfig,
    url: Url | None,
    headers: dict[str, str],
    session_cfg: SessionConfig | None,
    force_url: bool = False,
) -> tuple[Url | None, dict[str, str], SessionConfig | None]:
    """
    Layer caller-supplied HTTP settings over API/endpoint definitions.

    Parameters
    ----------
    api_cfg : ApiConfig
        API definition supplying default headers and the endpoint URL.
    ep : EndpointConfig
        Endpoint definition within *api_cfg*.
    url : Url | None
        Caller's URL; kept unless it is falsy or *force_url* is set.
    headers : dict[str, str]
        Caller's headers; they win over the API headers on key conflicts.
    session_cfg : SessionConfig | None
        Caller's session options, merged last over API and endpoint ones.
    force_url : bool, optional
        When true, always replace *url* with the endpoint URL built by
        ``api_cfg.build_endpoint_url``.

    Returns
    -------
    tuple[Url | None, dict[str, str], SessionConfig | None]
        The resolved ``(url, headers, session_cfg)`` triple.
    """
    resolved_url = url
    if force_url or not resolved_url:
        resolved_url = api_cfg.build_endpoint_url(ep)
    # API headers act as defaults underneath the caller's headers.
    combined_headers = {**api_cfg.headers, **headers}
    combined_session = _merge_session_cfg_three(api_cfg, ep, session_cfg)
    return resolved_url, combined_headers, combined_session
193
+
194
+
195
+ def _merge_session_cfg_three(
196
+ api_cfg: ApiConfig,
197
+ ep: EndpointConfig,
198
+ source_session_cfg: SessionConfig | None,
199
+ ) -> SessionConfig | None:
200
+ """
201
+ Merge session configurations from API, endpoint, and source.
202
+
203
+ Parameters
204
+ ----------
205
+ api_cfg : ApiConfig
206
+ API configuration.
207
+ ep : EndpointConfig
208
+ Endpoint configuration.
209
+ source_session_cfg : SessionConfig | None
210
+ Source session configuration.
211
+
212
+ Returns
213
+ -------
214
+ SessionConfig | None
215
+ Merged session configuration.
216
+ """
217
+ api_sess = getattr(api_cfg, 'session', None)
218
+ ep_sess = getattr(ep, 'session', None)
219
+ merged: dict[str, Any] = {}
220
+ if isinstance(api_sess, dict):
221
+ merged.update(api_sess)
222
+ if isinstance(ep_sess, dict):
223
+ merged.update(ep_sess)
224
+ if isinstance(source_session_cfg, dict):
225
+ merged.update(source_session_cfg)
226
+ return cast(SessionConfig | None, (merged or None))
227
+
228
+
229
+ # -- Mapping Helpers -- #
230
+
231
+
232
+ def _copy_mapping(
233
+ mapping: Mapping[str, Any] | None,
234
+ ) -> dict[str, Any]:
235
+ """
236
+ Return a shallow copy of *mapping* or an empty dict.
237
+
238
+ Parameters
239
+ ----------
240
+ mapping : Mapping[str, Any] | None
241
+ The mapping to copy.
242
+
243
+ Returns
244
+ -------
245
+ dict[str, Any]
246
+ A shallow copy of the mapping or an empty dict.
247
+ """
248
+ return dict(mapping) if isinstance(mapping, Mapping) else {}
249
+
250
+
251
+ def _update_mapping(
252
+ target: dict[str, Any],
253
+ extra: Mapping[str, Any] | None,
254
+ ) -> None:
255
+ """
256
+ Update *target* with *extra* when provided.
257
+
258
+ Parameters
259
+ ----------
260
+ target : dict[str, Any]
261
+ The target mapping to update.
262
+ extra : Mapping[str, Any] | None
263
+ The extra mapping to update the target with.
264
+ """
265
+ if isinstance(extra, Mapping):
266
+ target.update(extra)
267
+
268
+
269
+ # -- Session -- #
270
+
271
+
272
def _build_session_optional(
    cfg: SessionConfig | None,
) -> requests.Session | None:
    """
    Build a session only when *cfg* is a ``dict``.

    Parameters
    ----------
    cfg : SessionConfig | None
        Session options; any non-``dict`` value yields no session.

    Returns
    -------
    requests.Session | None
        The result of ``build_session(cfg)``, or ``None``.
    """
    return build_session(cfg) if isinstance(cfg, dict) else None
292
+
293
+
294
+ # SECTION: FUNCTIONS ======================================================== #
295
+
296
+
297
+ # -- API Environment Composition -- #
298
+
299
+
300
def build_endpoint_client(
    *,
    base_url: str,
    base_path: str | None,
    endpoints: dict[str, str],
    env: Mapping[str, Any],
) -> EndpointClient:
    """
    Instantiate an endpoint client from a composed request environment.

    Parameters
    ----------
    base_url : str
        Base URL passed to the client.
    base_path : str | None
        Base path passed to the client.
    endpoints : dict[str, str]
        Endpoint-name to path mapping passed to the client.
    env : Mapping[str, Any]
        Environment mapping; its ``retry``, ``retry_network_errors`` and
        ``session`` entries are forwarded to the client.

    Returns
    -------
    EndpointClient
        The constructed client instance.
    """
    # Prefer the ``EndpointClient`` attribute on ``etlplus.run`` so that a
    # test monkeypatching it there is honoured here as well; fall back to
    # the class imported by this module.
    client_cls = EndpointClient
    try:
        from . import run as run_module  # local import to avoid cycles

        client_cls = getattr(run_module, 'EndpointClient', EndpointClient)
    except (ImportError, AttributeError):  # pragma: no cover - fallback path
        client_cls = EndpointClient

    retry_policy = env.get('retry')
    retry_on_network = bool(env.get('retry_network_errors', False))
    session = env.get('session')
    return client_cls(
        base_url=base_url,
        base_path=base_path,
        endpoints=endpoints,
        retry=retry_policy,
        retry_network_errors=retry_on_network,
        session=session,
    )
342
+
343
+
344
def compose_api_request_env(
    cfg: Any,
    source_obj: Any,
    ex_opts: Mapping[str, Any] | None,
) -> ApiRequestEnv:
    """
    Resolve every setting needed to issue an API extract request.

    Values are layered in this order: settings read from *source_obj*,
    then (when the source names both ``api`` and ``endpoint``) values
    inherited from the endpoint and API definitions, and finally the
    per-run overrides in *ex_opts*.

    Parameters
    ----------
    cfg : Any
        Configuration object; its ``apis`` mapping is consulted when the
        source references an API endpoint.
    source_obj : Any
        Source definition. The attributes ``url``, ``query_params``,
        ``headers``, ``pagination``, ``rate_limit``, ``retry``,
        ``retry_network_errors``, ``session``, ``api`` and ``endpoint``
        are read when present.
    ex_opts : Mapping[str, Any] | None
        Per-run overrides. Recognized keys: ``query_params``, ``headers``,
        ``timeout``, ``pagination``, ``rate_limit``, ``retry``,
        ``retry_network_errors`` and ``session``.

    Returns
    -------
    ApiRequestEnv
        The composed request environment.

    Raises
    ------
    ValueError
        Propagated from ``_get_api_cfg_and_endpoint`` when the referenced
        API or endpoint is not defined.
    """
    opts: Mapping[str, Any] = ex_opts or {}

    # -- Layer 1: settings declared directly on the source -- #
    url: Url | None = getattr(source_obj, 'url', None)
    params: dict[str, Any] = _copy_mapping(
        cast(
            Mapping[str, Any] | None,
            getattr(source_obj, 'query_params', None),
        ),
    )
    headers: dict[str, str] = _copy_mapping(
        cast(Mapping[str, str] | None, getattr(source_obj, 'headers', None)),
    )
    pagination = getattr(source_obj, 'pagination', None)
    rate_limit = getattr(source_obj, 'rate_limit', None)
    retry = cast(RetryPolicy | None, getattr(source_obj, 'retry', None))
    retry_network_errors = bool(
        getattr(source_obj, 'retry_network_errors', False),
    )
    session_cfg = cast(
        SessionConfig | None,
        getattr(source_obj, 'session', None),
    )
    api_name = getattr(source_obj, 'api', None)
    endpoint_name = getattr(source_obj, 'endpoint', None)

    # Endpoint-client routing info; stays "off" unless an API endpoint is
    # referenced by the source.
    routing: dict[str, Any] = {
        'use_endpoints': False,
        'base_url': None,
        'base_path': None,
        'endpoints_map': None,
        'endpoint_key': None,
    }

    # -- Layer 2: inherit from the referenced API endpoint -- #
    if api_name and endpoint_name:
        api_cfg, ep = _get_api_cfg_and_endpoint(cfg, api_name, endpoint_name)
        url, headers, session_cfg = _inherit_http_from_api_endpoint(
            api_cfg,
            ep,
            url,
            headers,
            session_cfg,
            force_url=True,
        )
        # Endpoint query params are defaults; source params win on conflict.
        merged_params: dict[str, Any] = _copy_mapping(
            cast(Mapping[str, Any] | None, getattr(ep, 'query_params', None)),
        )
        merged_params.update(params)
        params = merged_params
        # First truthy value wins: source, then endpoint, then API defaults.
        pagination = (
            pagination
            or ep.pagination
            or api_cfg.effective_pagination_defaults()
        )
        rate_limit = (
            rate_limit
            or ep.rate_limit
            or api_cfg.effective_rate_limit_defaults()
        )
        retry = cast(
            RetryPolicy | None,
            retry
            or getattr(ep, 'retry', None)
            or getattr(api_cfg, 'retry', None),
        )
        retry_network_errors = (
            retry_network_errors
            or bool(getattr(ep, 'retry_network_errors', False))
            or bool(getattr(api_cfg, 'retry_network_errors', False))
        )
        routing = {
            'use_endpoints': True,
            'base_url': api_cfg.base_url,
            'base_path': api_cfg.effective_base_path(),
            'endpoints_map': {
                name: endpoint.path
                for name, endpoint in api_cfg.endpoints.items()
            },
            'endpoint_key': endpoint_name,
        }

    # -- Layer 3: per-run overrides -- #
    _update_mapping(
        params,
        cast(Mapping[str, Any] | None, opts.get('query_params')),
    )
    _update_mapping(
        headers,
        cast(Mapping[str, str] | None, opts.get('headers')),
    )
    timeout: Timeout = opts.get('timeout')
    sleep_seconds = (
        compute_rl_sleep_seconds(rate_limit, opts.get('rate_limit', {}))
        or 0.0
    )

    # ``None`` overrides (or absent keys) leave the inherited value alone.
    retry_override = cast(RetryPolicy | None, opts.get('retry'))
    if retry_override is not None:
        retry = retry_override
    network_retry_override = opts.get('retry_network_errors')
    if network_retry_override is not None:
        retry_network_errors = bool(network_retry_override)

    session_override = cast(SessionConfig | None, opts.get('session'))
    if isinstance(session_override, dict):
        layered: dict[str, Any] = dict(cast(dict, session_cfg or {}))
        layered.update(session_override)
        session_cfg = cast(SessionConfig, layered)

    pagination_cfg: PaginationConfigMap | None = build_pagination_cfg(
        pagination,
        opts.get('pagination', {}),
    )
    session = _build_session_optional(session_cfg)

    return cast(
        ApiRequestEnv,
        {
            **routing,
            'url': url,
            'params': params,
            'headers': headers,
            'timeout': timeout,
            'pagination': pagination_cfg,
            'sleep_seconds': sleep_seconds,
            'retry': retry,
            'retry_network_errors': retry_network_errors,
            'session': session,
        },
    )
494
+
495
+
496
def compose_api_target_env(
    cfg: Any,
    target_obj: Any,
    overrides: Mapping[str, Any] | None,
) -> ApiTargetEnv:
    """
    Resolve every setting needed to send data to an API target.

    Parameters
    ----------
    cfg : Any
        Configuration object; its ``apis`` mapping is consulted when the
        target references an API endpoint and no URL is otherwise known.
    target_obj : Any
        Target definition. The attributes ``url``, ``method``, ``headers``,
        ``api`` and ``endpoint`` are read when present.
    overrides : Mapping[str, Any] | None
        Per-run overrides. Recognized keys: ``url``, ``method``,
        ``headers``, ``timeout`` and ``session``.

    Returns
    -------
    ApiTargetEnv
        The composed target environment.

    Raises
    ------
    ValueError
        Propagated from ``_get_api_cfg_and_endpoint`` when the referenced
        API or endpoint is not defined.
    """
    opts: Mapping[str, Any] = overrides or {}

    # A truthy override wins; otherwise fall back to the target definition.
    url = cast(Url | None, opts.get('url') or getattr(target_obj, 'url', None))
    method = cast(
        str | None,
        opts.get('method') or getattr(target_obj, 'method', 'post'),
    )

    headers = _copy_mapping(
        cast(Mapping[str, str] | None, getattr(target_obj, 'headers', None)),
    )
    _update_mapping(
        headers,
        cast(Mapping[str, str] | None, opts.get('headers')),
    )

    # Timeout and session options come from the overrides only.
    timeout = cast(Timeout, opts.get('timeout'))
    session_cfg = cast(SessionConfig | None, opts.get('session'))

    api_name = getattr(target_obj, 'api', None)
    endpoint_name = getattr(target_obj, 'endpoint', None)
    # API/endpoint inheritance only applies when no explicit URL was found.
    if api_name and endpoint_name and not url:
        api_cfg, ep = _get_api_cfg_and_endpoint(cfg, api_name, endpoint_name)
        url, headers, session_cfg = _inherit_http_from_api_endpoint(
            api_cfg,
            ep,
            url,
            headers,
            session_cfg,
            force_url=False,
        )

    env: ApiTargetEnv = {
        'url': url,
        'method': method,
        'headers': headers,
        'timeout': timeout,
        'session': _build_session_optional(session_cfg),
    }
    return env
559
+
560
+
561
+ # -- Pagination -- #
562
+
563
+
564
def _pagination_field(
    pagination: Any,
    name: str,
    default: Any = None,
) -> Any:
    """Read *name* from a pagination config given as a mapping or object."""
    if isinstance(pagination, Mapping):
        return pagination.get(name, default)
    return getattr(pagination, name, default)


def build_pagination_cfg(
    pagination: PaginationConfig | Mapping[str, Any] | None,
    overrides: Mapping[str, Any] | None,
) -> PaginationConfigMap | None:
    """
    Build a plain pagination mapping from a config and per-run overrides.

    *pagination* may be an attribute-style config object or a mapping with
    the same field names; both are read the same way. For every field a
    truthy override wins, then a truthy value from *pagination*, then the
    built-in default.

    Parameters
    ----------
    pagination : PaginationConfig | Mapping[str, Any] | None
        Base pagination settings; ignored when falsy.
    overrides : Mapping[str, Any] | None
        Override values; ignored when falsy. ``records_path``,
        ``max_pages`` and ``max_records`` are replaced whenever the key is
        present (even with a falsy value).

    Returns
    -------
    PaginationConfigMap | None
        ``None`` when no pagination type can be resolved. Otherwise a
        mapping with ``type``, ``records_path``, ``max_pages`` and
        ``max_records`` plus, for ``page``/``offset`` types,
        ``page_param``, ``size_param``, ``start_page`` and ``page_size``,
        or, for the ``cursor`` type, ``cursor_param``, ``cursor_path``,
        ``page_size`` and ``start_cursor``. Any other type yields only the
        four common keys.
    """
    ptype: str | None = None
    records_path = None
    max_pages = None
    max_records = None
    if pagination:
        raw_type = _pagination_field(pagination, 'type', '') or ''
        ptype = raw_type.strip().lower()
        records_path = _pagination_field(pagination, 'records_path')
        max_pages = _pagination_field(pagination, 'max_pages')
        max_records = _pagination_field(pagination, 'max_records')
    if overrides:
        ptype = (overrides.get('type') or ptype or '').strip().lower()
        records_path = overrides.get('records_path', records_path)
        max_pages = overrides.get('max_pages', max_pages)
        max_records = overrides.get('max_records', max_records)
    if not ptype:
        return None

    def resolve(name: str) -> Any:
        # Truthy override first, then whatever the base config holds.
        value = overrides.get(name) if overrides else None
        if pagination:
            value = value or _pagination_field(pagination, name)
        return value

    cfg: dict[str, Any] = {
        'type': ptype,
        'records_path': records_path,
        'max_pages': max_pages,
        'max_records': max_records,
    }
    if ptype in ('page', 'offset'):
        cfg.update(
            {
                'page_param': str(resolve('page_param') or 'page'),
                'size_param': str(resolve('size_param') or 'per_page'),
                'start_page': int(resolve('start_page') or 1),
                'page_size': int(resolve('page_size') or 100),
            },
        )
    elif ptype == 'cursor':
        cfg.update(
            {
                'cursor_param': str(resolve('cursor_param') or 'cursor'),
                'cursor_path': resolve('cursor_path'),
                'page_size': int(resolve('page_size') or 100),
                # The start cursor is taken from the base config only.
                'start_cursor': (
                    _pagination_field(pagination, 'start_cursor')
                    if pagination
                    else None
                ),
            },
        )

    return cast('PaginationConfigMap', cfg)
668
+
669
+
670
+ # -- Pagination Invocation -- #
671
+
672
+
673
def paginate_with_client(
    client: Any,
    endpoint_key: str,
    params: Params | None,
    headers: Headers | None,
    timeout: Timeout,
    pagination: PaginationConfigMap | None,
    sleep_seconds: float | None,
) -> Any:
    """
    Call ``client.paginate`` using whichever keyword spelling it accepts.

    For each of ``params``, ``headers``, ``timeout`` and ``sleep_seconds``
    the underscore-prefixed keyword (for example ``_params``) is used when
    the signature of ``client.paginate`` declares it; otherwise the plain
    name is used. ``pagination`` is always passed under its plain name.

    Parameters
    ----------
    client : Any
        Object exposing a ``paginate`` callable.
    endpoint_key : str
        Passed as the first positional argument.
    params : Params | None
        Query parameters to forward.
    headers : Headers | None
        Headers to forward.
    timeout : Timeout
        Timeout value to forward.
    pagination : PaginationConfigMap | None
        Pagination configuration to forward.
    sleep_seconds : float | None
        Delay to forward; ``None`` is forwarded as ``0.0``.

    Returns
    -------
    Any
        Whatever ``client.paginate`` returns.
    """
    accepted = inspect.signature(
        client.paginate,  # type: ignore[arg-type]
    ).parameters
    forwarded: dict[str, Any] = {
        'params': params,
        'headers': headers,
        'timeout': timeout,
        'sleep_seconds': 0.0 if sleep_seconds is None else sleep_seconds,
    }
    call_kwargs: dict[str, Any] = {'pagination': pagination}
    for plain_name, value in forwarded.items():
        private_name = f'_{plain_name}'
        keyword = private_name if private_name in accepted else plain_name
        call_kwargs[keyword] = value

    return client.paginate(endpoint_key, **call_kwargs)
728
+
729
+
730
+ # -- Rate Limit -- #
731
+
732
+
733
def compute_rl_sleep_seconds(
    rate_limit: RateLimitConfig | Mapping[str, Any] | None,
    overrides: Mapping[str, Any] | None,
) -> float:
    """
    Resolve the inter-request delay from rate-limit settings.

    The inputs are normalized to plain mappings carrying only
    ``sleep_seconds`` and ``max_per_sec`` and then handed to
    ``RateLimiter.resolve_sleep_seconds``, which computes the result.

    Parameters
    ----------
    rate_limit : RateLimitConfig | Mapping[str, Any] | None
        Base rate-limit settings. A truthy object with a ``sleep_seconds``
        attribute is converted to a mapping; anything else is passed
        through unchanged.
    overrides : Mapping[str, Any] | None
        Override values; only the ``sleep_seconds`` and ``max_per_sec``
        keys are forwarded, and only when present.

    Returns
    -------
    float
        The value returned by ``RateLimiter.resolve_sleep_seconds``.
    """
    fields = ('sleep_seconds', 'max_per_sec')

    base: Mapping[str, Any] | None
    if rate_limit and hasattr(rate_limit, 'sleep_seconds'):
        # Attribute-style config: lift the two relevant fields into a dict.
        base = {name: getattr(rate_limit, name, None) for name in fields}
    else:
        base = cast('Mapping[str, Any] | None', rate_limit)

    override_map: dict[str, float | None] | None = None
    if overrides:
        picked: dict[str, float | None] = {
            name: cast('float | None', overrides.get(name))
            for name in fields
            if name in overrides
        }
        if picked:
            override_map = picked

    return RateLimiter.resolve_sleep_seconds(
        rate_limit=cast('RateLimitConfigMap | None', base),
        overrides=cast('RateLimitConfigMap | None', override_map),
    )
783
+
784
+
785
+ # -- Session -- #
786
+
787
+
788
def build_session(
    cfg: SessionConfig | None,
) -> requests.Session:
    """
    Create a ``requests.Session`` and apply the options in *cfg*.

    Mapping-valued options (``headers``, ``params``, ``proxies``,
    ``cookies``) are accepted as any ``Mapping``, matching the
    ``SessionConfig`` declaration, and are copied or merged so the session
    never shares a mutable object with the caller's configuration.

    Parameters
    ----------
    cfg : SessionConfig | None
        Session options; a falsy value yields an unconfigured session.

    Returns
    -------
    requests.Session
        The new session with the recognized options applied.
    """
    session = requests.Session()
    if not cfg:
        return session

    headers = cfg.get('headers')
    if isinstance(headers, Mapping):
        session.headers.update(headers)

    params = cfg.get('params')
    if isinstance(params, Mapping):
        try:
            # Copy: assigning the caller's dict would alias it, so later
            # changes to ``session.params`` would leak into the config.
            session.params = dict(params)
        except (AttributeError, TypeError):
            # Best effort, kept from the original behaviour.
            pass

    auth = cfg.get('auth')
    if auth is not None:
        if isinstance(auth, (list, tuple)) and len(auth) == 2:
            # Normalize a 2-item sequence to the tuple form.
            session.auth = (auth[0], auth[1])  # type: ignore[assignment]
        else:
            session.auth = auth  # type: ignore[assignment]

    # ``verify`` is applied whenever the key is present, so an explicit
    # ``False`` is honoured.
    if 'verify' in cfg:
        session.verify = cfg.get('verify')  # type: ignore[assignment]

    cert = cfg.get('cert')
    if cert is not None:
        session.cert = cert  # type: ignore[assignment]

    proxies = cfg.get('proxies')
    if isinstance(proxies, Mapping):
        session.proxies.update(proxies)

    cookies = cfg.get('cookies')
    if isinstance(cookies, Mapping):
        try:
            session.cookies.update(cookies)
        except (TypeError, ValueError):
            # Best effort: malformed cookie values are skipped rather than
            # aborting session construction.
            pass

    if 'trust_env' in cfg:
        try:
            session.trust_env = bool(  # type: ignore[attr-defined]
                cfg.get('trust_env'),
            )
        except AttributeError:
            pass

    return session