etlplus 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. etlplus/__init__.py +43 -0
  2. etlplus/__main__.py +22 -0
  3. etlplus/__version__.py +14 -0
  4. etlplus/api/README.md +237 -0
  5. etlplus/api/__init__.py +136 -0
  6. etlplus/api/auth.py +432 -0
  7. etlplus/api/config.py +633 -0
  8. etlplus/api/endpoint_client.py +885 -0
  9. etlplus/api/errors.py +170 -0
  10. etlplus/api/pagination/__init__.py +47 -0
  11. etlplus/api/pagination/client.py +188 -0
  12. etlplus/api/pagination/config.py +440 -0
  13. etlplus/api/pagination/paginator.py +775 -0
  14. etlplus/api/rate_limiting/__init__.py +38 -0
  15. etlplus/api/rate_limiting/config.py +343 -0
  16. etlplus/api/rate_limiting/rate_limiter.py +266 -0
  17. etlplus/api/request_manager.py +589 -0
  18. etlplus/api/retry_manager.py +430 -0
  19. etlplus/api/transport.py +325 -0
  20. etlplus/api/types.py +172 -0
  21. etlplus/cli/__init__.py +15 -0
  22. etlplus/cli/app.py +1367 -0
  23. etlplus/cli/handlers.py +775 -0
  24. etlplus/cli/main.py +616 -0
  25. etlplus/config/__init__.py +56 -0
  26. etlplus/config/connector.py +372 -0
  27. etlplus/config/jobs.py +311 -0
  28. etlplus/config/pipeline.py +339 -0
  29. etlplus/config/profile.py +78 -0
  30. etlplus/config/types.py +204 -0
  31. etlplus/config/utils.py +120 -0
  32. etlplus/ddl.py +197 -0
  33. etlplus/enums.py +414 -0
  34. etlplus/extract.py +218 -0
  35. etlplus/file.py +657 -0
  36. etlplus/load.py +336 -0
  37. etlplus/mixins.py +62 -0
  38. etlplus/py.typed +0 -0
  39. etlplus/run.py +368 -0
  40. etlplus/run_helpers.py +843 -0
  41. etlplus/templates/__init__.py +5 -0
  42. etlplus/templates/ddl.sql.j2 +128 -0
  43. etlplus/templates/view.sql.j2 +69 -0
  44. etlplus/transform.py +1049 -0
  45. etlplus/types.py +227 -0
  46. etlplus/utils.py +638 -0
  47. etlplus/validate.py +493 -0
  48. etlplus/validation/__init__.py +44 -0
  49. etlplus/validation/utils.py +389 -0
  50. etlplus-0.5.4.dist-info/METADATA +616 -0
  51. etlplus-0.5.4.dist-info/RECORD +55 -0
  52. etlplus-0.5.4.dist-info/WHEEL +5 -0
  53. etlplus-0.5.4.dist-info/entry_points.txt +2 -0
  54. etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
  55. etlplus-0.5.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,885 @@
1
+ """
2
+ :mod:`etlplus.api.endpoint_client` module.
3
+
4
+ Endpoint client for composing URLs, requests, and pagination.
5
+
6
+ This module provides :class:`EndpointClient`, a small frozen dataclass that
7
+ registers endpoint paths under a base URL, applies retry and rate-limiting
8
+ policies, and wires pagination helpers to fetch JSON records from REST APIs.
9
+
10
+ Notes
11
+ -----
12
+ - Retry-related types live in :mod:`etlplus.api.retry_manager`.
13
+ - Pagination requires a ``PaginationConfig``; see
14
+ :class:`PagePaginationConfigMap` and :class:`CursorPaginationConfigMap` for
15
+ the accepted shapes.
16
+
17
+ Examples
18
+ --------
19
+ >>> # Page-based pagination
20
+ >>> client = EndpointClient(
21
+ ... base_url="https://api.example.com/v1",
22
+ ... endpoints={"list": "/items"},
23
+ ... )
24
+ >>> pg = {"type": "page", "page_size": 100}
25
+ >>> rows = client.paginate("list", pagination=pg)
26
+
27
+ >>> # Cursor-based pagination
28
+ >>> pg = {
29
+ ... "type": "cursor",
30
+ ... "records_path": "data.items",
31
+ ... "cursor_param": "cursor",
32
+ ... "cursor_path": "data.nextCursor",
33
+ ... "page_size": 100,
34
+ ... }
35
+ >>> rows = client.paginate("list", pagination=pg)
36
+ """
37
+
38
+ from __future__ import annotations
39
+
40
+ import time
41
+ from collections.abc import Callable
42
+ from collections.abc import Iterator
43
+ from collections.abc import Mapping
44
+ from collections.abc import Sequence
45
+ from dataclasses import dataclass
46
+ from dataclasses import field
47
+ from types import MappingProxyType
48
+ from types import TracebackType
49
+ from typing import Any
50
+ from typing import ClassVar
51
+ from typing import Self
52
+ from typing import cast
53
+ from urllib.parse import parse_qsl
54
+ from urllib.parse import quote
55
+ from urllib.parse import urlencode
56
+ from urllib.parse import urlsplit
57
+ from urllib.parse import urlunsplit
58
+
59
+ import requests # type: ignore[import]
60
+
61
+ from ..types import JSONData
62
+ from ..types import JSONDict
63
+ from .errors import ApiRequestError
64
+ from .errors import PaginationError
65
+ from .pagination import PaginationClient
66
+ from .pagination import PaginationInput
67
+ from .pagination import Paginator
68
+ from .rate_limiting import RateLimitConfigMap
69
+ from .rate_limiting import RateLimiter
70
+ from .rate_limiting import RateLimitOverrides
71
+ from .request_manager import RequestManager
72
+ from .retry_manager import RetryManager
73
+ from .retry_manager import RetryPolicy
74
+ from .retry_manager import RetryStrategy
75
+ from .transport import HTTPAdapterMountConfig
76
+ from .types import RequestOptions
77
+ from .types import Url
78
+
79
+ # SECTION: CLASSES ========================================================== #
80
+
81
+
82
@dataclass(frozen=True, slots=True)
class EndpointClient:
    """
    Frozen registry of endpoint path templates anchored at a base URL.

    Summary
    -------
    Composes absolute URLs, paginates responses, applies client-wide rate
    limits, and retries with jittered exponential backoff. Instances are
    frozen and slotted, so attributes cannot be reassigned after
    construction.

    Parameters
    ----------
    base_url : Url
        Absolute base URL, e.g., ``"https://api.example.com/v1"``.
    endpoints : Mapping[str, str]
        Endpoint keys mapped to relative paths, e.g.,
        ``{"list_users": "/users", "user": "/users/{id}"}``. Stored on the
        instance as a read-only ``MappingProxyType`` copy.
    base_path : str | None, optional
        Path prefix (e.g., ``/v2``) placed after the ``base_url`` path and
        before every endpoint path when building URLs.
    retry : RetryPolicy | None, optional
        Retry policy. When provided, failed requests matching ``retry_on``
        statuses are retried with full jitter.
    retry_network_errors : bool, optional
        When ``True``, also retry on network errors (timeouts, connection
        resets). Defaults to ``False``.
    rate_limit : RateLimitConfigMap | None, optional
        Client-wide rate limit used to derive an inter-request delay when
        an explicit ``sleep_seconds`` isn't supplied.
    session : requests.Session | None, optional
        Explicit HTTP session used for all requests.
    session_factory : Callable[[], requests.Session] | None, optional
        Factory used to lazily create a session. Discarded when ``session``
        is also provided.
    session_adapters : Sequence[HTTPAdapterMountConfig] | None, optional
        Adapter mount configuration(s) used to build a session lazily when
        neither ``session`` nor ``session_factory`` is supplied. Stored as
        a tuple (or ``None`` when empty).

    Attributes
    ----------
    DEFAULT_PAGE_PARAM : ClassVar[str]
        Default page parameter name.
    DEFAULT_SIZE_PARAM : ClassVar[str]
        Default page-size parameter name.
    DEFAULT_START_PAGE : ClassVar[int]
        Default starting page number.
    DEFAULT_PAGE_SIZE : ClassVar[int]
        Default records-per-page when unspecified.
    DEFAULT_CURSOR_PARAM : ClassVar[str]
        Default cursor parameter name.
    DEFAULT_LIMIT_PARAM : ClassVar[str]
        Default limit parameter name used for cursor pagination.
    DEFAULT_RETRY_MAX_ATTEMPTS : ClassVar[int]
        Fallback max attempts when the retry policy omits it.
    DEFAULT_RETRY_BACKOFF : ClassVar[float]
        Fallback exponential backoff base seconds.
    DEFAULT_RETRY_ON : ClassVar[tuple[int, ...]]
        Default HTTP status codes eligible for retry.
    DEFAULT_RETRY_CAP : ClassVar[float]
        Maximum sleep seconds for jittered backoff.
    DEFAULT_TIMEOUT : ClassVar[float]
        Default timeout applied to HTTP requests when unspecified.

    Raises
    ------
    ValueError
        If ``base_url`` is not absolute or endpoint keys/values are invalid.

    Notes
    -----
    - The instance fields are readable as attributes under the parameter
      names listed above.
    - Pagination defaults (page size, start page, cursor param, etc.) are
      centralized as class variables.
    - Context manager support (``with EndpointClient(...) as client``)
      manages session lifecycle; owned sessions are closed on exit.
    - Retries use exponential backoff with jitter capped by
      ``DEFAULT_RETRY_CAP`` seconds.

    Examples
    --------
    Basic URL composition
    ^^^^^^^^^^^^^^^^^^^^^
    >>> client = EndpointClient(
    ...     base_url="https://api.example.com/v1",
    ...     endpoints={"list_users": "/users", "user": "/users/{id}"},
    ... )
    >>> client.url("list_users", query_parameters={"active": "true"})
    'https://api.example.com/v1/users?active=true'
    >>> client.url("user", path_parameters={"id": 42})
    'https://api.example.com/v1/users/42'

    Page pagination with retries
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    >>> client = EndpointClient(
    ...     base_url="https://api.example.com/v1",
    ...     endpoints={"list": "/items"},
    ...     retry={"max_attempts": 5, "backoff": 0.5, "retry_on": [429, 503]},
    ...     retry_network_errors=True,
    ... )
    >>> rows = client.paginate(
    ...     "list",
    ...     pagination={"type": "page", "page_size": 50},
    ... )
    """

    # -- Attributes -- #

    base_url: Url
    endpoints: Mapping[str, str]
    base_path: str | None = None

    # Optional retry configuration (constructor parameter; object is frozen)
    retry: RetryPolicy | None = None
    retry_network_errors: bool = False
    # Optional client-wide rate limit configuration
    rate_limit: RateLimitConfigMap | None = None

    # Optional HTTP session or factory
    session: requests.Session | None = None
    session_factory: Callable[[], requests.Session] | None = None

    # Optional HTTPAdapter mount configuration(s) for transport-level retries
    # and connection pooling. Used to build a session lazily when neither
    # `session` nor `session_factory` is supplied.
    session_adapters: Sequence[HTTPAdapterMountConfig] | None = None

    # Internal: the RequestManager built in ``__post_init__`` from the retry
    # and session settings above. HTTP calls and context-manager enter/exit
    # are delegated to it. Not a constructor argument; excluded from repr
    # and comparisons.
    _request_manager: RequestManager = field(
        init=False,
        repr=False,
        compare=False,
    )

    # -- Class Defaults (Centralized) -- #

    DEFAULT_PAGE_PARAM: ClassVar[str] = 'page'
    DEFAULT_SIZE_PARAM: ClassVar[str] = 'per_page'
    DEFAULT_START_PAGE: ClassVar[int] = 1
    DEFAULT_PAGE_SIZE: ClassVar[int] = 100
    DEFAULT_CURSOR_PARAM: ClassVar[str] = 'cursor'
    DEFAULT_LIMIT_PARAM: ClassVar[str] = 'limit'

    # Retry defaults (only used if a policy is provided)
    DEFAULT_RETRY_MAX_ATTEMPTS: ClassVar[int] = RetryStrategy.DEFAULT_ATTEMPTS
    DEFAULT_RETRY_BACKOFF: ClassVar[float] = RetryStrategy.DEFAULT_BACKOFF
    DEFAULT_RETRY_ON: ClassVar[tuple[int, ...]] = tuple(
        RetryManager.DEFAULT_STATUS_CODES,
    )

    # Cap for jittered backoff sleeps (seconds)
    DEFAULT_RETRY_CAP: ClassVar[float] = RetryManager.DEFAULT_CAP

    # Default timeout applied when callers do not explicitly provide one.
    DEFAULT_TIMEOUT: ClassVar[float] = 10.0
258
+
259
+ # -- Magic Methods (Object Lifecycle) -- #
260
+
261
+ def __post_init__(self) -> None:
262
+ """
263
+ Validate inputs and finalize immutable state.
264
+
265
+ Ensures ``base_url`` is absolute, copies and validates endpoint
266
+ mappings, wraps them in a read-only proxy, and synthesizes a
267
+ session factory when only adapter configs are provided.
268
+
269
+ Raises
270
+ ------
271
+ ValueError
272
+ If ``base_url`` is not absolute or endpoints are invalid.
273
+ """
274
+ # Validate base_url is absolute.
275
+ parts = urlsplit(self.base_url)
276
+ if not parts.scheme or not parts.netloc:
277
+ raise ValueError(
278
+ 'base_url must be absolute, e.g. "https://api.example.com"',
279
+ )
280
+
281
+ # Defensive copy + validate endpoints with concise comprehension.
282
+ eps = dict(self.endpoints)
283
+ invalid = [
284
+ (k, v)
285
+ for k, v in eps.items()
286
+ if not (isinstance(k, str) and isinstance(v, str) and v)
287
+ ]
288
+ if invalid:
289
+ sample = invalid[:3]
290
+ msg = (
291
+ 'endpoints must map str -> non-empty str; '
292
+ f'invalid entries: {sample}'
293
+ )
294
+ raise ValueError(msg)
295
+ # Wrap in a read-only mapping to ensure immutability
296
+ object.__setattr__(self, 'endpoints', MappingProxyType(eps))
297
+
298
+ # If both session and factory are provided, prefer explicit session.
299
+ if self.session is not None and self.session_factory is not None:
300
+ object.__setattr__(self, 'session_factory', None)
301
+
302
+ # Normalize adapter configs to tuples for immutability.
303
+ if self.session_adapters:
304
+ adapters_cfg = tuple(self.session_adapters)
305
+ object.__setattr__(self, 'session_adapters', adapters_cfg)
306
+ else:
307
+ object.__setattr__(self, 'session_adapters', None)
308
+
309
+ manager = RequestManager(
310
+ retry=self.retry,
311
+ retry_network_errors=self.retry_network_errors,
312
+ default_timeout=self.DEFAULT_TIMEOUT,
313
+ session=self.session,
314
+ session_factory=self.session_factory,
315
+ session_adapters=self.session_adapters,
316
+ retry_cap=self.DEFAULT_RETRY_CAP,
317
+ )
318
+ object.__setattr__(self, '_request_manager', manager)
319
+
320
+ # -- Magic Methods (Context Manager Protocol) -- #
321
+
322
+ def __enter__(self) -> Self:
323
+ """
324
+ Enter the runtime context related to this object.
325
+
326
+ Returns
327
+ -------
328
+ Self
329
+ The client instance.
330
+ """
331
+ self._request_manager.__enter__()
332
+ return self
333
+
334
+ def __exit__(
335
+ self,
336
+ exc_type: type[BaseException] | None,
337
+ exc: BaseException | None,
338
+ tb: TracebackType | None,
339
+ ) -> None:
340
+ """
341
+ Exit the runtime context related to this object.
342
+
343
+ Parameters
344
+ ----------
345
+ exc_type : type[BaseException] | None
346
+ Exception type if raised, else ``None``.
347
+ exc : BaseException | None
348
+ Exception instance if raised, else ``None``.
349
+ tb : TracebackType | None
350
+ Traceback if exception raised, else ``None``.
351
+ """
352
+ self._request_manager.__exit__(exc_type, exc, tb)
353
+
354
+ # -- Internal Instance Methods -- #
355
+
356
def _build_pagination_client(
    self,
    *,
    pagination: PaginationInput,
    sleep_seconds: float,
    rate_limit_overrides: RateLimitOverrides,
) -> PaginationClient:
    """
    Assemble a :class:`PaginationClient` that fetches through this client.

    Parameters
    ----------
    pagination : PaginationInput
        Pagination configuration mapping or :class:`PaginationConfig`.
    sleep_seconds : float
        Explicit pause between requests, in seconds.
    rate_limit_overrides : RateLimitOverrides
        Per-call overrides applied on top of ``self.rate_limit``.

    Returns
    -------
    PaginationClient
        Helper whose ``fetch`` callback is ``self._fetch_page``.
    """
    pause = self._resolve_sleep_seconds(
        sleep_seconds,
        self.rate_limit,
        rate_limit_overrides,
    )

    # A limiter is only attached when there is a positive pause to enforce.
    limiter = None
    if pause > 0:
        limiter = RateLimiter.fixed(pause)

    return PaginationClient(
        pagination=pagination,
        fetch=self._fetch_page,
        rate_limiter=limiter,
    )
393
+
394
+ def _fetch_page(
395
+ self,
396
+ url_: Url,
397
+ request: RequestOptions,
398
+ page_index: int | None,
399
+ ) -> JSONData:
400
+ """
401
+ Fetch a single page using shared pagination guardrails.
402
+
403
+ Parameters
404
+ ----------
405
+ url_ : Url
406
+ Absolute URL to request.
407
+ request : RequestOptions
408
+ Request metadata produced by ``Paginator``.
409
+ page_index : int | None
410
+ Index of the page being fetched.
411
+
412
+ Returns
413
+ -------
414
+ JSONData
415
+ Parsed response payload.
416
+
417
+ Raises
418
+ ------
419
+ PaginationError
420
+ If the request fails.
421
+ """
422
+ call_kw = request.as_kwargs()
423
+ try:
424
+ return self.get(url_, **call_kw)
425
+ except ApiRequestError as exc:
426
+ raise PaginationError(
427
+ url=url_,
428
+ status=exc.status,
429
+ attempts=exc.attempts,
430
+ retried=exc.retried,
431
+ retry_policy=exc.retry_policy,
432
+ cause=exc,
433
+ page=page_index,
434
+ ) from exc
435
+
436
+ # -- Instance Methods (HTTP Requests ) -- #
437
+
438
def get(
    self,
    url: Url,
    **kwargs: Any,
) -> JSONData:
    """
    Issue a GET request through the internal request manager.

    Parameters
    ----------
    url : Url
        Absolute URL to request.
    **kwargs : Any
        Extra keyword arguments passed through unchanged
        (e.g., ``params``, ``headers``).

    Returns
    -------
    JSONData
        Parsed JSON payload or fallback structure matching
        :func:`etlplus.extract.extract_from_api` semantics.
    """
    manager = self._request_manager
    return manager.get(url, **kwargs)
461
+
462
def post(
    self,
    url: Url,
    **kwargs: Any,
) -> JSONData:
    """
    Issue a POST request through the internal request manager.

    Parameters
    ----------
    url : Url
        Absolute URL to request.
    **kwargs : Any
        Extra keyword arguments passed through unchanged
        (e.g., ``params``, ``headers``, ``json``).

    Returns
    -------
    JSONData
        Parsed JSON payload or fallback structure matching
        :func:`etlplus.extract.extract_from_api` semantics.
    """
    manager = self._request_manager
    return manager.post(url, **kwargs)
485
+
486
def request(
    self,
    method: str,
    url: Url,
    **kwargs: Any,
) -> JSONData:
    """
    Issue an HTTP request with an arbitrary method.

    The call is delegated to the internal request manager, which was
    configured with this client's retry and session settings.

    Parameters
    ----------
    method : str
        HTTP method to invoke (``'GET'``, ``'POST'``, etc.).
    url : Url
        Absolute URL to request.
    **kwargs : Any
        Extra keyword arguments passed through unchanged
        (e.g., ``params``, ``headers``, ``json``).

    Returns
    -------
    JSONData
        Parsed JSON payload or fallback structure matching
        :func:`etlplus.extract.extract_from_api` semantics.
    """
    manager = self._request_manager
    return manager.request(method, url, **kwargs)
512
+
513
+ # -- Instance Methods (HTTP Responses) -- #
514
+
515
def paginate(
    self,
    endpoint_key: str,
    *,
    path_parameters: Mapping[str, str] | None = None,
    query_parameters: Mapping[str, str] | None = None,
    pagination: PaginationInput = None,
    request: RequestOptions | None = None,
    sleep_seconds: float = 0.0,
    rate_limit_overrides: RateLimitOverrides = None,
) -> JSONData:
    """
    Paginate a registered endpoint.

    Resolves the endpoint to an absolute URL with ``self.url(...)`` and
    hands everything else to ``self.paginate_url``.

    Parameters
    ----------
    endpoint_key : str
        Key into the ``endpoints`` mapping whose relative path will be
        resolved against ``base_url``.
    path_parameters : Mapping[str, str] | None
        Values to substitute into placeholders in the endpoint path.
    query_parameters : Mapping[str, str] | None
        Query parameters to append (merged with any already present on
        ``base_url``).
    pagination : PaginationInput, optional
        Pagination configuration mapping or :class:`PaginationConfig`.
    request : RequestOptions | None, optional
        Pre-built request metadata snapshot (params/headers/timeout).
    sleep_seconds : float
        Time to sleep between requests.
    rate_limit_overrides : RateLimitOverrides, optional
        Optional per-call overrides merged with ``self.rate_limit`` when
        deriving pacing.

    Returns
    -------
    JSONData
        Raw JSON object for non-paginated calls, or a list of record
        dicts aggregated across pages for paginated calls.
    """
    target = self.url(
        endpoint_key,
        path_parameters=path_parameters,
        query_parameters=query_parameters,
    )
    # Everything except the URL is forwarded by keyword, untouched.
    forwarded: dict[str, Any] = {
        'pagination': pagination,
        'request': request,
        'sleep_seconds': sleep_seconds,
        'rate_limit_overrides': rate_limit_overrides,
    }
    return self.paginate_url(target, **forwarded)
569
+
570
def paginate_iter(
    self,
    endpoint_key: str,
    *,
    path_parameters: Mapping[str, str] | None = None,
    query_parameters: Mapping[str, str] | None = None,
    pagination: PaginationInput = None,
    request: RequestOptions | None = None,
    sleep_seconds: float = 0.0,
    rate_limit_overrides: RateLimitOverrides = None,
) -> Iterator[JSONDict]:
    """
    Lazily yield records from a registered endpoint.

    Summary
    -------
    Streaming counterpart of ``paginate``: the endpoint is resolved with
    ``self.url(...)`` and records are re-yielded from
    ``self.paginate_url_iter`` rather than collected into a list. Being a
    generator, nothing (including URL resolution) runs until iteration
    starts.

    Parameters
    ----------
    endpoint_key : str
        Key into the ``endpoints`` mapping whose relative path will be
        resolved against ``base_url``.
    path_parameters : Mapping[str, str] | None
        Values to substitute into placeholders in the endpoint path.
    query_parameters : Mapping[str, str] | None
        Query parameters to append (merged with any already present).
    pagination : PaginationInput, optional
        Pagination configuration mapping or :class:`PaginationConfig`.
    request : RequestOptions | None, optional
        Pre-built request metadata snapshot (params/headers/timeout).
    sleep_seconds : float
        Time to sleep between requests.
    rate_limit_overrides : RateLimitOverrides, optional
        Optional per-call overrides merged with ``self.rate_limit`` when
        deriving pacing.

    Yields
    ------
    JSONDict
        Record dictionaries extracted from each page.
    """
    target = self.url(
        endpoint_key,
        path_parameters=path_parameters,
        query_parameters=query_parameters,
    )
    # All arguments, the URL included, are forwarded by keyword.
    forwarded: dict[str, Any] = {
        'url': target,
        'pagination': pagination,
        'request': request,
        'sleep_seconds': sleep_seconds,
        'rate_limit_overrides': rate_limit_overrides,
    }
    yield from self.paginate_url_iter(**forwarded)
625
+
626
def paginate_url(
    self,
    url: Url,
    pagination: PaginationInput = None,
    *,
    request: RequestOptions | None = None,
    sleep_seconds: float = 0.0,
    rate_limit_overrides: RateLimitOverrides = None,
) -> JSONData:
    """
    Fetch an absolute URL, aggregating records when pagination applies.

    Parameters
    ----------
    url : Url
        Absolute URL to paginate.
    pagination : PaginationInput, optional
        Pagination configuration mapping or :class:`PaginationConfig`.
    request : RequestOptions | None, optional
        Optional request snapshot with existing params/headers/timeout.
    sleep_seconds : float
        Time to sleep between requests.
    rate_limit_overrides : RateLimitOverrides, optional
        Optional per-call overrides merged with ``self.rate_limit`` when
        deriving pacing.

    Returns
    -------
    JSONData
        Raw JSON object for non-paginated calls, or a list of record
        dicts aggregated across pages for paginated calls.
    """
    # Work out the pagination type: mappings (and ``None``) go through
    # ``Paginator.detect_type``; any other object is asked for ``.type``.
    if pagination is None or isinstance(pagination, Mapping):
        ptype = Paginator.detect_type(
            cast(Mapping[str, Any] | None, pagination),
            default=None,
        )
    else:
        ptype = getattr(pagination, 'type', None)

    snapshot = request or RequestOptions()

    if ptype is not None:
        # Known pagination type: go through ``paginate_url_iter`` so
        # subclass overrides are honored (tests rely on this shim). The
        # snapshot is passed as the baseline request; params/headers/
        # timeout are not re-specified so they are not merged twice.
        records = self.paginate_url_iter(
            url,
            pagination=pagination,
            request=snapshot,
            sleep_seconds=sleep_seconds,
            rate_limit_overrides=rate_limit_overrides,
        )
        return list(records)

    # No (or unrecognized) pagination: return the raw JSON of one GET.
    return self.get(url, **snapshot.as_kwargs())
684
+
685
def paginate_url_iter(
    self,
    url: Url,
    pagination: PaginationInput = None,
    *,
    request: RequestOptions | None = None,
    sleep_seconds: float = 0.0,
    rate_limit_overrides: RateLimitOverrides = None,
) -> Iterator[JSONDict]:
    """
    Lazily yield records while paginating an absolute URL.

    Parameters
    ----------
    url : Url
        Absolute URL to paginate.
    pagination : PaginationInput, optional
        Pagination configuration mapping or :class:`PaginationConfig`.
    request : RequestOptions | None, optional
        Optional request snapshot reused across pages. A default
        ``RequestOptions()`` is used when omitted.
    sleep_seconds : float
        Time to sleep between requests.
    rate_limit_overrides : RateLimitOverrides, optional
        Optional per-call overrides merged with ``self.rate_limit`` when
        deriving pacing.

    Yields
    ------
    JSONDict
        Record dictionaries extracted from each page.
    """
    snapshot = request or RequestOptions()
    pager = self._build_pagination_client(
        pagination=pagination,
        sleep_seconds=sleep_seconds,
        rate_limit_overrides=rate_limit_overrides,
    )
    yield from pager.iterate(url, request=snapshot)
727
+
728
+ # -- Instance Methods (Endpoints)-- #
729
+
730
def url(
    self,
    endpoint_key: str,
    path_parameters: Mapping[str, Any] | None = None,
    query_parameters: Mapping[str, Any] | None = None,
) -> str:
    """
    Build an absolute URL for a registered endpoint.

    Parameters
    ----------
    endpoint_key : str
        Key into the ``endpoints`` mapping whose relative path will be
        resolved against ``base_url``.
    path_parameters : Mapping[str, Any] | None, optional
        Values to substitute into placeholders in the endpoint path.
        Placeholders must be written as ``{placeholder}`` in the relative
        path. Each substituted value is percent-encoded as a single path
        segment (slashes are encoded) to prevent path traversal.
    query_parameters : Mapping[str, Any] | None, optional
        Query parameters to append (and merge with any already present on
        ``base_url``). Values are percent-encoded and combined using
        ``application/x-www-form-urlencoded`` rules.

    Returns
    -------
    str
        Constructed absolute URL.

    Raises
    ------
    KeyError
        If ``endpoint_key`` is unknown or a required placeholder in the
        path has no corresponding entry in ``path_parameters``.
    ValueError
        If the path template is invalid. This includes malformed braces
        as well as positional (``{}``, ``{0}``) or attribute-style
        (``{a.b}``) placeholders, which cannot be filled from named
        path parameters.

    Notes
    -----
    Leading and trailing slashes on ``base_path`` are ignored, so
    ``"/v2"``, ``"v2"`` and ``"/v2/"`` all compose to the same URL.

    Examples
    --------
    >>> ep = EndpointClient(
    ...     base_url='https://api.example.com/v1',
    ...     endpoints={
    ...         'user': '/users/{id}',
    ...         'search': '/users'
    ...     }
    ... )
    >>> ep.url('user', path_parameters={'id': '42'})
    'https://api.example.com/v1/users/42'
    >>> ep.url('search', query_parameters={'q': 'Jane Doe', 'page': '2'})
    'https://api.example.com/v1/users?q=Jane+Doe&page=2'
    """
    try:
        rel_path = self.endpoints[endpoint_key]
    except KeyError:
        raise KeyError(f'Unknown endpoint_key: {endpoint_key!r}') from None

    # Substitute path parameters only when the template has placeholders.
    if '{' in rel_path:
        # ``safe=''`` also encodes "/", so one value stays one segment.
        encoded = {
            name: quote(str(value), safe='')
            for name, value in (path_parameters or {}).items()
        }
        # Only the ``format`` call is guarded, so errors raised while
        # encoding values are not misreported as template problems.
        try:
            rel_path = rel_path.format(**encoded)
        except KeyError as exc:
            raise KeyError(
                f'Missing path parameter for placeholder: {exc.args[0]!r}',
            ) from None
        except (AttributeError, IndexError, ValueError) as exc:
            # IndexError: positional placeholder; AttributeError:
            # attribute lookup on an encoded string; ValueError:
            # malformed braces.
            raise ValueError(
                f'Invalid path template {rel_path!r}: {exc}',
            ) from None

    # Compose the path: base_url path, optional base_path, endpoint path.
    parts = urlsplit(self.base_url)
    prefix = parts.path.rstrip('/')
    # Strip both ends so a trailing slash on base_path cannot produce "//".
    extra = (self.base_path or '').strip('/')
    if extra:
        prefix = f'{prefix}/{extra}'
    path = prefix + '/' + rel_path.lstrip('/')

    # Query pairs already on base_url come first, then the caller's.
    pairs = parse_qsl(parts.query, keep_blank_values=True)
    pairs.extend((query_parameters or {}).items())
    query = urlencode(pairs, doseq=True)

    return urlunsplit(
        (parts.scheme, parts.netloc, path, query, parts.fragment),
    )
827
+
828
+ # -- Static Methods -- #
829
+
830
+ @staticmethod
831
+ def apply_sleep(
832
+ sleep_seconds: float,
833
+ *,
834
+ sleeper: Callable[[float], None] | None = None,
835
+ ) -> None:
836
+ """
837
+ Sleep for the specified seconds if positive.
838
+
839
+ The optional ``sleeper`` is useful for tests (e.g., pass
840
+ ``lambda s: None``). Defaults to using time.sleep when not provided.
841
+
842
+ Parameters
843
+ ----------
844
+ sleep_seconds : float
845
+ Number of seconds to sleep; no-op if non-positive.
846
+ sleeper : Callable[[float], None] | None, optional
847
+ Optional sleeper function taking seconds as input.
848
+ """
849
+ if sleep_seconds and sleep_seconds > 0:
850
+ if sleeper is None:
851
+ time.sleep(sleep_seconds)
852
+ else:
853
+ sleeper(sleep_seconds)
854
+
855
+ # -- Internal Static Methods -- #
856
+
857
+ @staticmethod
858
+ def _resolve_sleep_seconds(
859
+ explicit: float,
860
+ rate_limit: RateLimitConfigMap | None,
861
+ overrides: RateLimitOverrides = None,
862
+ ) -> float:
863
+ """
864
+ Derive the effective sleep interval honoring rate-limit config.
865
+
866
+ Parameters
867
+ ----------
868
+ explicit : float
869
+ Explicit sleep seconds provided by the caller.
870
+ rate_limit : RateLimitConfigMap | None
871
+ Client-wide rate limit configuration.
872
+ overrides : RateLimitOverrides, optional
873
+ Per-call overrides that take precedence over ``rate_limit``.
874
+
875
+ Returns
876
+ -------
877
+ float
878
+ The resolved sleep seconds to apply between requests.
879
+ """
880
+ if explicit and explicit > 0:
881
+ return explicit
882
+ return RateLimiter.resolve_sleep_seconds(
883
+ rate_limit=rate_limit,
884
+ overrides=overrides,
885
+ )