etlplus-0.5.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etlplus/__init__.py +43 -0
- etlplus/__main__.py +22 -0
- etlplus/__version__.py +14 -0
- etlplus/api/README.md +237 -0
- etlplus/api/__init__.py +136 -0
- etlplus/api/auth.py +432 -0
- etlplus/api/config.py +633 -0
- etlplus/api/endpoint_client.py +885 -0
- etlplus/api/errors.py +170 -0
- etlplus/api/pagination/__init__.py +47 -0
- etlplus/api/pagination/client.py +188 -0
- etlplus/api/pagination/config.py +440 -0
- etlplus/api/pagination/paginator.py +775 -0
- etlplus/api/rate_limiting/__init__.py +38 -0
- etlplus/api/rate_limiting/config.py +343 -0
- etlplus/api/rate_limiting/rate_limiter.py +266 -0
- etlplus/api/request_manager.py +589 -0
- etlplus/api/retry_manager.py +430 -0
- etlplus/api/transport.py +325 -0
- etlplus/api/types.py +172 -0
- etlplus/cli/__init__.py +15 -0
- etlplus/cli/app.py +1367 -0
- etlplus/cli/handlers.py +775 -0
- etlplus/cli/main.py +616 -0
- etlplus/config/__init__.py +56 -0
- etlplus/config/connector.py +372 -0
- etlplus/config/jobs.py +311 -0
- etlplus/config/pipeline.py +339 -0
- etlplus/config/profile.py +78 -0
- etlplus/config/types.py +204 -0
- etlplus/config/utils.py +120 -0
- etlplus/ddl.py +197 -0
- etlplus/enums.py +414 -0
- etlplus/extract.py +218 -0
- etlplus/file.py +657 -0
- etlplus/load.py +336 -0
- etlplus/mixins.py +62 -0
- etlplus/py.typed +0 -0
- etlplus/run.py +368 -0
- etlplus/run_helpers.py +843 -0
- etlplus/templates/__init__.py +5 -0
- etlplus/templates/ddl.sql.j2 +128 -0
- etlplus/templates/view.sql.j2 +69 -0
- etlplus/transform.py +1049 -0
- etlplus/types.py +227 -0
- etlplus/utils.py +638 -0
- etlplus/validate.py +493 -0
- etlplus/validation/__init__.py +44 -0
- etlplus/validation/utils.py +389 -0
- etlplus-0.5.4.dist-info/METADATA +616 -0
- etlplus-0.5.4.dist-info/RECORD +55 -0
- etlplus-0.5.4.dist-info/WHEEL +5 -0
- etlplus-0.5.4.dist-info/entry_points.txt +2 -0
- etlplus-0.5.4.dist-info/licenses/LICENSE +21 -0
- etlplus-0.5.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,775 @@
"""
:mod:`etlplus.api.pagination.paginator` module.

Core pagination runtime for REST API responses.

This module implements :class:`Paginator`, which encapsulates pagination
behavior for page-, offset-, and cursor-based APIs. It delegates configuration
parsing to :mod:`etlplus.api.pagination.config` and focuses on executing
requests, extracting records, and enforcing limits.

Examples
--------
>>> from etlplus.api.pagination import Paginator, PaginationType
>>> from etlplus.api.types import RequestOptions, Url
>>> def fetch(url: Url, req: RequestOptions, page: int | None) -> dict:
...     ...  # issue HTTP request and return JSON payload
>>> paginator = Paginator(type=PaginationType.PAGE, page_size=100)
>>> rows = list(paginator.paginate_iter('https://api.example.com/items'))
"""

from __future__ import annotations

from collections.abc import Generator
from collections.abc import Mapping
from dataclasses import dataclass
from functools import partial
from typing import Any
from typing import ClassVar
from typing import cast

from ...types import JSONDict
from ...types import JSONRecords
from ...utils import to_int
from ...utils import to_maximum_int
from ...utils import to_positive_int
from ..errors import ApiRequestError
from ..errors import PaginationError
from ..rate_limiting import RateLimiter
from ..types import FetchPageCallable
from ..types import RequestOptions
from ..types import Url
from .config import PaginationConfig
from .config import PaginationInput
from .config import PaginationType

# SECTION: EXPORTS ========================================================== #


__all__ = [
    # Classes
    'Paginator',
]


# SECTION: CONSTANTS ======================================================== #


_MISSING = object()


# SECTION: INTERNAL FUNCTIONS =============================================== #


def _resolve_path(
    obj: Any,
    path: str | None,
) -> Any:
    """
    Resolve dotted ``path`` within ``obj`` or return ``_MISSING``.

    Parameters
    ----------
    obj : Any
        JSON payload from an API response.
    path : str | None
        Dotted path to the target value within ``obj``.

    Returns
    -------
    Any
        Target value from the payload, or ``_MISSING`` if the path does not
        exist.
    """
    if not isinstance(path, str) or not path:
        return obj
    cur: Any = obj
    for part in path.split('.'):
        if isinstance(cur, dict) and part in cur:
            cur = cur[part]
        else:
            return _MISSING
    return cur


# SECTION: CLASSES ========================================================== #


@dataclass(slots=True, kw_only=True)
class Paginator:
    """
    REST API endpoint response pagination manager.

    The caller supplies a ``fetch`` function that retrieves a JSON page
    given an absolute URL and request params. The paginator handles iterating
    over pages according to the configured strategy, extracting records from
    each page, and yielding them one by one. Pagination strategies supported
    are:
    - Cursor/token based (``type='cursor'``)
    - Offset based (``type='offset'``)
    - Page-number based (``type='page'``)

    Attributes
    ----------
    START_PAGE : ClassVar[int]
        Default starting page number.
    PAGE_SIZE : ClassVar[int]
        Default number of records per page.
    CURSOR_PARAM : ClassVar[str]
        Default query parameter name for cursor value.
    LIMIT_PARAM : ClassVar[str]
        Default query parameter name for page size in cursor pagination.
    PAGE_PARAMS : ClassVar[dict[PaginationType, str]]
        Default query parameter name for page number per pagination type.
    SIZE_PARAMS : ClassVar[dict[PaginationType, str]]
        Default query parameter name for page size per pagination type.
    START_PAGES : ClassVar[dict[PaginationType, int]]
        Default starting page number per pagination type.
    type : PaginationType
        Pagination type: ``"page"``, ``"offset"``, or ``"cursor"``.
    page_size : int
        Number of records per page (minimum of 1).
    start_page : int
        Starting page number or offset, depending on ``type``.
    start_cursor : object | None
        Initial cursor value for cursor-based pagination.
    records_path : str | None
        Dotted path to the records list inside each page payload.
    fallback_path : str | None
        Alternate dotted path used when ``records_path`` resolves to an empty
        collection or ``None``.
    cursor_path : str | None
        Dotted path to the next-cursor value inside each page payload.
    max_pages : int | None
        Optional maximum number of pages to fetch.
    max_records : int | None
        Optional maximum number of records to fetch.
    page_param : str
        Query parameter name carrying the page number or offset.
    size_param : str
        Query parameter name carrying the page size.
    cursor_param : str
        Query parameter name carrying the cursor.
    limit_param : str
        Query parameter name carrying the page size for cursor-based
        pagination when the API uses a separate limit field.
    fetch : FetchPageCallable | None
        Callback used to fetch a single page. It receives the absolute URL,
        the request params mapping, and the 1-based page index.
    rate_limiter : RateLimiter | None
        Optional rate limiter invoked between page fetches.
    last_page : int
        Tracks the last page index attempted. Useful for diagnostics.
    """

    # -- Constants -- #

    # Pagination defaults
    START_PAGE: ClassVar[int] = 1
    PAGE_SIZE: ClassVar[int] = 100
    CURSOR_PARAM: ClassVar[str] = PaginationType.CURSOR
    LIMIT_PARAM: ClassVar[str] = 'limit'

    # Mapped pagination defaults
    PAGE_PARAMS: ClassVar[dict[PaginationType, str]] = {
        PaginationType.PAGE: 'page',
        PaginationType.OFFSET: 'offset',
        PaginationType.CURSOR: 'page',
    }
    SIZE_PARAMS: ClassVar[dict[PaginationType, str]] = {
        PaginationType.PAGE: 'per_page',
        PaginationType.OFFSET: 'limit',
        PaginationType.CURSOR: 'limit',
    }
    START_PAGES: ClassVar[dict[PaginationType, int]] = {
        PaginationType.PAGE: 1,
        PaginationType.OFFSET: 0,
        PaginationType.CURSOR: 1,
    }

    # -- Attributes -- #

    type: PaginationType = PaginationType.PAGE
    page_size: int = PAGE_SIZE
    start_page: int = START_PAGE
    # start_cursor: str | int | None = None
    start_cursor: object | None = None
    records_path: str | None = None
    fallback_path: str | None = None
    cursor_path: str | None = None
    max_pages: int | None = None
    max_records: int | None = None
    page_param: str = ''
    size_param: str = ''
    cursor_param: str = ''
    limit_param: str = ''

    # -- Magic Methods (Object Lifecycle) -- #

    def __post_init__(self) -> None:
        """
        Normalize and validate pagination configuration.
        """
        # Normalize type to supported PaginationType.
        if self.type not in (
            PaginationType.PAGE,
            PaginationType.OFFSET,
            PaginationType.CURSOR,
        ):
            self.type = PaginationType.PAGE
        # Normalize start_page based on type.
        if self.start_page < 0:
            self.start_page = self.START_PAGES[self.type]
        if self.type == PaginationType.PAGE and self.start_page < 1:
            self.start_page = 1
        # Enforce minimum page_size.
        if self.page_size < 1:
            self.page_size = 1
        # Normalize parameter names by type-specific defaults.
        if not self.page_param:
            self.page_param = self.PAGE_PARAMS[self.type]
        if not self.size_param:
            self.size_param = self.SIZE_PARAMS[self.type]
        if not self.cursor_param:
            self.cursor_param = self.CURSOR_PARAM
        if not self.limit_param:
            self.limit_param = self.LIMIT_PARAM

    fetch: FetchPageCallable | None = None
    rate_limiter: RateLimiter | None = None
    last_page: int = 0

    # -- Class Methods -- #

    @classmethod
    def from_config(
        cls,
        config: PaginationInput,
        *,
        fetch: FetchPageCallable,
        rate_limiter: RateLimiter | None = None,
    ) -> Paginator:
        """
        Normalize config and build a paginator instance.

        Parameters
        ----------
        config : PaginationInput
            Pagination configuration mapping or :class:`PaginationConfig`.
        fetch : FetchPageCallable
            Callback used to fetch a single page for a request given the
            absolute URL, the request params mapping, and the 1-based page
            index.
        rate_limiter : RateLimiter | None, optional
            Optional limiter invoked between page fetches.

        Returns
        -------
        Paginator
            Configured paginator instance.
        """
        # Normalize configuration into a mapping for downstream helpers.
        if isinstance(config, PaginationConfig):
            cfg: Mapping[str, Any] = {
                'type': config.type,
                'page_param': config.page_param,
                'size_param': config.size_param,
                'start_page': config.start_page,
                'page_size': config.page_size,
                'cursor_param': config.cursor_param,
                'cursor_path': config.cursor_path,
                'start_cursor': config.start_cursor,
                'records_path': config.records_path,
                'fallback_path': config.fallback_path,
                'max_pages': config.max_pages,
                'max_records': config.max_records,
                'limit_param': config.limit_param,
            }
        else:
            cfg = cast(Mapping[str, Any], config or {})

        ptype = cls.detect_type(cfg, default=PaginationType.PAGE)
        assert ptype is not None

        return cls(
            type=ptype,
            page_size=to_positive_int(cfg.get('page_size'), cls.PAGE_SIZE),
            start_page=to_maximum_int(
                cfg.get('start_page'),
                cls.START_PAGES[ptype],
            ),
            start_cursor=cfg.get('start_cursor'),
            records_path=cfg.get('records_path'),
            fallback_path=cfg.get('fallback_path'),
            cursor_path=cfg.get('cursor_path'),
            max_pages=to_int(cfg.get('max_pages'), None, minimum=1),
            max_records=to_int(cfg.get('max_records'), None, minimum=1),
            page_param=cfg.get('page_param', ''),
            size_param=cfg.get('size_param', ''),
            cursor_param=cfg.get('cursor_param', ''),
            limit_param=cfg.get('limit_param', ''),
            fetch=fetch,
            rate_limiter=rate_limiter,
        )

    # -- Instance Methods -- #

    def paginate(
        self,
        url: Url,
        *,
        request: RequestOptions | None = None,
    ) -> JSONRecords:
        """
        Collect all records across pages into a list of dicts.

        Parameters
        ----------
        url : Url
            Absolute URL of the endpoint to fetch.
        request : RequestOptions | None, optional
            Request metadata snapshot reused across pages.

        Returns
        -------
        JSONRecords
            List of record dicts aggregated across all fetched pages.
        """
        prepared = request or RequestOptions()
        return list(self.paginate_iter(url, request=prepared))

    def paginate_iter(
        self,
        url: Url,
        *,
        request: RequestOptions | None = None,
    ) -> Generator[JSONDict]:
        """
        Yield record dicts across pages for the configured strategy.

        Parameters
        ----------
        url : Url
            Absolute URL of the endpoint to fetch.
        request : RequestOptions | None, optional
            Pre-built request metadata snapshot to clone per page.

        Yields
        ------
        Generator[JSONDict]
            Iterator over the record dicts extracted from paginated responses.

        Raises
        ------
        ValueError
            If ``fetch`` callback is not provided.
        """
        if self.fetch is None:
            raise ValueError('Paginator.fetch must be provided')

        base_request = request or RequestOptions()

        match self.type:
            case PaginationType.PAGE | PaginationType.OFFSET:
                yield from self._iterate_page_style(url, base_request)
                return
            case PaginationType.CURSOR:
                yield from self._iterate_cursor_style(url, base_request)
                return

    # -- Internal Instance Methods -- #

    def _enforce_rate_limit(self) -> None:
        """Apply configured pacing between subsequent page fetches."""
        if self.rate_limiter is not None:
            self.rate_limiter.enforce()

    def _fetch_page(
        self,
        url: Url,
        request: RequestOptions,
    ) -> Any:
        """
        Fetch a single page and attach page index on failure.

        When the underlying ``fetch`` raises :class:`ApiRequestError`, this
        helper re-raises :class:`PaginationError` with the current
        ``last_page`` value populated so callers can inspect the failing
        page index.

        Parameters
        ----------
        url : Url
            Absolute URL of the endpoint to fetch.
        request : RequestOptions
            Request metadata (params/headers/timeout) for the fetch.

        Returns
        -------
        Any
            Parsed JSON payload of the fetched page.

        Raises
        ------
        PaginationError
            When the underlying ``fetch`` fails with :class:`ApiRequestError`.
        ValueError
            When ``fetch`` is not provided.
        """
        if self.fetch is None:
            raise ValueError('Paginator.fetch must be provided')
        try:
            return self.fetch(url, request, self.last_page)
        except ApiRequestError as e:
            raise PaginationError(
                url=e.url,
                status=e.status,
                attempts=e.attempts,
                retried=e.retried,
                retry_policy=e.retry_policy,
                cause=e,
                page=self.last_page,
            ) from e

    def _iterate_cursor_style(
        self,
        url: Url,
        request: RequestOptions,
    ) -> Generator[JSONDict]:
        """
        Yield record dicts for cursor-based pagination strategies.

        Parameters
        ----------
        url : Url
            Endpoint URL to paginate.
        request : RequestOptions
            Base request metadata passed by the caller.

        Yields
        ------
        Generator[JSONDict]
            Iterator over normalized record dictionaries for each page.
        """
        cursor = self.start_cursor
        pages = 0
        emitted = 0

        while True:
            self.last_page = pages + 1
            overrides = (
                {self.cursor_param: cursor} if cursor is not None else None
            )
            combined: dict[str, Any] = {
                self.limit_param: self.page_size,
            } | dict(request.params or {})
            if overrides:
                combined |= {
                    k: v for k, v in overrides.items() if v is not None
                }
            req_options = request.evolve(params=combined)

            page_data = self._fetch_page(url, req_options)
            batch = self.coalesce_records(
                page_data,
                self.records_path,
                self.fallback_path,
            )

            pages += 1
            trimmed, exhausted = self._limit_batch(batch, emitted)
            yield from trimmed
            emitted += len(trimmed)

            nxt = self.next_cursor_from(page_data, self.cursor_path)
            if exhausted or not nxt or not batch:
                break
            if self._stop_limits(pages, emitted):
                break

            cursor = nxt
            self._enforce_rate_limit()

    def _iterate_page_style(
        self,
        url: Url,
        request: RequestOptions,
    ) -> Generator[JSONDict]:
        """
        Yield record dicts for page/offset pagination strategies.

        Parameters
        ----------
        url : Url
            Endpoint URL to paginate.
        request : RequestOptions
            Base request metadata passed by the caller.

        Yields
        ------
        Generator[JSONDict]
            Iterator over normalized record dictionaries for each page.
        """
        current = self._resolve_start_page(request)
        pages = 0
        emitted = 0

        while True:
            self.last_page = pages + 1
            merged = dict(request.params or {}) | {
                self.page_param: current,
                self.size_param: self.page_size,
            }
            req_options = request.evolve(params=merged)
            page_data = self._fetch_page(url, req_options)
            batch = self.coalesce_records(
                page_data,
                self.records_path,
                self.fallback_path,
            )

            pages += 1
            trimmed, exhausted = self._limit_batch(batch, emitted)
            yield from trimmed
            emitted += len(trimmed)

            if exhausted or len(batch) < self.page_size:
                break
            if self._stop_limits(pages, emitted):
                break

            current = self._next_page_value(current)
            self._enforce_rate_limit()

    def _limit_batch(
        self,
        batch: JSONRecords,
        emitted: int,
    ) -> tuple[JSONRecords, bool]:
        """Respect ``max_records`` while yielding the current batch.

        Parameters
        ----------
        batch : JSONRecords
            Records retrieved from the latest page fetch.
        emitted : int
            Count of records yielded so far.

        Returns
        -------
        tuple[JSONRecords, bool]
            ``(records_to_emit, exhausted)`` where ``exhausted`` indicates
            the ``max_records`` limit was reached.
        """
        if not isinstance(self.max_records, int):
            return batch, False

        remaining = self.max_records - emitted
        if remaining <= 0:
            return [], True
        if len(batch) > remaining:
            return batch[:remaining], True
        return batch, False

    def _next_page_value(
        self,
        current: int,
    ) -> int:
        """
        Return the next page/offset value for the active strategy.

        Parameters
        ----------
        current : int
            Current page number or offset value.

        Returns
        -------
        int
            Incremented page number or offset respecting pagination type.
        """
        if self.type == PaginationType.OFFSET:
            return current + self.page_size
        return current + 1

    def _resolve_start_page(
        self,
        request: RequestOptions,
    ) -> int:
        """
        Allow per-call overrides of the first page via request params.

        Parameters
        ----------
        request : RequestOptions
            Request metadata snapshot passed by the caller.

        Returns
        -------
        int
            Starting page number or offset for this pagination session.
        """
        if not request.params:
            return self.start_page
        maybe = request.params.get(self.page_param)
        if maybe is None:
            return self.start_page
        parsed = to_int(maybe)
        if parsed is None:
            return self.start_page
        if self.type == PaginationType.OFFSET:
            return parsed if parsed >= 0 else self.START_PAGES[self.type]
        return parsed if parsed >= 1 else self.START_PAGES[self.type]

    def _stop_limits(
        self,
        pages: int,
        recs: int,
    ) -> bool:
        """
        Check if pagination limits have been reached.

        Parameters
        ----------
        pages : int
            Number of pages fetched so far.
        recs : int
            Number of records fetched so far.

        Returns
        -------
        bool
            True if any limit has been reached, False otherwise.
        """
        if isinstance(self.max_pages, int) and pages >= self.max_pages:
            return True
        if isinstance(self.max_records, int) and recs >= self.max_records:
            return True
        return False

    # -- Static Methods -- #

    @staticmethod
    def coalesce_records(
        x: Any,
        records_path: str | None,
        fallback_path: str | None = None,
    ) -> JSONRecords:
        """
        Coalesce JSON page payloads into a list of dicts.

        Parameters
        ----------
        x : Any
            The JSON payload from an API response.
        records_path : str | None
            Optional dotted path to the records within the payload.
        fallback_path : str | None
            Secondary dotted path consulted when ``records_path`` resolves to
            ``None`` or an empty list.

        Returns
        -------
        JSONRecords
            List of record dicts extracted from the payload.

        Notes
        -----
        Supports dotted path extraction via ``records_path`` and handles
        lists, mappings, and scalars by coercing non-dict items into
        ``{"value": x}``.
        """
        resolver = partial(_resolve_path, x)
        data = resolver(records_path)
        if data is _MISSING:
            data = None

        if fallback_path and (
            data is None or (isinstance(data, list) and not data)
        ):
            fallback = resolver(fallback_path)
            if fallback is not _MISSING:
                data = fallback

        if data is None and not records_path:
            data = x

        if isinstance(data, list):
            out: JSONRecords = []
            for item in data:
                if isinstance(item, dict):
                    out.append(cast(JSONDict, item))
                else:
                    out.append(cast(JSONDict, {'value': item}))
            return out
        if isinstance(data, dict):
            items = data.get('items')
            if isinstance(items, list):
                return Paginator.coalesce_records(items, None)
            return [cast(JSONDict, data)]

        return [cast(JSONDict, {'value': data})]

    @staticmethod
    def detect_type(
        config: Mapping[str, Any] | None,
        *,
        default: PaginationType | None = None,
    ) -> PaginationType | None:
        """
        Return a normalized pagination type when possible.

        Parameters
        ----------
        config : Mapping[str, Any] | None
            Pagination configuration mapping.
        default : PaginationType | None, optional
            Default type to return when not specified in config.

        Returns
        -------
        PaginationType | None
            Detected pagination type, or ``default`` if not found.
        """
        if not config:
            return default

        raw = config.get('type')
        if raw is None:
            return default

        # Delegate normalization to CoercibleStrEnum implementation,
        # allowing aliases and consistent error handling.
        coerced = PaginationType.try_coerce(raw)
        return coerced if coerced is not None else default

    @staticmethod
    def next_cursor_from(
        data_obj: Any,
        path: str | None,
    ) -> str | int | None:
        """
        Extract a cursor value from a JSON payload using a dotted path.

        Parameters
        ----------
        data_obj : Any
            The JSON payload object (expected to be a mapping).
        path : str | None
            Dotted path within the payload that points to the next cursor.

        Returns
        -------
        str | int | None
            The extracted cursor value if present and of type ``str`` or
            ``int``; otherwise ``None``.
        """
        if not (isinstance(path, str) and path and isinstance(data_obj, dict)):
            return None
        cur: Any = data_obj
        for part in path.split('.'):
            if isinstance(cur, dict):
                cur = cur.get(part)
            else:
                return None
        return cur if isinstance(cur, (str, int)) else None
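
For readers evaluating the package, the module docstring above sketches the intended calling pattern: supply a fetch callback and let the paginator inject the paging parameters. The snippet below is a minimal, hypothetical usage sketch, not code from the published wheel: it assumes requests as the HTTP client, a page-numbered API, and an illustrative records_path of 'data.items'; only Paginator, PaginationType, RequestOptions, and Url come from the diff above.

# Hypothetical usage sketch (not part of the etlplus 0.5.4 wheel).
import requests

from etlplus.api.pagination import Paginator, PaginationType
from etlplus.api.types import RequestOptions, Url


def fetch(url: Url, req: RequestOptions, page: int | None) -> dict:
    # One HTTP call per page; the paginator injects page/size (or
    # cursor/limit) query parameters into ``req.params``.
    resp = requests.get(url, params=dict(req.params or {}), timeout=30)
    resp.raise_for_status()
    return resp.json()


paginator = Paginator(
    type=PaginationType.PAGE,
    page_size=100,
    records_path='data.items',  # assumed payload layout
    max_records=500,
    fetch=fetch,
)

for row in paginator.paginate_iter('https://api.example.com/items'):
    print(row)

When the settings come from configuration rather than code, Paginator.from_config(cfg, fetch=fetch) accepts either a mapping (with keys such as 'type', 'cursor_path', 'max_pages') or a PaginationConfig instance, per from_config and detect_type above.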