cloe-nessy 0.3.17.0__py3-none-any.whl → 0.3.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloe_nessy/clients/api_client/__init__.py +10 -1
- cloe_nessy/clients/api_client/api_client.py +19 -8
- cloe_nessy/clients/api_client/api_response.py +7 -4
- cloe_nessy/clients/api_client/pagination_config.py +84 -0
- cloe_nessy/clients/api_client/pagination_strategy.py +500 -0
- cloe_nessy/integration/delta_loader/delta_loader.py +1 -1
- cloe_nessy/integration/reader/__init__.py +2 -2
- cloe_nessy/integration/reader/api_reader.py +463 -72
- cloe_nessy/integration/reader/catalog_reader.py +49 -10
- cloe_nessy/integration/reader/excel_reader.py +3 -3
- cloe_nessy/integration/reader/file_reader.py +3 -1
- cloe_nessy/integration/reader/reader.py +1 -1
- cloe_nessy/integration/writer/catalog_writer.py +64 -2
- cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py +5 -1
- cloe_nessy/models/column.py +3 -2
- cloe_nessy/models/schema.py +1 -0
- cloe_nessy/models/templates/create_table.sql.j2 +22 -0
- cloe_nessy/object_manager/table_manager.py +29 -7
- cloe_nessy/pipeline/actions/__init__.py +1 -1
- cloe_nessy/pipeline/actions/read_api.py +272 -75
- cloe_nessy/pipeline/actions/read_catalog_table.py +73 -10
- cloe_nessy/pipeline/actions/read_excel.py +1 -1
- cloe_nessy/pipeline/actions/read_metadata_yaml.py +61 -33
- cloe_nessy/pipeline/actions/transform_decode.py +2 -1
- cloe_nessy/pipeline/actions/transform_join.py +98 -24
- cloe_nessy/pipeline/actions/transform_union.py +2 -2
- cloe_nessy/pipeline/actions/write_catalog_table.py +66 -21
- cloe_nessy/pipeline/actions/write_delta_merge.py +1 -0
- cloe_nessy/pipeline/pipeline_config.py +2 -0
- cloe_nessy/pipeline/pipeline_context.py +1 -1
- cloe_nessy/pipeline/pipeline_parsing_service.py +104 -39
- cloe_nessy/pipeline/pipeline_step.py +2 -0
- cloe_nessy/session/__init__.py +2 -1
- cloe_nessy/session/pyspark_compat.py +15 -0
- cloe_nessy/session/session_manager.py +1 -1
- {cloe_nessy-0.3.17.0.dist-info → cloe_nessy-0.3.19.dist-info}/METADATA +19 -19
- {cloe_nessy-0.3.17.0.dist-info → cloe_nessy-0.3.19.dist-info}/RECORD +38 -36
- {cloe_nessy-0.3.17.0.dist-info → cloe_nessy-0.3.19.dist-info}/WHEEL +1 -2
- cloe_nessy-0.3.17.0.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from .api_response import APIResponse
|
|
6
|
+
from .pagination_config import (
|
|
7
|
+
LimitOffsetPaginationConfigData,
|
|
8
|
+
PageBasedPaginationConfigData,
|
|
9
|
+
PaginationStrategyConfigData,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PaginationStrategy(ABC):
    """Common machinery shared by concrete pagination strategies.

    A strategy is configured once and then interrogated page by page: it can
    say whether a response carries data, whether an explicit next-page pointer
    exists, and (via subclasses) how to derive the parameters of the next page.
    """

    # Registry/lookup identifier; concrete subclasses override this.
    name = ""

    def __init__(self, config: PaginationStrategyConfigData):
        """Keep the pagination configuration for use by the strategy methods."""
        self._config: PaginationStrategyConfigData = config

    @staticmethod
    def _resolve_path(data: Any, path: str | None) -> Any:
        """Walk a dotted path (e.g. 'info.next_page') through nested dicts.

        Returns the value at the end of the path, the input unchanged when the
        path is empty/None, or None as soon as any segment is missing.
        """
        if not path:
            return data
        node = data
        for segment in path.split("."):
            if not (isinstance(node, dict) and segment in node):
                return None
            node = node[segment]
        return node

    def has_any_data(self, response: APIResponse) -> bool:
        """Report whether the current page carries data.

        When 'check_field' is configured, the truthiness of that field decides;
        otherwise the truthiness of the entire payload does.
        """
        payload = response.to_dict()
        field = self._config.get("check_field")
        if field:
            return bool(self._resolve_path(payload, field))
        return bool(payload)

    def has_more_pages(self, response: APIResponse) -> bool | None:
        """Evaluate the explicit next-page pointer.

        Returns True/False when 'next_page_field' is configured, and None when
        it is not (i.e. the strategy has no explicit pointer to consult).
        """
        pointer = self._config.get("next_page_field")
        if not pointer:
            return None
        return bool(self._resolve_path(response.to_dict(), pointer))

    def has_more_data(self, response: APIResponse) -> bool:
        """Decide whether fetching should continue.

        An explicit 'next_page_field' answer wins; otherwise fall back to
        whether the current page has any data at all.
        """
        explicit = self.has_more_pages(response)
        return self.has_any_data(response) if explicit is None else explicit

    @abstractmethod
    def get_next_params(self, current_params: Any) -> Any:
        """Produce the parameter set for the following API request."""

    @abstractmethod
    def probe_max_page(
        self,
        endpoint: str,
        params: dict[str, Any],
        headers: dict[str, Any] | None,
        data: dict[str, Any] | None,
        json_body: dict[str, Any] | None,
        make_request: Callable[
            [str, dict[str, Any], dict[str, Any] | None, dict[str, Any] | None, dict[str, Any] | None], APIResponse
        ],
    ) -> list[dict[str, Any]]:
        """Discover every available page and return one parameter map per page."""
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class LimitOffsetStrategy(PaginationStrategy):
    """Limit/offset pagination: pages are addressed by an offset advancing in steps of 'limit'."""

    name = "limit_offset"

    def __init__(self, config: LimitOffsetPaginationConfigData):
        """Store the limit/offset configuration (narrowing the type seen by this class)."""
        super().__init__(config)
        self._config: LimitOffsetPaginationConfigData = config

    def get_next_params(self, current_params: dict[str, Any]) -> dict[str, Any]:
        """Advance the offset by one limit step, in place, and return the updated dict."""
        limit_key = self._config["limit_field"]
        offset_key = self._config["offset_field"]

        step = int(current_params[limit_key])
        position = int(current_params[offset_key])

        current_params[offset_key] = position + step
        return current_params

    def _aligned_double(self, current_offset: int, limit_val: int) -> int:
        """Pick the next probe offset: roughly double progress, staying on a multiple of 'limit_val'."""
        if current_offset == 0:
            return limit_val
        # One limit-step past the current block, doubled, keeps the probe aligned.
        return ((current_offset // limit_val) + 1) * limit_val * 2

    def _request(
        self,
        endpoint: str,
        params: dict[str, Any],
        headers: dict[str, Any] | None,
        data: dict[str, Any] | None,
        json_body: dict[str, Any] | None,
        make_request: Callable[
            [str, dict[str, Any], dict[str, Any] | None, dict[str, Any] | None, dict[str, Any] | None], APIResponse
        ],
    ) -> APIResponse:
        """Forward all request arguments to the injected request callable."""
        return make_request(endpoint, params, headers, data, json_body)

    def _expansion_phase(
        self,
        endpoint: str,
        base_params: dict[str, Any],
        headers: dict[str, Any] | None,
        data: dict[str, Any] | None,
        json_body: dict[str, Any] | None,
        make_request: Callable[
            [str, dict[str, Any], dict[str, Any] | None, dict[str, Any] | None, dict[str, Any] | None], APIResponse
        ],
        initial_offset: int,
        limit_val: int,
        offset_field: str,
        limit_field: str,
        max_steps: int = 64,
    ) -> tuple[bool, int | None, int, bool]:
        """Exponentially probe offsets until an empty page or an explicit stop signal appears.

        Returns:
            found_any: whether any probed page contained data
            last_good: last offset known to hold data (None if none did)
            probe: offset at which probing stopped
            stopped_by_signal: True when 'next_page_field' explicitly reported no more pages
        """
        found_any = False
        last_good: int | None = None
        probe = initial_offset
        stopped_by_signal = False

        for _ in range(max_steps):
            attempt = dict(base_params)
            attempt[offset_field] = probe
            attempt[limit_field] = limit_val

            resp = self._request(endpoint, attempt, headers, data, json_body, make_request)
            page_has_data = self.has_any_data(resp)
            next_signal = self.has_more_pages(resp)

            if not page_has_data:
                break

            found_any = True
            last_good = probe

            if next_signal is False:
                stopped_by_signal = True
                break

            probe = self._aligned_double(probe, limit_val)

        return found_any, last_good, probe, stopped_by_signal

    def _binary_search_last_valid_offset(
        self,
        endpoint: str,
        base_params: dict[str, Any],
        headers: dict[str, Any] | None,
        data: dict[str, Any] | None,
        json_body: dict[str, Any] | None,
        make_request: Callable[
            [str, dict[str, Any], dict[str, Any] | None, dict[str, Any] | None, dict[str, Any] | None], APIResponse
        ],
        low_valid_offset: int,
        high_invalid_offset: int,
        limit_val: int,
        offset_field: str,
        limit_field: str,
        max_steps: int = 64,
    ) -> int:
        """Binary-search the last data-bearing offset between a known-good low and an invalid/safety high."""
        low = low_valid_offset
        high = max(high_invalid_offset, low + limit_val)

        for _ in range(max_steps):
            # Stop once the window cannot contain another limit-aligned candidate.
            if low + limit_val >= high:
                break

            mid = ((low + high) // 2 // limit_val) * limit_val
            if mid <= low:
                mid = low + limit_val
            if mid >= high:
                break

            candidate = dict(base_params)
            candidate[offset_field] = mid
            candidate[limit_field] = limit_val
            resp = self._request(endpoint, candidate, headers, data, json_body, make_request)

            if self.has_any_data(resp):
                low = mid
            else:
                high = mid

        return low

    def _build_offset_pages(
        self,
        base_params: dict[str, Any],
        initial_offset: int,
        last_valid_offset: int,
        limit_val: int,
        offset_field: str,
        limit_field: str,
    ) -> list[dict[str, Any]]:
        """Materialize one parameter map per offset from the initial to the last valid offset."""
        if last_valid_offset < initial_offset:
            return []
        page_count = ((last_valid_offset - initial_offset) // limit_val) + 1
        if page_count <= 0:
            return []
        return [
            {**base_params, offset_field: initial_offset + step * limit_val, limit_field: limit_val}
            for step in range(page_count)
        ]

    def probe_max_page(
        self,
        endpoint: str,
        params: dict[str, Any],
        headers: dict[str, Any] | None,
        data: dict[str, Any] | None,
        json_body: dict[str, Any] | None,
        make_request: Callable[
            [str, dict[str, Any], dict[str, Any] | None, dict[str, Any] | None, dict[str, Any] | None], APIResponse
        ],
    ) -> list[dict[str, Any]]:
        """Locate the last available offset and return one parameter map per page.

        Works with either a data-presence field (config['check_field']) or an
        explicit next-page pointer (config['next_page_field']). When both are
        present, a page counts as valid if it has data, and probing stops once
        the pointer reports no further pages.

        Note: 'limit_field' must be present in params (KeyError otherwise);
        a missing offset defaults to 0.
        """
        offset_field = self._config["offset_field"]
        limit_field = self._config["limit_field"]

        initial_offset = max(0, int(params.get(offset_field, 0)))
        limit_val = int(params[limit_field])

        # A non-positive limit cannot be paged; echo back a single request as-is.
        if limit_val <= 0:
            single = dict(params)
            single[offset_field] = initial_offset
            single[limit_field] = limit_val
            return [single]

        found_any, last_good, probe, stopped_by_signal = self._expansion_phase(
            endpoint,
            params,
            headers,
            data,
            json_body,
            make_request,
            initial_offset,
            limit_val,
            offset_field,
            limit_field,
        )

        if not found_any or last_good is None:
            return []

        # With an explicit stop signal the page after 'last_good' is known empty;
        # otherwise treat the last probe position as a safety upper bound.
        if stopped_by_signal:
            upper_bound = last_good + limit_val
        else:
            upper_bound = max(probe, last_good + limit_val)

        last_valid_offset = self._binary_search_last_valid_offset(
            endpoint,
            params,
            headers,
            data,
            json_body,
            make_request,
            last_good,
            upper_bound,
            limit_val,
            offset_field,
            limit_field,
        )

        return self._build_offset_pages(params, initial_offset, last_valid_offset, limit_val, offset_field, limit_field)
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
class PageBasedStrategy(PaginationStrategy):
    """Page-number pagination: pages are addressed by an integer page counter."""

    name = "page_based"

    def __init__(self, config: PageBasedPaginationConfigData):
        """Store the page-based configuration (narrowing the type seen by this class)."""
        super().__init__(config)
        self._config: PageBasedPaginationConfigData = config

    def get_next_params(self, current_params: dict[str, Any]) -> dict[str, Any]:
        """Bump the page number by one, in place, and return the updated dict."""
        page_key = self._config["page_field"]
        current_params[page_key] = int(current_params[page_key]) + 1
        return current_params

    def _page_request(
        self,
        endpoint: str,
        base_params: dict[str, Any],
        headers: dict[str, Any] | None,
        data: dict[str, Any] | None,
        json_body: dict[str, Any] | None,
        make_request: Callable[
            [str, dict[str, Any], dict[str, Any] | None, dict[str, Any] | None, dict[str, Any] | None], APIResponse
        ],
        page_field: str,
        page_value: int,
    ) -> APIResponse:
        """Issue a request for one specific page number (base params left untouched)."""
        request_params = {**base_params, page_field: page_value}
        return make_request(endpoint, request_params, headers, data, json_body)

    def _expansion_phase(
        self,
        endpoint: str,
        base_params: dict[str, Any],
        headers: dict[str, Any] | None,
        data: dict[str, Any] | None,
        json_body: dict[str, Any] | None,
        make_request: Callable[
            [str, dict[str, Any], dict[str, Any] | None, dict[str, Any] | None, dict[str, Any] | None], APIResponse
        ],
        start_page: int,
        page_field: str,
        max_steps: int = 64,
    ) -> tuple[int, bool, int]:
        """Exponentially probe page numbers until an empty page or an explicit stop signal appears.

        Returns:
            best: last page number known to hold data (0 if none did)
            explicit_stop: True when 'next_page_field' explicitly reported no more pages
            cursor: page number at which probing stopped
        """
        best = 0
        cursor = start_page
        explicit_stop = False

        for _ in range(max_steps):
            resp = self._page_request(endpoint, base_params, headers, data, json_body, make_request, page_field, cursor)
            page_has_data = self.has_any_data(resp)
            next_signal = self.has_more_pages(resp)

            if not page_has_data:
                break

            best = cursor

            if next_signal is False:
                explicit_stop = True
                break

            cursor = cursor * 2 if cursor > 0 else 1

        return best, explicit_stop, cursor

    def _binary_search_last_valid_page(
        self,
        endpoint: str,
        base_params: dict[str, Any],
        headers: dict[str, Any] | None,
        data: dict[str, Any] | None,
        json_body: dict[str, Any] | None,
        make_request: Callable[
            [str, dict[str, Any], dict[str, Any] | None, dict[str, Any] | None, dict[str, Any] | None], APIResponse
        ],
        low_valid: int,
        high_invalid_or_safety: int,
        page_field: str,
        max_steps: int = 64,
    ) -> int:
        """Binary-search the last data-bearing page between a known-good low and an invalid/safety high."""
        low = low_valid
        high = max(high_invalid_or_safety, low + 1)

        for _ in range(max_steps):
            if low + 1 >= high:
                break

            mid = (low + high) // 2
            resp = self._page_request(endpoint, base_params, headers, data, json_body, make_request, page_field, mid)

            if self.has_any_data(resp):
                low = mid
            else:
                high = mid

        best = low

        # When 'high' may only be a safety bound (never proven empty), peek one page past 'best'.
        if best + 1 < high:
            resp = self._page_request(
                endpoint, base_params, headers, data, json_body, make_request, page_field, best + 1
            )
            if self.has_any_data(resp):
                best += 1

        return best

    def _build_page_list(
        self,
        base_params: dict[str, Any],
        start_page: int,
        max_page: int,
        page_field: str,
    ) -> list[dict[str, Any]]:
        """Materialize one parameter map per page from start_page to max_page inclusive."""
        if max_page < start_page:
            return []
        return [{**base_params, page_field: page} for page in range(start_page, max_page + 1)]

    def probe_max_page(
        self,
        endpoint: str,
        params: dict[str, Any],
        headers: dict[str, Any] | None,
        data: dict[str, Any] | None,
        json_body: dict[str, Any] | None,
        make_request: Callable[
            [str, dict[str, Any], dict[str, Any] | None, dict[str, Any] | None, dict[str, Any] | None], APIResponse
        ],
    ) -> list[dict[str, Any]]:
        """Locate the highest available page and return one parameter map per page.

        Honors both 'check_field' (data present) and 'next_page_field' (explicit
        next pointer). When probing stops because the pointer reported no more
        pages, no forward-check beyond that point is performed.
        """
        page_field = self._config["page_field"]

        # Pages are 1-based; a missing or smaller start clamps to 1.
        start_page = max(1, int(params.get(page_field, 1)))

        last_valid, explicit_stop, cursor = self._expansion_phase(
            endpoint, params, headers, data, json_body, make_request, start_page, page_field
        )

        if last_valid == 0:
            return []

        if explicit_stop:
            return self._build_page_list(params, start_page, last_valid, page_field)

        max_page = self._binary_search_last_valid_page(
            endpoint,
            params,
            headers,
            data,
            json_body,
            make_request,
            low_valid=last_valid,
            high_invalid_or_safety=max(cursor, last_valid + 1),
            page_field=page_field,
        )
        return self._build_page_list(params, start_page, max_page, page_field)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
from .api_reader import APIReader
|
|
1
|
+
from .api_reader import APIReader, RequestSet
|
|
2
2
|
from .catalog_reader import CatalogReader
|
|
3
3
|
from .excel_reader import ExcelDataFrameReader
|
|
4
4
|
from .file_reader import FileReader
|
|
5
5
|
|
|
6
|
-
__all__ = ["APIReader", "CatalogReader", "FileReader", "ExcelDataFrameReader"]
|
|
6
|
+
__all__ = ["APIReader", "CatalogReader", "FileReader", "ExcelDataFrameReader", "RequestSet"]
|