airbyte-cdk 6.12.4.dev0__py3-none-any.whl → 6.13.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +93 -34
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +0 -1
- airbyte_cdk/models/__init__.py +10 -11
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +1 -1
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -5
- airbyte_cdk/sources/declarative/auth/oauth.py +27 -12
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +25 -65
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +78 -1
- airbyte_cdk/sources/declarative/decoders/__init__.py +21 -3
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +10 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +10 -6
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -14
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +49 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +96 -80
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +23 -5
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +52 -35
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +10 -7
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +9 -4
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +11 -6
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +16 -5
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +14 -13
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +7 -8
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +10 -7
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +12 -3
- airbyte_cdk/sources/declarative/resolvers/__init__.py +31 -8
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +20 -14
- airbyte_cdk/sources/declarative/retrievers/__init__.py +5 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +9 -32
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +72 -65
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -2
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +20 -3
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/file_types/__init__.py +12 -3
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/streams/__init__.py +1 -1
- airbyte_cdk/sources/streams/checkpoint/__init__.py +2 -3
- airbyte_cdk/sources/streams/concurrent/cursor.py +0 -1
- airbyte_cdk/sources/streams/http/__init__.py +2 -2
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +2 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +8 -3
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/mocker.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/__init__.py +1 -1
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/RECORD +59 -58
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -344
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -2,10 +2,28 @@
|
|
2
2
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.partition_routers.
|
6
|
-
|
7
|
-
|
8
|
-
from airbyte_cdk.sources.declarative.partition_routers.
|
5
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
6
|
+
AsyncJobPartitionRouter,
|
7
|
+
)
|
8
|
+
from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
|
9
|
+
CartesianProductStreamSlicer,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
|
12
|
+
ListPartitionRouter,
|
13
|
+
)
|
9
14
|
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
15
|
+
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
16
|
+
SinglePartitionRouter,
|
17
|
+
)
|
18
|
+
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
19
|
+
SubstreamPartitionRouter,
|
20
|
+
)
|
10
21
|
|
11
|
-
__all__ = [
|
22
|
+
__all__ = [
|
23
|
+
"AsyncJobPartitionRouter",
|
24
|
+
"CartesianProductStreamSlicer",
|
25
|
+
"ListPartitionRouter",
|
26
|
+
"SinglePartitionRouter",
|
27
|
+
"SubstreamPartitionRouter",
|
28
|
+
"PartitionRouter",
|
29
|
+
]
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass, field
|
4
|
+
from typing import Any, Callable, Iterable, Mapping, Optional
|
5
|
+
|
6
|
+
from airbyte_cdk.models import FailureType
|
7
|
+
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
8
|
+
AsyncJobOrchestrator,
|
9
|
+
AsyncPartition,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
12
|
+
SinglePartitionRouter,
|
13
|
+
)
|
14
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
15
|
+
from airbyte_cdk.sources.types import Config, StreamSlice
|
16
|
+
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
17
|
+
|
18
|
+
|
19
|
+
@dataclass
|
20
|
+
class AsyncJobPartitionRouter(StreamSlicer):
|
21
|
+
"""
|
22
|
+
Partition router that creates async jobs in a source API, periodically polls for job
|
23
|
+
completion, and supplies the completed job URL locations as stream slices so that
|
24
|
+
records can be extracted.
|
25
|
+
"""
|
26
|
+
|
27
|
+
config: Config
|
28
|
+
parameters: InitVar[Mapping[str, Any]]
|
29
|
+
job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
|
30
|
+
stream_slicer: StreamSlicer = field(
|
31
|
+
default_factory=lambda: SinglePartitionRouter(parameters={})
|
32
|
+
)
|
33
|
+
|
34
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
35
|
+
self._job_orchestrator_factory = self.job_orchestrator_factory
|
36
|
+
self._job_orchestrator: Optional[AsyncJobOrchestrator] = None
|
37
|
+
self._parameters = parameters
|
38
|
+
|
39
|
+
def stream_slices(self) -> Iterable[StreamSlice]:
|
40
|
+
slices = self.stream_slicer.stream_slices()
|
41
|
+
self._job_orchestrator = self._job_orchestrator_factory(slices)
|
42
|
+
|
43
|
+
for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
|
44
|
+
yield StreamSlice(
|
45
|
+
partition=dict(completed_partition.stream_slice.partition)
|
46
|
+
| {"partition": completed_partition},
|
47
|
+
cursor_slice=completed_partition.stream_slice.cursor_slice,
|
48
|
+
)
|
49
|
+
|
50
|
+
def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
|
51
|
+
"""
|
52
|
+
This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
|
53
|
+
be responsible for. However, this was added in because the JobOrchestrator is required to
|
54
|
+
retrieve records. And without defining fetch_records() on this class, we're stuck with either
|
55
|
+
passing the JobOrchestrator to the AsyncRetriever or storing it on multiple classes.
|
56
|
+
"""
|
57
|
+
|
58
|
+
if not self._job_orchestrator:
|
59
|
+
raise AirbyteTracedException(
|
60
|
+
message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
|
61
|
+
internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
|
62
|
+
failure_type=FailureType.system_error,
|
63
|
+
)
|
64
|
+
|
65
|
+
return self._job_orchestrator.fetch_records(partition=partition)
|
@@ -2,10 +2,24 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategy import
|
6
|
-
|
7
|
-
|
5
|
+
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategy import (
|
6
|
+
BackoffStrategy,
|
7
|
+
)
|
8
|
+
from airbyte_cdk.sources.declarative.requesters.error_handlers.composite_error_handler import (
|
9
|
+
CompositeErrorHandler,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.declarative.requesters.error_handlers.default_error_handler import (
|
12
|
+
DefaultErrorHandler,
|
13
|
+
)
|
8
14
|
from airbyte_cdk.sources.declarative.requesters.error_handlers.error_handler import ErrorHandler
|
9
|
-
from airbyte_cdk.sources.declarative.requesters.error_handlers.http_response_filter import
|
15
|
+
from airbyte_cdk.sources.declarative.requesters.error_handlers.http_response_filter import (
|
16
|
+
HttpResponseFilter,
|
17
|
+
)
|
10
18
|
|
11
|
-
__all__ = [
|
19
|
+
__all__ = [
|
20
|
+
"BackoffStrategy",
|
21
|
+
"CompositeErrorHandler",
|
22
|
+
"DefaultErrorHandler",
|
23
|
+
"ErrorHandler",
|
24
|
+
"HttpResponseFilter",
|
25
|
+
]
|
@@ -2,7 +2,9 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.constant_backoff_strategy import
|
5
|
+
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.constant_backoff_strategy import (
|
6
|
+
ConstantBackoffStrategy,
|
7
|
+
)
|
6
8
|
from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.exponential_backoff_strategy import (
|
7
9
|
ExponentialBackoffStrategy,
|
8
10
|
)
|
@@ -2,9 +2,20 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.requesters.paginators.default_paginator import
|
5
|
+
from airbyte_cdk.sources.declarative.requesters.paginators.default_paginator import (
|
6
|
+
DefaultPaginator,
|
7
|
+
PaginatorTestReadDecorator,
|
8
|
+
)
|
6
9
|
from airbyte_cdk.sources.declarative.requesters.paginators.no_pagination import NoPagination
|
7
10
|
from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
|
8
|
-
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import
|
11
|
+
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
|
12
|
+
PaginationStrategy,
|
13
|
+
)
|
9
14
|
|
10
|
-
__all__ = [
|
15
|
+
__all__ = [
|
16
|
+
"DefaultPaginator",
|
17
|
+
"NoPagination",
|
18
|
+
"PaginationStrategy",
|
19
|
+
"Paginator",
|
20
|
+
"PaginatorTestReadDecorator",
|
21
|
+
]
|
@@ -112,27 +112,39 @@ class DefaultPaginator(Paginator):
|
|
112
112
|
)
|
113
113
|
if isinstance(self.url_base, str):
|
114
114
|
self.url_base = InterpolatedString(string=self.url_base, parameters=parameters)
|
115
|
-
|
115
|
+
|
116
|
+
def get_initial_token(self) -> Optional[Any]:
|
117
|
+
"""
|
118
|
+
Return the page token that should be used for the first request of a stream
|
119
|
+
|
120
|
+
WARNING: get_initial_token() should not be used by streams that use RFR that perform checkpointing
|
121
|
+
of state using page numbers, because paginators are stateless.
|
122
|
+
"""
|
123
|
+
return self.pagination_strategy.initial_token
|
116
124
|
|
117
125
|
def next_page_token(
|
118
|
-
self,
|
126
|
+
self,
|
127
|
+
response: requests.Response,
|
128
|
+
last_page_size: int,
|
129
|
+
last_record: Optional[Record],
|
130
|
+
last_page_token_value: Optional[Any] = None,
|
119
131
|
) -> Optional[Mapping[str, Any]]:
|
120
|
-
|
121
|
-
response,
|
132
|
+
next_page_token = self.pagination_strategy.next_page_token(
|
133
|
+
response=response,
|
134
|
+
last_page_size=last_page_size,
|
135
|
+
last_record=last_record,
|
136
|
+
last_page_token_value=last_page_token_value,
|
122
137
|
)
|
123
|
-
if
|
124
|
-
return {"next_page_token":
|
138
|
+
if next_page_token:
|
139
|
+
return {"next_page_token": next_page_token}
|
125
140
|
else:
|
126
141
|
return None
|
127
142
|
|
128
|
-
def path(self) -> Optional[str]:
|
129
|
-
if
|
130
|
-
|
131
|
-
and self.page_token_option
|
132
|
-
and isinstance(self.page_token_option, RequestPath)
|
133
|
-
):
|
143
|
+
def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
|
144
|
+
token = next_page_token.get("next_page_token") if next_page_token else None
|
145
|
+
if token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
|
134
146
|
# Replace url base to only return the path
|
135
|
-
return str(
|
147
|
+
return str(token).replace(self.url_base.eval(self.config), "") # type: ignore # url_base is casted to a InterpolatedString in __post_init__
|
136
148
|
else:
|
137
149
|
return None
|
138
150
|
|
@@ -143,7 +155,7 @@ class DefaultPaginator(Paginator):
|
|
143
155
|
stream_slice: Optional[StreamSlice] = None,
|
144
156
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
145
157
|
) -> MutableMapping[str, Any]:
|
146
|
-
return self._get_request_options(RequestOptionType.request_parameter)
|
158
|
+
return self._get_request_options(RequestOptionType.request_parameter, next_page_token)
|
147
159
|
|
148
160
|
def get_request_headers(
|
149
161
|
self,
|
@@ -152,7 +164,7 @@ class DefaultPaginator(Paginator):
|
|
152
164
|
stream_slice: Optional[StreamSlice] = None,
|
153
165
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
154
166
|
) -> Mapping[str, str]:
|
155
|
-
return self._get_request_options(RequestOptionType.header)
|
167
|
+
return self._get_request_options(RequestOptionType.header, next_page_token)
|
156
168
|
|
157
169
|
def get_request_body_data(
|
158
170
|
self,
|
@@ -161,7 +173,7 @@ class DefaultPaginator(Paginator):
|
|
161
173
|
stream_slice: Optional[StreamSlice] = None,
|
162
174
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
163
175
|
) -> Mapping[str, Any]:
|
164
|
-
return self._get_request_options(RequestOptionType.body_data)
|
176
|
+
return self._get_request_options(RequestOptionType.body_data, next_page_token)
|
165
177
|
|
166
178
|
def get_request_body_json(
|
167
179
|
self,
|
@@ -170,25 +182,21 @@ class DefaultPaginator(Paginator):
|
|
170
182
|
stream_slice: Optional[StreamSlice] = None,
|
171
183
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
172
184
|
) -> Mapping[str, Any]:
|
173
|
-
return self._get_request_options(RequestOptionType.body_json)
|
174
|
-
|
175
|
-
def reset(self, reset_value: Optional[Any] = None) -> None:
|
176
|
-
if reset_value:
|
177
|
-
self.pagination_strategy.reset(reset_value=reset_value)
|
178
|
-
else:
|
179
|
-
self.pagination_strategy.reset()
|
180
|
-
self._token = self.pagination_strategy.initial_token
|
185
|
+
return self._get_request_options(RequestOptionType.body_json, next_page_token)
|
181
186
|
|
182
|
-
def _get_request_options(
|
187
|
+
def _get_request_options(
|
188
|
+
self, option_type: RequestOptionType, next_page_token: Optional[Mapping[str, Any]]
|
189
|
+
) -> MutableMapping[str, Any]:
|
183
190
|
options = {}
|
184
191
|
|
192
|
+
token = next_page_token.get("next_page_token") if next_page_token else None
|
185
193
|
if (
|
186
194
|
self.page_token_option
|
187
|
-
and
|
195
|
+
and token is not None
|
188
196
|
and isinstance(self.page_token_option, RequestOption)
|
189
197
|
and self.page_token_option.inject_into == option_type
|
190
198
|
):
|
191
|
-
options[self.page_token_option.field_name.eval(config=self.config)] =
|
199
|
+
options[self.page_token_option.field_name.eval(config=self.config)] = token # type: ignore # field_name is always cast to an interpolated string
|
192
200
|
if (
|
193
201
|
self.page_size_option
|
194
202
|
and self.pagination_strategy.get_page_size()
|
@@ -204,6 +212,9 @@ class PaginatorTestReadDecorator(Paginator):
|
|
204
212
|
"""
|
205
213
|
In some cases, we want to limit the number of requests that are made to the backend source. This class allows for limiting the number of
|
206
214
|
pages that are queried throughout a read command.
|
215
|
+
|
216
|
+
WARNING: Unlike the rest of the low-code framework, this decorator is not currently thread-safe because it has
|
217
|
+
an internal state to track the current number of pages counted so that it can exit early during a test read
|
207
218
|
"""
|
208
219
|
|
209
220
|
_PAGE_COUNT_BEFORE_FIRST_NEXT_CALL = 1
|
@@ -217,17 +228,27 @@ class PaginatorTestReadDecorator(Paginator):
|
|
217
228
|
self._decorated = decorated
|
218
229
|
self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
|
219
230
|
|
231
|
+
def get_initial_token(self) -> Optional[Any]:
|
232
|
+
self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
|
233
|
+
return self._decorated.get_initial_token()
|
234
|
+
|
220
235
|
def next_page_token(
|
221
|
-
self,
|
236
|
+
self,
|
237
|
+
response: requests.Response,
|
238
|
+
last_page_size: int,
|
239
|
+
last_record: Optional[Record],
|
240
|
+
last_page_token_value: Optional[Any] = None,
|
222
241
|
) -> Optional[Mapping[str, Any]]:
|
223
242
|
if self._page_count >= self._maximum_number_of_pages:
|
224
243
|
return None
|
225
244
|
|
226
245
|
self._page_count += 1
|
227
|
-
return self._decorated.next_page_token(
|
246
|
+
return self._decorated.next_page_token(
|
247
|
+
response, last_page_size, last_record, last_page_token_value
|
248
|
+
)
|
228
249
|
|
229
|
-
def path(self) -> Optional[str]:
|
230
|
-
return self._decorated.path()
|
250
|
+
def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
|
251
|
+
return self._decorated.path(next_page_token)
|
231
252
|
|
232
253
|
def get_request_params(
|
233
254
|
self,
|
@@ -272,7 +293,3 @@ class PaginatorTestReadDecorator(Paginator):
|
|
272
293
|
return self._decorated.get_request_body_json(
|
273
294
|
stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
|
274
295
|
)
|
275
|
-
|
276
|
-
def reset(self, reset_value: Optional[Any] = None) -> None:
|
277
|
-
self._decorated.reset()
|
278
|
-
self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
|
@@ -19,7 +19,7 @@ class NoPagination(Paginator):
|
|
19
19
|
|
20
20
|
parameters: InitVar[Mapping[str, Any]]
|
21
21
|
|
22
|
-
def path(self) -> Optional[str]:
|
22
|
+
def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
|
23
23
|
return None
|
24
24
|
|
25
25
|
def get_request_params(
|
@@ -58,11 +58,14 @@ class NoPagination(Paginator):
|
|
58
58
|
) -> Mapping[str, Any]:
|
59
59
|
return {}
|
60
60
|
|
61
|
+
def get_initial_token(self) -> Optional[Any]:
|
62
|
+
return None
|
63
|
+
|
61
64
|
def next_page_token(
|
62
|
-
self,
|
63
|
-
|
65
|
+
self,
|
66
|
+
response: requests.Response,
|
67
|
+
last_page_size: int,
|
68
|
+
last_record: Optional[Record],
|
69
|
+
last_page_token_value: Optional[Any],
|
70
|
+
) -> Optional[Mapping[str, Any]]:
|
64
71
|
return {}
|
65
|
-
|
66
|
-
def reset(self, reset_value: Optional[Any] = None) -> None:
|
67
|
-
# No state to reset
|
68
|
-
pass
|
@@ -24,14 +24,18 @@ class Paginator(ABC, RequestOptionsProvider):
|
|
24
24
|
"""
|
25
25
|
|
26
26
|
@abstractmethod
|
27
|
-
def
|
27
|
+
def get_initial_token(self) -> Optional[Any]:
|
28
28
|
"""
|
29
|
-
|
29
|
+
Get the page token that should be included in the request to get the first page of records
|
30
30
|
"""
|
31
31
|
|
32
32
|
@abstractmethod
|
33
33
|
def next_page_token(
|
34
|
-
self,
|
34
|
+
self,
|
35
|
+
response: requests.Response,
|
36
|
+
last_page_size: int,
|
37
|
+
last_record: Optional[Record],
|
38
|
+
last_page_token_value: Optional[Any],
|
35
39
|
) -> Optional[Mapping[str, Any]]:
|
36
40
|
"""
|
37
41
|
Returns the next_page_token to use to fetch the next page of records.
|
@@ -39,12 +43,13 @@ class Paginator(ABC, RequestOptionsProvider):
|
|
39
43
|
:param response: the response to process
|
40
44
|
:param last_page_size: the number of records read from the response
|
41
45
|
:param last_record: the last record extracted from the response
|
46
|
+
:param last_page_token_value: The value of the page token that was used on the last request
|
42
47
|
:return: A mapping {"next_page_token": <token>} for the next page from the input response object. Returning None means there are no more pages to read in this response.
|
43
48
|
"""
|
44
49
|
pass
|
45
50
|
|
46
51
|
@abstractmethod
|
47
|
-
def path(self) -> Optional[str]:
|
52
|
+
def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
|
48
53
|
"""
|
49
54
|
Returns the URL path to hit to fetch the next page of records
|
50
55
|
|
@@ -2,9 +2,15 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.cursor_pagination_strategy import
|
6
|
-
|
7
|
-
|
5
|
+
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.cursor_pagination_strategy import (
|
6
|
+
CursorPaginationStrategy,
|
7
|
+
)
|
8
|
+
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.offset_increment import (
|
9
|
+
OffsetIncrement,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import (
|
12
|
+
PageIncrement,
|
13
|
+
)
|
8
14
|
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.stop_condition import (
|
9
15
|
CursorStopCondition,
|
10
16
|
StopConditionPaginationStrategyDecorator,
|
airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py
CHANGED
@@ -43,7 +43,6 @@ class CursorPaginationStrategy(PaginationStrategy):
|
|
43
43
|
)
|
44
44
|
|
45
45
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
46
|
-
self._initial_cursor = None
|
47
46
|
if isinstance(self.cursor_value, str):
|
48
47
|
self._cursor_value = InterpolatedString.create(self.cursor_value, parameters=parameters)
|
49
48
|
else:
|
@@ -57,10 +56,19 @@ class CursorPaginationStrategy(PaginationStrategy):
|
|
57
56
|
|
58
57
|
@property
|
59
58
|
def initial_token(self) -> Optional[Any]:
|
60
|
-
|
59
|
+
"""
|
60
|
+
CursorPaginationStrategy does not have an initial value because the next cursor is typically included
|
61
|
+
in the response of the first request. For Resumable Full Refresh streams that checkpoint the page
|
62
|
+
cursor, the next cursor should be read from the state or stream slice object.
|
63
|
+
"""
|
64
|
+
return None
|
61
65
|
|
62
66
|
def next_page_token(
|
63
|
-
self,
|
67
|
+
self,
|
68
|
+
response: requests.Response,
|
69
|
+
last_page_size: int,
|
70
|
+
last_record: Optional[Record],
|
71
|
+
last_page_token_value: Optional[Any] = None,
|
64
72
|
) -> Optional[Any]:
|
65
73
|
decoded_response = next(self.decoder.decode(response))
|
66
74
|
|
@@ -87,8 +95,5 @@ class CursorPaginationStrategy(PaginationStrategy):
|
|
87
95
|
)
|
88
96
|
return token if token else None
|
89
97
|
|
90
|
-
def reset(self, reset_value: Optional[Any] = None) -> None:
|
91
|
-
self._initial_cursor = reset_value
|
92
|
-
|
93
98
|
def get_page_size(self) -> Optional[int]:
|
94
99
|
return self.page_size
|
@@ -52,7 +52,6 @@ class OffsetIncrement(PaginationStrategy):
|
|
52
52
|
inject_on_first_request: bool = False
|
53
53
|
|
54
54
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
55
|
-
self._offset = 0
|
56
55
|
page_size = str(self.page_size) if isinstance(self.page_size, int) else self.page_size
|
57
56
|
if page_size:
|
58
57
|
self._page_size: Optional[InterpolatedString] = InterpolatedString(
|
@@ -64,11 +63,15 @@ class OffsetIncrement(PaginationStrategy):
|
|
64
63
|
@property
|
65
64
|
def initial_token(self) -> Optional[Any]:
|
66
65
|
if self.inject_on_first_request:
|
67
|
-
return
|
66
|
+
return 0
|
68
67
|
return None
|
69
68
|
|
70
69
|
def next_page_token(
|
71
|
-
self,
|
70
|
+
self,
|
71
|
+
response: requests.Response,
|
72
|
+
last_page_size: int,
|
73
|
+
last_record: Optional[Record],
|
74
|
+
last_page_token_value: Optional[Any] = None,
|
72
75
|
) -> Optional[Any]:
|
73
76
|
decoded_response = next(self.decoder.decode(response))
|
74
77
|
|
@@ -78,9 +81,17 @@ class OffsetIncrement(PaginationStrategy):
|
|
78
81
|
and last_page_size < self._page_size.eval(self.config, response=decoded_response)
|
79
82
|
) or last_page_size == 0:
|
80
83
|
return None
|
84
|
+
elif last_page_token_value is None:
|
85
|
+
# If the OffsetIncrement strategy does not inject on the first request, the incoming last_page_token_value
|
86
|
+
# will be None. For this case, we assume that None was the first page and progress to the next offset
|
87
|
+
return 0 + last_page_size
|
88
|
+
elif not isinstance(last_page_token_value, int):
|
89
|
+
raise ValueError(
|
90
|
+
"The page token for a OffsetIncrement pagination strategy must be an integer"
|
91
|
+
)
|
81
92
|
else:
|
82
|
-
|
83
|
-
return
|
93
|
+
next_page_token_value = last_page_token_value + last_page_size
|
94
|
+
return next_page_token_value
|
84
95
|
|
85
96
|
def reset(self, reset_value: Optional[Any] = 0) -> None:
|
86
97
|
if not isinstance(reset_value, int):
|
@@ -31,7 +31,6 @@ class PageIncrement(PaginationStrategy):
|
|
31
31
|
inject_on_first_request: bool = False
|
32
32
|
|
33
33
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
34
|
-
self._page = self.start_from_page
|
35
34
|
if isinstance(self.page_size, int) or (self.page_size is None):
|
36
35
|
self._page_size = self.page_size
|
37
36
|
else:
|
@@ -43,28 +42,30 @@ class PageIncrement(PaginationStrategy):
|
|
43
42
|
@property
|
44
43
|
def initial_token(self) -> Optional[Any]:
|
45
44
|
if self.inject_on_first_request:
|
46
|
-
return self.
|
45
|
+
return self.start_from_page
|
47
46
|
return None
|
48
47
|
|
49
48
|
def next_page_token(
|
50
|
-
self,
|
49
|
+
self,
|
50
|
+
response: requests.Response,
|
51
|
+
last_page_size: int,
|
52
|
+
last_record: Optional[Record],
|
53
|
+
last_page_token_value: Optional[Any],
|
51
54
|
) -> Optional[Any]:
|
52
55
|
# Stop paginating when there are fewer records than the page size or the current page has no records
|
53
56
|
if (self._page_size and last_page_size < self._page_size) or last_page_size == 0:
|
54
57
|
return None
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
self._page = self.start_from_page
|
62
|
-
elif not isinstance(reset_value, int):
|
58
|
+
elif last_page_token_value is None:
|
59
|
+
# If the PageIncrement strategy does not inject on the first request, the incoming last_page_token_value
|
60
|
+
# may be None. When this is the case, we assume we've already requested the first page specified by
|
61
|
+
# start_from_page and must now get the next page
|
62
|
+
return self.start_from_page + 1
|
63
|
+
elif not isinstance(last_page_token_value, int):
|
63
64
|
raise ValueError(
|
64
|
-
|
65
|
+
"The page token for a PageIncrement pagination strategy must be an integer"
|
65
66
|
)
|
66
67
|
else:
|
67
|
-
|
68
|
+
return last_page_token_value + 1
|
68
69
|
|
69
70
|
def get_page_size(self) -> Optional[int]:
|
70
71
|
return self._page_size
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from abc import abstractmethod
|
6
6
|
from dataclasses import dataclass
|
7
|
-
from typing import Any, Optional
|
7
|
+
from typing import Any, Mapping, Optional
|
8
8
|
|
9
9
|
import requests
|
10
10
|
|
@@ -26,22 +26,21 @@ class PaginationStrategy:
|
|
26
26
|
|
27
27
|
@abstractmethod
|
28
28
|
def next_page_token(
|
29
|
-
self,
|
29
|
+
self,
|
30
|
+
response: requests.Response,
|
31
|
+
last_page_size: int,
|
32
|
+
last_record: Optional[Record],
|
33
|
+
last_page_token_value: Optional[Any],
|
30
34
|
) -> Optional[Any]:
|
31
35
|
"""
|
32
36
|
:param response: response to process
|
33
37
|
:param last_page_size: the number of records read from the response
|
34
38
|
:param last_record: the last record extracted from the response
|
39
|
+
:param last_page_token_value: The value of the page token that was used on the last request
|
35
40
|
:return: next page token. Returns None if there are no more pages to fetch
|
36
41
|
"""
|
37
42
|
pass
|
38
43
|
|
39
|
-
@abstractmethod
|
40
|
-
def reset(self, reset_value: Optional[Any] = None) -> None:
|
41
|
-
"""
|
42
|
-
Reset the pagination's inner state
|
43
|
-
"""
|
44
|
-
|
45
44
|
@abstractmethod
|
46
45
|
def get_page_size(self) -> Optional[int]:
|
47
46
|
"""
|
@@ -44,16 +44,19 @@ class StopConditionPaginationStrategyDecorator(PaginationStrategy):
|
|
44
44
|
self._stop_condition = stop_condition
|
45
45
|
|
46
46
|
def next_page_token(
|
47
|
-
self,
|
47
|
+
self,
|
48
|
+
response: requests.Response,
|
49
|
+
last_page_size: int,
|
50
|
+
last_record: Optional[Record],
|
51
|
+
last_page_token_value: Optional[Any] = None,
|
48
52
|
) -> Optional[Any]:
|
49
|
-
# We evaluate in reverse order because the assumption is that most of the APIs using data feed structure
|
50
|
-
# descending order. In terms of performance/memory, we return the records lazily
|
53
|
+
# We evaluate in reverse order because the assumption is that most of the APIs using data feed structure
|
54
|
+
# will return records in descending order. In terms of performance/memory, we return the records lazily
|
51
55
|
if last_record and self._stop_condition.is_met(last_record):
|
52
56
|
return None
|
53
|
-
return self._delegate.next_page_token(
|
54
|
-
|
55
|
-
|
56
|
-
self._delegate.reset(reset_value)
|
57
|
+
return self._delegate.next_page_token(
|
58
|
+
response, last_page_size, last_record, last_page_token_value
|
59
|
+
)
|
57
60
|
|
58
61
|
def get_page_size(self) -> Optional[int]:
|
59
62
|
return self._delegate.get_page_size()
|
@@ -5,10 +5,19 @@
|
|
5
5
|
from airbyte_cdk.sources.declarative.requesters.request_options.datetime_based_request_options_provider import (
|
6
6
|
DatetimeBasedRequestOptionsProvider,
|
7
7
|
)
|
8
|
-
from airbyte_cdk.sources.declarative.requesters.request_options.default_request_options_provider import
|
8
|
+
from airbyte_cdk.sources.declarative.requesters.request_options.default_request_options_provider import (
|
9
|
+
DefaultRequestOptionsProvider,
|
10
|
+
)
|
9
11
|
from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_options_provider import (
|
10
12
|
InterpolatedRequestOptionsProvider,
|
11
13
|
)
|
12
|
-
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import
|
14
|
+
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
|
15
|
+
RequestOptionsProvider,
|
16
|
+
)
|
13
17
|
|
14
|
-
__all__ = [
|
18
|
+
__all__ = [
|
19
|
+
"DatetimeBasedRequestOptionsProvider",
|
20
|
+
"DefaultRequestOptionsProvider",
|
21
|
+
"InterpolatedRequestOptionsProvider",
|
22
|
+
"RequestOptionsProvider",
|
23
|
+
]
|