airbyte-cdk 6.14.0.dev1__py3-none-any.whl → 6.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +51 -27
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +45 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +19 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +14 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +35 -52
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +7 -10
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +4 -9
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +6 -11
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +13 -13
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +13 -14
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +8 -7
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +10 -10
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +64 -71
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +3 -3
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
- {airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.0.dist-info}/RECORD +20 -19
- {airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/concurrent_declarative_source.py
CHANGED
@@ -3,7 +3,7 @@
 #

 import logging
-from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple
+from typing import Any, Callable, Generic, Iterator, List, Mapping, Optional, Tuple, Union

 from airbyte_cdk.models import (
     AirbyteCatalog,
@@ -28,11 +28,15 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     DatetimeBasedCursor as DatetimeBasedCursorModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    DeclarativeStream as DeclarativeStreamModel,
+)
 from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
+    ComponentDefinition,
     ModelToComponentFactory,
 )
 from airbyte_cdk.sources.declarative.requesters import HttpRequester
-from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
+from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
 from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
     DeclarativePartitionFactory,
     StreamSlicerPartitionGenerator,
@@ -48,6 +52,7 @@ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
 from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
 from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
+from airbyte_cdk.sources.types import Config, StreamState


 class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
@@ -189,11 +194,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             # Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
             # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
             # so we need to treat them as synchronous
-            if
-
+            if (
+                isinstance(declarative_stream, DeclarativeStream)
+                and name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
                 == "SimpleRetriever"
-                or name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
-                == "AsyncRetriever"
             ):
                 incremental_sync_component_definition = name_to_stream_mapping[
                     declarative_stream.name
@@ -213,11 +217,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                     and not incremental_sync_component_definition
                 )

-                is_async_job_stream = (
-                    name_to_stream_mapping[declarative_stream.name].get("retriever", {}).get("type")
-                    == "AsyncRetriever"
-                )
-
                 if self._is_datetime_incremental_without_partition_routing(
                     declarative_stream, incremental_sync_component_definition
                 ):
@@ -235,25 +234,15 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         stream_state=stream_state,
                     )

-                    retriever = declarative_stream.retriever
-
-                    # This is an optimization so that we don't invoke any cursor or state management flows within the
-                    # low-code framework because state management is handled through the ConcurrentCursor.
-                    if declarative_stream and isinstance(retriever, SimpleRetriever):
-                        # Also a temporary hack. In the legacy Stream implementation, as part of the read,
-                        # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
-                        # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
-                        # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
-                        # still rely on a DatetimeBasedCursor that is properly initialized with state.
-                        if retriever.cursor:
-                            retriever.cursor.set_initial_state(stream_state=stream_state)
-                        retriever.cursor = None
-
                     partition_generator = StreamSlicerPartitionGenerator(
                         DeclarativePartitionFactory(
                             declarative_stream.name,
                             declarative_stream.get_json_schema(),
-
+                            self._retriever_factory(
+                                name_to_stream_mapping[declarative_stream.name],
+                                config,
+                                stream_state,
+                            ),
                             self.message_repository,
                         ),
                         cursor,
@@ -283,7 +272,11 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                         DeclarativePartitionFactory(
                             declarative_stream.name,
                             declarative_stream.get_json_schema(),
-
+                            self._retriever_factory(
+                                name_to_stream_mapping[declarative_stream.name],
+                                config,
+                                {},
+                            ),
                             self.message_repository,
                         ),
                         declarative_stream.retriever.stream_slicer,
@@ -422,3 +415,34 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 if stream.stream.name not in concurrent_stream_names
             ]
         )
+
+    def _retriever_factory(
+        self, stream_config: ComponentDefinition, source_config: Config, stream_state: StreamState
+    ) -> Callable[[], Retriever]:
+        def _factory_method() -> Retriever:
+            declarative_stream: DeclarativeStream = self._constructor.create_component(
+                DeclarativeStreamModel,
+                stream_config,
+                source_config,
+                emit_connector_builder_messages=self._emit_connector_builder_messages,
+            )
+
+            # This is an optimization so that we don't invoke any cursor or state management flows within the
+            # low-code framework because state management is handled through the ConcurrentCursor.
+            if (
+                declarative_stream
+                and declarative_stream.retriever
+                and isinstance(declarative_stream.retriever, SimpleRetriever)
+            ):
+                # Also a temporary hack. In the legacy Stream implementation, as part of the read, set_initial_state() is
+                # called to instantiate incoming state on the cursor. Although we no longer rely on the legacy low-code cursor
+                # for concurrent checkpointing, low-code components like StopConditionPaginationStrategyDecorator and
+                # ClientSideIncrementalRecordFilterDecorator still rely on a DatetimeBasedCursor that is properly initialized
+                # with state.
+                if declarative_stream.retriever.cursor:
+                    declarative_stream.retriever.cursor.set_initial_state(stream_state=stream_state)
+                declarative_stream.retriever.cursor = None
+
+            return declarative_stream.retriever
+
+        return _factory_method
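The change above swaps the single shared `declarative_stream.retriever` for a factory closure, so every concurrent partition builds its own `Retriever` (and therefore its own paginator and cursor state) from the stream's component definition. A minimal sketch of that closure pattern, using made-up stand-in classes rather than the CDK's real ones:

```python
from typing import Callable, List


class FakeRetriever:
    """Stand-in for a low-code Retriever; it carries per-partition mutable state."""

    def __init__(self, stream_name: str) -> None:
        self.stream_name = stream_name
        self.pages_read = 0  # state that must not leak between partitions


def make_retriever_factory(stream_name: str) -> Callable[[], FakeRetriever]:
    # Capture only configuration in the closure; build fresh state on every call,
    # mirroring how _retriever_factory re-creates the stream per partition.
    def _factory() -> FakeRetriever:
        return FakeRetriever(stream_name)

    return _factory


factory = make_retriever_factory("orders")
retrievers: List[FakeRetriever] = [factory() for _ in range(3)]
assert len({id(r) for r in retrievers}) == 3  # three independent instances
```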
airbyte_cdk/sources/declarative/declarative_component_schema.yaml
CHANGED
@@ -1241,6 +1241,7 @@ definitions:
           - "$ref": "#/definitions/KeysToLower"
           - "$ref": "#/definitions/KeysToSnakeCase"
           - "$ref": "#/definitions/FlattenFields"
+          - "$ref": "#/definitions/KeysReplace"
     state_migrations:
       title: State Migrations
       description: Array of state migrations to be applied on the input state
@@ -1785,6 +1786,7 @@ definitions:
           - "$ref": "#/definitions/KeysToLower"
           - "$ref": "#/definitions/KeysToSnakeCase"
           - "$ref": "#/definitions/FlattenFields"
+          - "$ref": "#/definitions/KeysReplace"
     schema_type_identifier:
       "$ref": "#/definitions/SchemaTypeIdentifier"
     $parameters:
@@ -1883,6 +1885,49 @@ definitions:
     $parameters:
       type: object
       additionalProperties: true
+  KeysReplace:
+    title: Keys Replace
+    description: A transformation that replaces symbols in keys.
+    type: object
+    required:
+      - type
+      - old
+      - new
+    properties:
+      type:
+        type: string
+        enum: [KeysReplace]
+      old:
+        type: string
+        title: Old value
+        description: Old value to replace.
+        examples:
+          - " "
+          - "{{ record.id }}"
+          - "{{ config['id'] }}"
+          - "{{ stream_slice['id'] }}"
+        interpolation_context:
+          - config
+          - record
+          - stream_state
+          - stream_slice
+      new:
+        type: string
+        title: New value
+        description: New value to set.
+        examples:
+          - "_"
+          - "{{ record.id }}"
+          - "{{ config['id'] }}"
+          - "{{ stream_slice['id'] }}"
+        interpolation_context:
+          - config
+          - record
+          - stream_state
+          - stream_slice
+    $parameters:
+      type: object
+      additionalProperties: true
   IterableDecoder:
     title: Iterable Decoder
     description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
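Per the schema above, a manifest only has to supply `type`, `old`, and `new`, and both values may be interpolated. A hypothetical transformations entry, written here as the equivalent Python mapping (the values are illustrative, not taken from a real connector):

```python
# Replace spaces in field names with underscores for every record of a stream.
keys_replace_definition = {
    "type": "KeysReplace",
    "old": " ",
    "new": "_",
}

# Interpolation is allowed in both fields, e.g. stripping a per-slice identifier.
interpolated_keys_replace = {
    "type": "KeysReplace",
    "old": "{{ stream_slice['id'] }}",
    "new": "",
}
```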
airbyte_cdk/sources/declarative/models/declarative_component_schema.py
CHANGED
@@ -721,6 +721,23 @@ class KeysToSnakeCase(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


+class KeysReplace(BaseModel):
+    type: Literal["KeysReplace"]
+    old: str = Field(
+        ...,
+        description="Old value to replace.",
+        examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
+        title="Old value",
+    )
+    new: str = Field(
+        ...,
+        description="New value to set.",
+        examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
+        title="New value",
+    )
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class FlattenFields(BaseModel):
     type: Literal["FlattenFields"]
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
@@ -1701,6 +1718,7 @@ class DeclarativeStream(BaseModel):
                     KeysToLower,
                     KeysToSnakeCase,
                     FlattenFields,
+                    KeysReplace,
                 ]
             ]
         ] = Field(
@@ -1875,6 +1893,7 @@ class DynamicSchemaLoader(BaseModel):
                 KeysToLower,
                 KeysToSnakeCase,
                 FlattenFields,
+                KeysReplace,
             ]
         ]
     ] = Field(
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
CHANGED
@@ -254,6 +254,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     JwtPayload as JwtPayloadModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    KeysReplace as KeysReplaceModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     KeysToLower as KeysToLowerModel,
 )
@@ -417,6 +420,9 @@ from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFiel
 from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
     FlattenFields,
 )
+from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
+    KeysReplaceTransformation,
+)
 from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
     KeysToLowerTransformation,
 )
@@ -509,6 +515,7 @@ class ModelToComponentFactory:
             GzipParserModel: self.create_gzip_parser,
             KeysToLowerModel: self.create_keys_to_lower_transformation,
             KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
+            KeysReplaceModel: self.create_keys_replace_transformation,
             FlattenFieldsModel: self.create_flatten_fields,
             IterableDecoderModel: self.create_iterable_decoder,
             XmlDecoderModel: self.create_xml_decoder,
@@ -630,6 +637,13 @@ class ModelToComponentFactory:
     ) -> KeysToSnakeCaseTransformation:
         return KeysToSnakeCaseTransformation()

+    def create_keys_replace_transformation(
+        self, model: KeysReplaceModel, config: Config, **kwargs: Any
+    ) -> KeysReplaceTransformation:
+        return KeysReplaceTransformation(
+            old=model.old, new=model.new, parameters=model.parameters or {}
+        )
+
     def create_flatten_fields(
         self, model: FlattenFieldsModel, config: Config, **kwargs: Any
     ) -> FlattenFields:
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py
CHANGED
@@ -112,39 +112,27 @@ class DefaultPaginator(Paginator):
         )
         if isinstance(self.url_base, str):
             self.url_base = InterpolatedString(string=self.url_base, parameters=parameters)
-
-    def get_initial_token(self) -> Optional[Any]:
-        """
-        Return the page token that should be used for the first request of a stream
-
-        WARNING: get_initial_token() should not be used by streams that use RFR that perform checkpointing
-        of state using page numbers. Because paginators are stateless
-        """
-        return self.pagination_strategy.initial_token
+        self._token: Optional[Any] = self.pagination_strategy.initial_token

     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any] = None,
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Mapping[str, Any]]:
-
-        response
-            last_page_size=last_page_size,
-            last_record=last_record,
-            last_page_token_value=last_page_token_value,
+        self._token = self.pagination_strategy.next_page_token(
+            response, last_page_size, last_record
         )
-        if
-            return {"next_page_token":
+        if self._token:
+            return {"next_page_token": self._token}
         else:
             return None

-    def path(self
-
-
+    def path(self) -> Optional[str]:
+        if (
+            self._token
+            and self.page_token_option
+            and isinstance(self.page_token_option, RequestPath)
+        ):
             # Replace url base to only return the path
-            return str(
+            return str(self._token).replace(self.url_base.eval(self.config), "")  # type: ignore # url_base is casted to a InterpolatedString in __post_init__
         else:
             return None

@@ -155,7 +143,7 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> MutableMapping[str, Any]:
-        return self._get_request_options(RequestOptionType.request_parameter
+        return self._get_request_options(RequestOptionType.request_parameter)

     def get_request_headers(
         self,
@@ -164,7 +152,7 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, str]:
-        return self._get_request_options(RequestOptionType.header
+        return self._get_request_options(RequestOptionType.header)

     def get_request_body_data(
         self,
@@ -173,7 +161,7 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
-        return self._get_request_options(RequestOptionType.body_data
+        return self._get_request_options(RequestOptionType.body_data)

     def get_request_body_json(
         self,
@@ -182,21 +170,25 @@ class DefaultPaginator(Paginator):
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
-        return self._get_request_options(RequestOptionType.body_json
+        return self._get_request_options(RequestOptionType.body_json)

-    def
-
-
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        if reset_value:
+            self.pagination_strategy.reset(reset_value=reset_value)
+        else:
+            self.pagination_strategy.reset()
+        self._token = self.pagination_strategy.initial_token
+
+    def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping[str, Any]:
         options = {}

-        token = next_page_token.get("next_page_token") if next_page_token else None
         if (
             self.page_token_option
-            and
+            and self._token is not None
             and isinstance(self.page_token_option, RequestOption)
             and self.page_token_option.inject_into == option_type
         ):
-            options[self.page_token_option.field_name.eval(config=self.config)] =
+            options[self.page_token_option.field_name.eval(config=self.config)] = self._token  # type: ignore # field_name is always cast to an interpolated string
         if (
             self.page_size_option
             and self.pagination_strategy.get_page_size()
@@ -212,9 +204,6 @@ class PaginatorTestReadDecorator(Paginator):
     """
     In some cases, we want to limit the number of requests that are made to the backend source. This class allows for limiting the number of
     pages that are queried throughout a read command.
-
-    WARNING: This decorator is not currently thread-safe like the rest of the low-code framework because it has
-    an internal state to track the current number of pages counted so that it can exit early during a test read
     """

     _PAGE_COUNT_BEFORE_FIRST_NEXT_CALL = 1
@@ -228,27 +217,17 @@ class PaginatorTestReadDecorator(Paginator):
         self._decorated = decorated
         self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL

-    def get_initial_token(self) -> Optional[Any]:
-        self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
-        return self._decorated.get_initial_token()
-
     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any] = None,
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Mapping[str, Any]]:
         if self._page_count >= self._maximum_number_of_pages:
             return None

         self._page_count += 1
-        return self._decorated.next_page_token(
-            response, last_page_size, last_record, last_page_token_value
-        )
+        return self._decorated.next_page_token(response, last_page_size, last_record)

-    def path(self
-        return self._decorated.path(
+    def path(self) -> Optional[str]:
+        return self._decorated.path()

     def get_request_params(
         self,
@@ -293,3 +272,7 @@ class PaginatorTestReadDecorator(Paginator):
         return self._decorated.get_request_body_json(
             stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
         )
+
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        self._decorated.reset()
+        self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py
CHANGED
@@ -19,7 +19,7 @@ class NoPagination(Paginator):

     parameters: InitVar[Mapping[str, Any]]

-    def path(self
+    def path(self) -> Optional[str]:
         return None

     def get_request_params(
@@ -58,14 +58,11 @@ class NoPagination(Paginator):
     ) -> Mapping[str, Any]:
         return {}

-    def get_initial_token(self) -> Optional[Any]:
-        return None
-
     def next_page_token(
-        self,
-
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any],
-    ) -> Optional[Mapping[str, Any]]:
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
+    ) -> Mapping[str, Any]:
         return {}
+
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        # No state to reset
+        pass
airbyte_cdk/sources/declarative/requesters/paginators/paginator.py
CHANGED
@@ -24,18 +24,14 @@ class Paginator(ABC, RequestOptionsProvider):
     """

     @abstractmethod
-    def
+    def reset(self, reset_value: Optional[Any] = None) -> None:
         """
-
+        Reset the pagination's inner state
         """

     @abstractmethod
     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any],
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Mapping[str, Any]]:
         """
         Returns the next_page_token to use to fetch the next page of records.
@@ -43,13 +39,12 @@ class Paginator(ABC, RequestOptionsProvider):
         :param response: the response to process
         :param last_page_size: the number of records read from the response
         :param last_record: the last record extracted from the response
-        :param last_page_token_value: The current value of the page token made on the last request
         :return: A mapping {"next_page_token": <token>} for the next page from the input response object. Returning None means there are no more pages to read in this response.
         """
         pass

     @abstractmethod
-    def path(self
+    def path(self) -> Optional[str]:
         """
         Returns the URL path to hit to fetch the next page of records

airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py
CHANGED
@@ -43,6 +43,7 @@ class CursorPaginationStrategy(PaginationStrategy):
     )

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+        self._initial_cursor = None
         if isinstance(self.cursor_value, str):
             self._cursor_value = InterpolatedString.create(self.cursor_value, parameters=parameters)
         else:
@@ -56,19 +57,10 @@ class CursorPaginationStrategy(PaginationStrategy):

     @property
     def initial_token(self) -> Optional[Any]:
-
-        CursorPaginationStrategy does not have an initial value because the next cursor is typically included
-        in the response of the first request. For Resumable Full Refresh streams that checkpoint the page
-        cursor, the next cursor should be read from the state or stream slice object.
-        """
-        return None
+        return self._initial_cursor

     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any] = None,
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Any]:
         decoded_response = next(self.decoder.decode(response))

@@ -95,5 +87,8 @@ class CursorPaginationStrategy(PaginationStrategy):
         )
         return token if token else None

+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        self._initial_cursor = reset_value
+
     def get_page_size(self) -> Optional[int]:
         return self.page_size
airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py
CHANGED
@@ -52,6 +52,7 @@ class OffsetIncrement(PaginationStrategy):
     inject_on_first_request: bool = False

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+        self._offset = 0
         page_size = str(self.page_size) if isinstance(self.page_size, int) else self.page_size
         if page_size:
             self._page_size: Optional[InterpolatedString] = InterpolatedString(
@@ -63,15 +64,11 @@ class OffsetIncrement(PaginationStrategy):
     @property
     def initial_token(self) -> Optional[Any]:
         if self.inject_on_first_request:
-            return
+            return self._offset
         return None

     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any] = None,
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Any]:
         decoded_response = next(self.decoder.decode(response))

@@ -81,16 +78,19 @@ class OffsetIncrement(PaginationStrategy):
             and last_page_size < self._page_size.eval(self.config, response=decoded_response)
         ) or last_page_size == 0:
             return None
-
-
-
-
-
+        else:
+            self._offset += last_page_size
+            return self._offset
+
+    def reset(self, reset_value: Optional[Any] = 0) -> None:
+        if reset_value is None:
+            self._offset = 0
+        elif not isinstance(reset_value, int):
             raise ValueError(
-                f"
+                f"Reset value {reset_value} for OffsetIncrement pagination strategy was not an integer"
             )
         else:
-
+            self._offset = reset_value

     def get_page_size(self) -> Optional[int]:
         if self._page_size:
airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py
CHANGED
@@ -31,6 +31,7 @@ class PageIncrement(PaginationStrategy):
     inject_on_first_request: bool = False

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+        self._page = self.start_from_page
         if isinstance(self.page_size, int) or (self.page_size is None):
             self._page_size = self.page_size
         else:
@@ -42,30 +43,28 @@ class PageIncrement(PaginationStrategy):
     @property
     def initial_token(self) -> Optional[Any]:
         if self.inject_on_first_request:
-            return self.
+            return self._page
         return None

     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any],
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Any]:
         # Stop paginating when there are fewer records than the page size or the current page has no records
         if (self._page_size and last_page_size < self._page_size) or last_page_size == 0:
             return None
-
-
-
-
-
-
+        else:
+            self._page += 1
+            return self._page
+
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        if reset_value is None:
+            self._page = self.start_from_page
+        elif not isinstance(reset_value, int):
             raise ValueError(
-                f"
+                f"Reset value {reset_value} for PageIncrement pagination strategy was not an integer"
             )
         else:
-
+            self._page = reset_value

     def get_page_size(self) -> Optional[int]:
         return self._page_size
airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py
CHANGED
@@ -4,7 +4,7 @@

 from abc import abstractmethod
 from dataclasses import dataclass
-from typing import Any,
+from typing import Any, Optional

 import requests

@@ -26,21 +26,22 @@ class PaginationStrategy:

     @abstractmethod
     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any],
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Any]:
         """
         :param response: response to process
         :param last_page_size: the number of records read from the response
         :param last_record: the last record extracted from the response
-        :param last_page_token_value: The current value of the page token made on the last request
         :return: next page token. Returns None if there are no more pages to fetch
         """
         pass

+    @abstractmethod
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        """
+        Reset the pagination's inner state
+        """
+
     @abstractmethod
     def get_page_size(self) -> Optional[int]:
         """
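Taken together, the paginator and strategy hunks above define the contract the rest of this release relies on: `next_page_token()` may mutate internal state, and `reset()` restores it, optionally to a checkpointed value. A minimal sketch of a strategy honoring that contract, written standalone rather than subclassing the CDK base class, with a made-up `X-Next-Cursor` header:

```python
from typing import Any, Optional

import requests


class HeaderCursorStrategy:
    """Illustrative only: keeps the cursor from a response header as internal state."""

    def __init__(self) -> None:
        self._cursor: Optional[str] = None

    @property
    def initial_token(self) -> Optional[Any]:
        # After reset(reset_value), the first request resumes from that cursor.
        return self._cursor

    def next_page_token(
        self, response: requests.Response, last_page_size: int, last_record: Optional[Any]
    ) -> Optional[Any]:
        if last_page_size == 0:
            return None
        self._cursor = response.headers.get("X-Next-Cursor")
        return self._cursor

    def reset(self, reset_value: Optional[Any] = None) -> None:
        # None restarts pagination from the beginning; any other value resumes mid-stream.
        self._cursor = reset_value

    def get_page_size(self) -> Optional[int]:
        return None
```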
airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py
CHANGED
@@ -44,19 +44,19 @@ class StopConditionPaginationStrategyDecorator(PaginationStrategy):
         self._stop_condition = stop_condition

     def next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any] = None,
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
     ) -> Optional[Any]:
-        # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure
-        #
+        # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure will return records in
+        # descending order. In terms of performance/memory, we return the records lazily
         if last_record and self._stop_condition.is_met(last_record):
             return None
-        return self._delegate.next_page_token(
-
-
+        return self._delegate.next_page_token(response, last_page_size, last_record)
+
+    def reset(self, reset_value: Optional[Any] = None) -> None:
+        if reset_value:
+            self._delegate.reset(reset_value)
+        else:
+            self._delegate.reset()

     def get_page_size(self) -> Optional[int]:
         return self._delegate.get_page_size()
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
CHANGED
@@ -6,7 +6,18 @@ import json
 from dataclasses import InitVar, dataclass, field
 from functools import partial
 from itertools import islice
-from typing import
+from typing import (
+    Any,
+    Callable,
+    Iterable,
+    List,
+    Mapping,
+    MutableMapping,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+)

 import requests

@@ -79,6 +90,9 @@ class SimpleRetriever(Retriever):

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._paginator = self.paginator or NoPagination(parameters=parameters)
+        self._last_response: Optional[requests.Response] = None
+        self._last_page_size: int = 0
+        self._last_record: Optional[Record] = None
         self._parameters = parameters
         self._name = (
             InterpolatedString(self._name, parameters=parameters)
@@ -86,6 +100,10 @@ class SimpleRetriever(Retriever):
             else self._name
         )

+        # This mapping is used during a resumable full refresh syncs to indicate whether a partition has started syncing
+        # records. Partitions serve as the key and map to True if they already began processing records
+        self._partition_started: MutableMapping[Any, bool] = dict()
+
     @property  # type: ignore
     def name(self) -> str:
         """
@@ -233,13 +251,17 @@ class SimpleRetriever(Retriever):
             raise ValueError("Request body json cannot be a string")
         return body_json

-    def _paginator_path(
+    def _paginator_path(
+        self,
+    ) -> Optional[str]:
         """
         If the paginator points to a path, follow it, else return nothing so the requester is used.
+        :param stream_state:
+        :param stream_slice:
         :param next_page_token:
         :return:
         """
-        return self._paginator.path(
+        return self._paginator.path()

     def _parse_response(
         self,
@@ -250,15 +272,22 @@ class SimpleRetriever(Retriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Iterable[Record]:
         if not response:
+            self._last_response = None
             yield from []
         else:
-
+            self._last_response = response
+            record_generator = self.record_selector.select_records(
                 response=response,
                 stream_state=stream_state,
                 records_schema=records_schema,
                 stream_slice=stream_slice,
                 next_page_token=next_page_token,
             )
+            self._last_page_size = 0
+            for record in record_generator:
+                self._last_page_size += 1
+                self._last_record = record
+                yield record

     @property  # type: ignore
     def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
@@ -270,13 +299,7 @@ class SimpleRetriever(Retriever):
         if not isinstance(value, property):
             self._primary_key = value

-    def _next_page_token(
-        self,
-        response: requests.Response,
-        last_page_size: int,
-        last_record: Optional[Record],
-        last_page_token_value: Optional[Any],
-    ) -> Optional[Mapping[str, Any]]:
+    def _next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
         """
         Specifies a pagination strategy.

@@ -284,12 +307,7 @@ class SimpleRetriever(Retriever):

         :return: The token for the next page from the input response object. Returning None means there are no more pages to read in this response.
         """
-        return self._paginator.next_page_token(
-            response=response,
-            last_page_size=last_page_size,
-            last_record=last_record,
-            last_page_token_value=last_page_token_value,
-        )
+        return self._paginator.next_page_token(response, self._last_page_size, self._last_record)

     def _fetch_next_page(
         self,
@@ -298,7 +316,7 @@ class SimpleRetriever(Retriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Optional[requests.Response]:
         return self.requester.send_request(
-            path=self._paginator_path(
+            path=self._paginator_path(),
             stream_state=stream_state,
             stream_slice=stream_slice,
             next_page_token=next_page_token,
@@ -327,37 +345,20 @@ class SimpleRetriever(Retriever):
     # This logic is similar to _read_pages in the HttpStream class. When making changes here, consider making changes there as well.
     def _read_pages(
         self,
-        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[
+        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[StreamData]],
         stream_state: Mapping[str, Any],
         stream_slice: StreamSlice,
-    ) -> Iterable[
+    ) -> Iterable[StreamData]:
         pagination_complete = False
-
-        next_page_token: Optional[Mapping[str, Any]] = (
-            {"next_page_token": initial_token} if initial_token else None
-        )
+        next_page_token = None
         while not pagination_complete:
             response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
-
-            last_page_size = 0
-            last_record: Optional[Record] = None
-            for record in records_generator_fn(response):
-                last_page_size += 1
-                last_record = record
-                yield record
+            yield from records_generator_fn(response)

             if not response:
                 pagination_complete = True
             else:
-
-                    next_page_token.get("next_page_token") if next_page_token else None
-                )
-                next_page_token = self._next_page_token(
-                    response=response,
-                    last_page_size=last_page_size,
-                    last_record=last_record,
-                    last_page_token_value=last_page_token_value,
-                )
+                next_page_token = self._next_page_token(response)
                 if not next_page_token:
                     pagination_complete = True

@@ -366,38 +367,19 @@ class SimpleRetriever(Retriever):

     def _read_single_page(
         self,
-        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[
+        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[StreamData]],
         stream_state: Mapping[str, Any],
         stream_slice: StreamSlice,
     ) -> Iterable[StreamData]:
-
-
-        initial_token = self._paginator.get_initial_token()
-        next_page_token: Optional[Mapping[str, Any]] = (
-            {"next_page_token": initial_token} if initial_token else None
-        )
-
-        response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
-
-        last_page_size = 0
-        last_record: Optional[Record] = None
-        for record in records_generator_fn(response):
-            last_page_size += 1
-            last_record = record
-            yield record
+        response = self._fetch_next_page(stream_state, stream_slice)
+        yield from records_generator_fn(response)

         if not response:
-            next_page_token = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
+            next_page_token: Mapping[str, Any] = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
         else:
-
-
-
-            next_page_token = self._next_page_token(
-                response=response,
-                last_page_size=last_page_size,
-                last_record=last_record,
-                last_page_token_value=last_page_token_value,
-            ) or {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
+            next_page_token = self._next_page_token(response) or {
+                FULL_REFRESH_SYNC_COMPLETE_KEY: True
+            }

         if self.cursor:
             self.cursor.close_slice(
@@ -432,14 +414,25 @@ class SimpleRetriever(Retriever):
         if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
             stream_state = self.state

-            # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
-            #
-            #
+            # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to fetch more records
+            # The platform deletes stream state for full refresh streams before starting a new job, so we don't need to worry about
+            # this value existing for the initial attempt
             if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
                 return
+            cursor_value = stream_state.get("next_page_token")
+
+            # The first attempt to read a page for the current partition should reset the paginator to the current
+            # cursor state which is initially assigned to the incoming state from the platform
+            partition_key = self._to_partition_key(_slice.partition)
+            if partition_key not in self._partition_started:
+                self._partition_started[partition_key] = True
+                self._paginator.reset(reset_value=cursor_value)

             yield from self._read_single_page(record_generator, stream_state, _slice)
         else:
+            # Fixing paginator types has a long tail of dependencies
+            self._paginator.reset()
+
             for stream_data in self._read_pages(record_generator, self.state, _slice):
                 current_record = self._extract_record(stream_data, _slice)
                 if self.cursor and current_record:
@@ -525,7 +518,7 @@ class SimpleRetriever(Retriever):
         stream_state: Mapping[str, Any],
         records_schema: Mapping[str, Any],
         stream_slice: Optional[StreamSlice],
-    ) -> Iterable[
+    ) -> Iterable[StreamData]:
         yield from self._parse_response(
             response,
             stream_slice=stream_slice,
@@ -569,7 +562,7 @@ class SimpleRetrieverTestReadDecorator(SimpleRetriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Optional[requests.Response]:
         return self.requester.send_request(
-            path=self._paginator_path(
+            path=self._paginator_path(),
             stream_state=stream_state,
             stream_slice=stream_slice,
             next_page_token=next_page_token,
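The retriever hunks above fold the page-size and last-record bookkeeping back into `_parse_response` and lean on the paginator's internal token, with `reset()` called once before a slice is read. A compressed sketch of the resulting read loop, using hypothetical `fetch_page`/`parse_records` callables instead of the real requester and record selector (in the CDK itself the record counting happens inside `_parse_response`):

```python
from typing import Any, Callable, Iterable, Mapping, Optional

import requests


def read_pages(
    fetch_page: Callable[[Optional[Mapping[str, Any]]], Optional[requests.Response]],
    parse_records: Callable[[Optional[requests.Response]], Iterable[Mapping[str, Any]]],
    paginator: Any,
) -> Iterable[Mapping[str, Any]]:
    paginator.reset()  # start the slice from the paginator's initial token
    next_page_token: Optional[Mapping[str, Any]] = None
    while True:
        response = fetch_page(next_page_token)
        last_page_size, last_record = 0, None
        for record in parse_records(response):
            last_page_size += 1
            last_record = record
            yield record
        if not response:
            break
        # The paginator keeps the resulting token internally for path()/request options.
        next_page_token = paginator.next_page_token(response, last_page_size, last_record)
        if not next_page_token:
            break
```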
airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py
CHANGED
@@ -16,7 +16,7 @@ class DeclarativePartitionFactory:
         self,
         stream_name: str,
         json_schema: Mapping[str, Any],
-
+        retriever_factory: Callable[[], Retriever],
         message_repository: MessageRepository,
     ) -> None:
         """
@@ -26,14 +26,14 @@ class DeclarativePartitionFactory:
         """
         self._stream_name = stream_name
         self._json_schema = json_schema
-        self.
+        self._retriever_factory = retriever_factory
         self._message_repository = message_repository

     def create(self, stream_slice: StreamSlice) -> Partition:
         return DeclarativePartition(
             self._stream_name,
             self._json_schema,
-            self.
+            self._retriever_factory(),
             self._message_repository,
             stream_slice,
         )
airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py
ADDED
@@ -0,0 +1,61 @@
+#
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+#
+
+from dataclasses import InitVar, dataclass
+from typing import Any, Dict, Mapping, Optional
+
+from airbyte_cdk import InterpolatedString
+from airbyte_cdk.sources.declarative.transformations import RecordTransformation
+from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+
+
+@dataclass
+class KeysReplaceTransformation(RecordTransformation):
+    """
+    Transformation that applies keys names replacement.
+
+    Example usage:
+    - type: KeysReplace
+      old: " "
+      new: "_"
+    Result:
+    from: {"created time": ..., "customer id": ..., "user id": ...}
+    to: {"created_time": ..., "customer_id": ..., "user_id": ...}
+    """
+
+    old: str
+    new: str
+    parameters: InitVar[Mapping[str, Any]]
+
+    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+        self._old = InterpolatedString.create(self.old, parameters=parameters)
+        self._new = InterpolatedString.create(self.new, parameters=parameters)
+
+    def transform(
+        self,
+        record: Dict[str, Any],
+        config: Optional[Config] = None,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+    ) -> None:
+        if config is None:
+            config = {}
+
+        kwargs = {"record": record, "stream_state": stream_state, "stream_slice": stream_slice}
+        old_key = str(self._old.eval(config, **kwargs))
+        new_key = str(self._new.eval(config, **kwargs))
+
+        def _transform(data: Dict[str, Any]) -> Dict[str, Any]:
+            result = {}
+            for key, value in data.items():
+                updated_key = key.replace(old_key, new_key)
+                if isinstance(value, dict):
+                    result[updated_key] = _transform(value)
+                else:
+                    result[updated_key] = value
+            return result
+
+        transformed_record = _transform(record)
+        record.clear()
+        record.update(transformed_record)
{airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.0.dist-info}/RECORD
CHANGED
@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
 airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
 airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
 airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
-airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
+airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PxP4p2686wsf1gjsumGKnh2o2Jjnrqg8QLGijEIrp-A,23412
 airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
 airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
 airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
-airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
+airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=W8H8rYMEJihZBY3VgGUo-lo4OfCze9Rli2NorehDr38,131973
 airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
 airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
 airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=edGj4fGxznBk4xzRQyCA1rGfbpqe7z-RE0K3kQQWbgA,858
@@ -105,12 +105,12 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
 airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
 airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
 airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
-airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
+airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=U64qHqBut90L29EuUJ2_4OdY6eCMZIL2MH4DqGYhifQ,92340
 airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
 airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
 airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
 airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
-airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
+airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=xNrr3RuD2dd-u5Ryfv3Wf1bEYC3uYoSzntgCiAQXy84,108538
 airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
 airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
 airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -135,15 +135,15 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.p
 airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=o0520AmHMb7SAoeokVNwoOzuZzIAT6ryx9uFYGSOrs0,8664
 airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=RqYPkgJFAWfcZBTc-JBcGHPm4JL1ZQOhs9GKU4MP2eE,14723
 airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
-airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=
-airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256
-airbyte_cdk/sources/declarative/requesters/paginators/paginator.py,sha256=
+airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=LxTq1hieznRWlYlfODdZbMDUml-g6NyBkdwVI2mCNMM,10910
+airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256=-P-QOlefFhEe99bsB2y3yTvA8c8kCCbfBaTS6qPvF6I,1927
+airbyte_cdk/sources/declarative/requesters/paginators/paginator.py,sha256=ZgyvH7DOrASQ5K__J5SRAXH3REUW2n3yPHnFW9xq4NU,1972
 airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py,sha256=2gly8fuZpDNwtu1Qg6oE2jBLGqQRdzSLJdnpk_iDV6I,767
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py,sha256=
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py,sha256=
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py,sha256=
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py,sha256=
-airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256=
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py,sha256=vFzpNv8BdgXrYO5qhi2_Un4x4y-EAQWxinZtEPWz5KI,3654
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py,sha256=TKG4Mp1t8MfmFJDeHtXmxCp_ibRK03J5O04N5HVtBvE,3430
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py,sha256=kQGpfr-dOwarxTIf2S4sHVulBzm8zSwQXBM7rOhkafA,2491
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py,sha256=ABpO4t0UUziBZnyml8UT_NhlF6loekhQji57TpKnaiY,1290
+airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256=2b005ulACvHgIL8ktTWwposu4umowyu0iGV2mGOb_Tg,2290
 airbyte_cdk/sources/declarative/requesters/request_option.py,sha256=_qmv8CLQQ3fERt6BuMZeRu6tZXscPoeARx1VJdWMQ_M,1055
 airbyte_cdk/sources/declarative/requesters/request_options/__init__.py,sha256=WCwpKqM4wKqy-DHJaCHbKAlFqRVOqMi9K5qonxIfi_Y,809
 airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py,sha256=FLkg0uzC9bc-zFnALWr0FLYpKsz8iK2xQsd4UOyeW08,3706
@@ -161,7 +161,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
 airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
 airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=_-d3MvHh-4r46i4wjQikD4ZygKA7TvuDu2i04qqULEg,3731
 airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
-airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=
+airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
 airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
 airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
 airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=H6A3NQ6kPPM-cUNPmdvDPc9xNzR1rQNrK95GbgCW334,8822
@@ -171,11 +171,12 @@ airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLn
 airbyte_cdk/sources/declarative/spec/__init__.py,sha256=H0UwoRhgucbKBIzg85AXrifybVmfpwWpPdy22vZKVuo,141
 airbyte_cdk/sources/declarative/spec/spec.py,sha256=ODSNUgkDOhnLQnwLjgSaME6R3kNeywjROvbNrWEnsgU,1876
 airbyte_cdk/sources/declarative/stream_slicers/__init__.py,sha256=sI9vhc95RwJYOnA0VKjcbtKgFcmAbWjhdWBXFbAijOs,176
-airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=
+airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=E7feZ5xkHwFHODq8FSjwdGe291RZoCMCRHT1rWnQ1lI,3463
 airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py,sha256=SOkIPBi2Wu7yxIvA15yFzUAB95a3IzA8LPq5DEqHQQc,725
 airbyte_cdk/sources/declarative/transformations/__init__.py,sha256=CPJ8TlMpiUmvG3624VYu_NfTzxwKcfBjM2Q2wJ7fkSA,919
 airbyte_cdk/sources/declarative/transformations/add_fields.py,sha256=r4YdAuAk2bQtNWJMztIIy2CC-NglD9NeK1s1TeO9wkw,5027
 airbyte_cdk/sources/declarative/transformations/flatten_fields.py,sha256=ti9fLVk-EpMeDY7ImduvQq1YGounLYmH9dHzp7MIRxk,1703
+airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py,sha256=vbIn6ump-Ut6g20yMub7PFoPBhOKVtrHSAUdcOUdLfw,1999
 airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py,sha256=RTs5KX4V3hM7A6QN1WlGF21YccTIyNH6qQI9IMb__hw,670
 airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py,sha256=43zwe6_F5ba5C4eY0RgXxPz7ndPKZfXGChHepFn-2lk,2263
 airbyte_cdk/sources/declarative/transformations/remove_fields.py,sha256=EwUP0SZ2p4GRJ6Q8CUzlz9dcUeEidEFDlI2IBye2tlc,2745
@@ -340,8 +341,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
 airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
 airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
 airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
-airbyte_cdk-6.
+airbyte_cdk-6.15.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-6.15.0.dist-info/METADATA,sha256=aIk10nM3OL1ldN48mbZAdfd8mOWeSyVfeJMwHYMos8s,5988
+airbyte_cdk-6.15.0.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
+airbyte_cdk-6.15.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
+airbyte_cdk-6.15.0.dist-info/RECORD,,
{airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.0.dist-info}/LICENSE.txt
File without changes
{airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.0.dist-info}/WHEEL
File without changes
{airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.0.dist-info}/entry_points.txt
File without changes