airbyte-cdk 6.34.1.dev0__py3-none-any.whl → 6.34.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +75 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +30 -79
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +203 -100
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +7 -2
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
- airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +12 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/jinja.py +13 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +9 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +150 -41
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +234 -84
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +26 -18
- airbyte_cdk/sources/declarative/requesters/http_requester.py +8 -2
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +16 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -3
- airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/file_based_source.py +70 -37
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
- airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
- airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/call_rate.py +185 -47
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +217 -56
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +144 -73
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/test/mock_http/mocker.py +9 -1
- airbyte_cdk/test/mock_http/response.py +6 -3
- airbyte_cdk/utils/datetime_helpers.py +48 -66
- airbyte_cdk/utils/mapping_helpers.py +126 -26
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/RECORD +60 -51
- airbyte_cdk/connector_builder/message_grouper.py +0 -448
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/entry_points.txt +0 -0
@@ -437,10 +437,10 @@ class AsyncJobOrchestrator:
|
|
437
437
|
yield from self._process_running_partitions_and_yield_completed_ones()
|
438
438
|
self._wait_on_status_update()
|
439
439
|
except Exception as exception:
|
440
|
+
LOGGER.warning(
|
441
|
+
f"Caught exception that stops the processing of the jobs: {exception}. Traceback: {traceback.format_exc()}"
|
442
|
+
)
|
440
443
|
if self._is_breaking_exception(exception):
|
441
|
-
LOGGER.warning(
|
442
|
-
f"Caught exception that stops the processing of the jobs: {exception}"
|
443
|
-
)
|
444
444
|
self._abort_all_running_jobs()
|
445
445
|
raise exception
|
446
446
|
|
@@ -482,16 +482,16 @@ class AsyncJobOrchestrator:
|
|
482
482
|
and exception.failure_type == FailureType.config_error
|
483
483
|
)
|
484
484
|
|
485
|
-
def fetch_records(self,
|
485
|
+
def fetch_records(self, async_jobs: Iterable[AsyncJob]) -> Iterable[Mapping[str, Any]]:
|
486
486
|
"""
|
487
|
-
Fetches records from the given
|
487
|
+
Fetches records from the given jobs.
|
488
488
|
|
489
489
|
Args:
|
490
|
-
|
490
|
+
async_jobs Iterable[AsyncJob]: The list of AsyncJobs.
|
491
491
|
|
492
492
|
Yields:
|
493
493
|
Iterable[Mapping[str, Any]]: The fetched records from the jobs.
|
494
494
|
"""
|
495
|
-
for job in
|
495
|
+
for job in async_jobs:
|
496
496
|
yield from self._job_repository.fetch_records(job)
|
497
497
|
self._job_repository.delete(job)
|
@@ -3,6 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import base64
|
6
|
+
import json
|
6
7
|
from dataclasses import InitVar, dataclass
|
7
8
|
from datetime import datetime
|
8
9
|
from typing import Any, Mapping, Optional, Union
|
@@ -104,21 +105,21 @@ class JwtAuthenticator(DeclarativeAuthenticator):
|
|
104
105
|
)
|
105
106
|
|
106
107
|
def _get_jwt_headers(self) -> dict[str, Any]:
|
107
|
-
"""
|
108
|
+
"""
|
108
109
|
Builds and returns the headers used when signing the JWT.
|
109
110
|
"""
|
110
|
-
headers = self._additional_jwt_headers.eval(self.config)
|
111
|
+
headers = self._additional_jwt_headers.eval(self.config, json_loads=json.loads)
|
111
112
|
if any(prop in headers for prop in ["kid", "alg", "typ", "cty"]):
|
112
113
|
raise ValueError(
|
113
114
|
"'kid', 'alg', 'typ', 'cty' are reserved headers and should not be set as part of 'additional_jwt_headers'"
|
114
115
|
)
|
115
116
|
|
116
117
|
if self._kid:
|
117
|
-
headers["kid"] = self._kid.eval(self.config)
|
118
|
+
headers["kid"] = self._kid.eval(self.config, json_loads=json.loads)
|
118
119
|
if self._typ:
|
119
|
-
headers["typ"] = self._typ.eval(self.config)
|
120
|
+
headers["typ"] = self._typ.eval(self.config, json_loads=json.loads)
|
120
121
|
if self._cty:
|
121
|
-
headers["cty"] = self._cty.eval(self.config)
|
122
|
+
headers["cty"] = self._cty.eval(self.config, json_loads=json.loads)
|
122
123
|
headers["alg"] = self._algorithm
|
123
124
|
return headers
|
124
125
|
|
@@ -130,18 +131,19 @@ class JwtAuthenticator(DeclarativeAuthenticator):
|
|
130
131
|
exp = now + self._token_duration if isinstance(self._token_duration, int) else now
|
131
132
|
nbf = now
|
132
133
|
|
133
|
-
payload = self._additional_jwt_payload.eval(self.config)
|
134
|
+
payload = self._additional_jwt_payload.eval(self.config, json_loads=json.loads)
|
134
135
|
if any(prop in payload for prop in ["iss", "sub", "aud", "iat", "exp", "nbf"]):
|
135
136
|
raise ValueError(
|
136
137
|
"'iss', 'sub', 'aud', 'iat', 'exp', 'nbf' are reserved properties and should not be set as part of 'additional_jwt_payload'"
|
137
138
|
)
|
138
139
|
|
139
140
|
if self._iss:
|
140
|
-
payload["iss"] = self._iss.eval(self.config)
|
141
|
+
payload["iss"] = self._iss.eval(self.config, json_loads=json.loads)
|
141
142
|
if self._sub:
|
142
|
-
payload["sub"] = self._sub.eval(self.config)
|
143
|
+
payload["sub"] = self._sub.eval(self.config, json_loads=json.loads)
|
143
144
|
if self._aud:
|
144
|
-
payload["aud"] = self._aud.eval(self.config)
|
145
|
+
payload["aud"] = self._aud.eval(self.config, json_loads=json.loads)
|
146
|
+
|
145
147
|
payload["iat"] = now
|
146
148
|
payload["exp"] = exp
|
147
149
|
payload["nbf"] = nbf
|
@@ -151,7 +153,7 @@ class JwtAuthenticator(DeclarativeAuthenticator):
|
|
151
153
|
"""
|
152
154
|
Returns the secret key used to sign the JWT.
|
153
155
|
"""
|
154
|
-
secret_key: str = self._secret_key.eval(self.config)
|
156
|
+
secret_key: str = self._secret_key.eval(self.config, json_loads=json.loads)
|
155
157
|
return (
|
156
158
|
base64.b64encode(secret_key.encode()).decode()
|
157
159
|
if self._base64_encode_secret_key
|
@@ -176,7 +178,11 @@ class JwtAuthenticator(DeclarativeAuthenticator):
|
|
176
178
|
"""
|
177
179
|
Returns the header prefix to be used when attaching the token to the request.
|
178
180
|
"""
|
179
|
-
return
|
181
|
+
return (
|
182
|
+
self._header_prefix.eval(self.config, json_loads=json.loads)
|
183
|
+
if self._header_prefix
|
184
|
+
else None
|
185
|
+
)
|
180
186
|
|
181
187
|
@property
|
182
188
|
def auth_header(self) -> str:
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
from dataclasses import InitVar, dataclass, field
|
6
|
-
from datetime import timedelta
|
6
|
+
from datetime import datetime, timedelta
|
7
7
|
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
8
8
|
|
9
9
|
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
@@ -232,8 +232,13 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
232
232
|
return self._refresh_request_headers.eval(self.config)
|
233
233
|
|
234
234
|
def get_token_expiry_date(self) -> AirbyteDateTime:
|
235
|
+
if not self._has_access_token_been_initialized():
|
236
|
+
return AirbyteDateTime.from_datetime(datetime.min)
|
235
237
|
return self._token_expiry_date # type: ignore # _token_expiry_date is an AirbyteDateTime. It is never None despite what mypy thinks
|
236
238
|
|
239
|
+
def _has_access_token_been_initialized(self) -> bool:
|
240
|
+
return self._access_token is not None
|
241
|
+
|
237
242
|
def set_token_expiry_date(self, value: Union[str, int]) -> None:
|
238
243
|
self._token_expiry_date = self._parse_token_expiration_date(value)
|
239
244
|
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import base64
|
6
6
|
import logging
|
7
7
|
from dataclasses import InitVar, dataclass
|
8
|
-
from typing import Any, Mapping, Union
|
8
|
+
from typing import Any, Mapping, MutableMapping, Union
|
9
9
|
|
10
10
|
import requests
|
11
11
|
from cachetools import TTLCache, cached
|
@@ -45,11 +45,6 @@ class ApiKeyAuthenticator(DeclarativeAuthenticator):
|
|
45
45
|
config: Config
|
46
46
|
parameters: InitVar[Mapping[str, Any]]
|
47
47
|
|
48
|
-
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
49
|
-
self._field_name = InterpolatedString.create(
|
50
|
-
self.request_option.field_name, parameters=parameters
|
51
|
-
)
|
52
|
-
|
53
48
|
@property
|
54
49
|
def auth_header(self) -> str:
|
55
50
|
options = self._get_request_options(RequestOptionType.header)
|
@@ -60,9 +55,9 @@ class ApiKeyAuthenticator(DeclarativeAuthenticator):
|
|
60
55
|
return self.token_provider.get_token()
|
61
56
|
|
62
57
|
def _get_request_options(self, option_type: RequestOptionType) -> Mapping[str, Any]:
|
63
|
-
options = {}
|
58
|
+
options: MutableMapping[str, Any] = {}
|
64
59
|
if self.request_option.inject_into == option_type:
|
65
|
-
|
60
|
+
self.request_option.inject_into_request(options, self.token, self.config)
|
66
61
|
return options
|
67
62
|
|
68
63
|
def get_request_params(self) -> Mapping[str, Any]:
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple
|
6
|
+
from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple
|
7
7
|
|
8
8
|
from airbyte_cdk.models import (
|
9
9
|
AirbyteCatalog,
|
@@ -19,11 +19,11 @@ from airbyte_cdk.sources.declarative.extractors import RecordSelector
|
|
19
19
|
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
20
20
|
ClientSideIncrementalRecordFilterDecorator,
|
21
21
|
)
|
22
|
+
from airbyte_cdk.sources.declarative.incremental import ConcurrentPerPartitionCursor
|
22
23
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
23
24
|
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
24
25
|
PerPartitionWithGlobalCursor,
|
25
26
|
)
|
26
|
-
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
27
27
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
28
28
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
29
29
|
ConcurrencyLevel as ConcurrencyLevelModel,
|
@@ -35,13 +35,11 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
|
35
35
|
ModelToComponentFactory,
|
36
36
|
)
|
37
37
|
from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
|
38
|
-
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
39
38
|
from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, Retriever, SimpleRetriever
|
40
39
|
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
|
41
40
|
DeclarativePartitionFactory,
|
42
41
|
StreamSlicerPartitionGenerator,
|
43
42
|
)
|
44
|
-
from airbyte_cdk.sources.declarative.transformations.add_fields import AddFields
|
45
43
|
from airbyte_cdk.sources.declarative.types import ConnectionDefinition
|
46
44
|
from airbyte_cdk.sources.source import TState
|
47
45
|
from airbyte_cdk.sources.streams import Stream
|
@@ -230,6 +228,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
230
228
|
stream_state = self._connector_state_manager.get_stream_state(
|
231
229
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
232
230
|
)
|
231
|
+
stream_state = self._migrate_state(declarative_stream, stream_state)
|
233
232
|
|
234
233
|
retriever = self._get_retriever(declarative_stream, stream_state)
|
235
234
|
|
@@ -238,7 +237,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
238
237
|
):
|
239
238
|
cursor = declarative_stream.retriever.stream_slicer.stream_slicer
|
240
239
|
|
241
|
-
if not isinstance(cursor, ConcurrentCursor):
|
240
|
+
if not isinstance(cursor, ConcurrentCursor | ConcurrentPerPartitionCursor):
|
242
241
|
# This should never happen since we instantiate ConcurrentCursor in
|
243
242
|
# model_to_component_factory.py
|
244
243
|
raise ValueError(
|
@@ -326,9 +325,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
326
325
|
incremental_sync_component_definition
|
327
326
|
and incremental_sync_component_definition.get("type", "")
|
328
327
|
== DatetimeBasedCursorModel.__name__
|
329
|
-
and self._stream_supports_concurrent_partition_processing(
|
330
|
-
declarative_stream=declarative_stream
|
331
|
-
)
|
332
328
|
and hasattr(declarative_stream.retriever, "stream_slicer")
|
333
329
|
and isinstance(
|
334
330
|
declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
|
@@ -337,6 +333,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
337
333
|
stream_state = self._connector_state_manager.get_stream_state(
|
338
334
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
339
335
|
)
|
336
|
+
stream_state = self._migrate_state(declarative_stream, stream_state)
|
337
|
+
|
340
338
|
partition_router = declarative_stream.retriever.stream_slicer._partition_router
|
341
339
|
|
342
340
|
perpartition_cursor = (
|
@@ -401,9 +399,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
401
399
|
and bool(incremental_sync_component_definition)
|
402
400
|
and incremental_sync_component_definition.get("type", "")
|
403
401
|
== DatetimeBasedCursorModel.__name__
|
404
|
-
and self._stream_supports_concurrent_partition_processing(
|
405
|
-
declarative_stream=declarative_stream
|
406
|
-
)
|
407
402
|
and hasattr(declarative_stream.retriever, "stream_slicer")
|
408
403
|
and (
|
409
404
|
isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
|
@@ -411,72 +406,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
411
406
|
)
|
412
407
|
)
|
413
408
|
|
414
|
-
def _stream_supports_concurrent_partition_processing(
|
415
|
-
self, declarative_stream: DeclarativeStream
|
416
|
-
) -> bool:
|
417
|
-
"""
|
418
|
-
Many connectors make use of stream_state during interpolation on a per-partition basis under the assumption that
|
419
|
-
state is updated sequentially. Because the concurrent CDK engine processes different partitions in parallel,
|
420
|
-
stream_state is no longer a thread-safe interpolation context. It would be a race condition because a cursor's
|
421
|
-
stream_state can be updated in any order depending on which stream partitions finish first.
|
422
|
-
|
423
|
-
We should start to move away from depending on the value of stream_state for low-code components that operate
|
424
|
-
per-partition, but we need to gate this otherwise some connectors will be blocked from publishing. See the
|
425
|
-
cdk-migrations.md for the full list of connectors.
|
426
|
-
"""
|
427
|
-
|
428
|
-
if isinstance(declarative_stream.retriever, SimpleRetriever) and isinstance(
|
429
|
-
declarative_stream.retriever.requester, HttpRequester
|
430
|
-
):
|
431
|
-
http_requester = declarative_stream.retriever.requester
|
432
|
-
if "stream_state" in http_requester._path.string:
|
433
|
-
self.logger.warning(
|
434
|
-
f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the HttpRequester which is not thread-safe. Defaulting to synchronous processing"
|
435
|
-
)
|
436
|
-
return False
|
437
|
-
|
438
|
-
request_options_provider = http_requester._request_options_provider
|
439
|
-
if request_options_provider.request_options_contain_stream_state():
|
440
|
-
self.logger.warning(
|
441
|
-
f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the HttpRequester which is not thread-safe. Defaulting to synchronous processing"
|
442
|
-
)
|
443
|
-
return False
|
444
|
-
|
445
|
-
record_selector = declarative_stream.retriever.record_selector
|
446
|
-
if isinstance(record_selector, RecordSelector):
|
447
|
-
if (
|
448
|
-
record_selector.record_filter
|
449
|
-
and not isinstance(
|
450
|
-
record_selector.record_filter, ClientSideIncrementalRecordFilterDecorator
|
451
|
-
)
|
452
|
-
and "stream_state" in record_selector.record_filter.condition
|
453
|
-
):
|
454
|
-
self.logger.warning(
|
455
|
-
f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the RecordFilter which is not thread-safe. Defaulting to synchronous processing"
|
456
|
-
)
|
457
|
-
return False
|
458
|
-
|
459
|
-
for add_fields in [
|
460
|
-
transformation
|
461
|
-
for transformation in record_selector.transformations
|
462
|
-
if isinstance(transformation, AddFields)
|
463
|
-
]:
|
464
|
-
for field in add_fields.fields:
|
465
|
-
if isinstance(field.value, str) and "stream_state" in field.value:
|
466
|
-
self.logger.warning(
|
467
|
-
f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the AddFields which is not thread-safe. Defaulting to synchronous processing"
|
468
|
-
)
|
469
|
-
return False
|
470
|
-
if (
|
471
|
-
isinstance(field.value, InterpolatedString)
|
472
|
-
and "stream_state" in field.value.string
|
473
|
-
):
|
474
|
-
self.logger.warning(
|
475
|
-
f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the AddFields which is not thread-safe. Defaulting to synchronous processing"
|
476
|
-
)
|
477
|
-
return False
|
478
|
-
return True
|
479
|
-
|
480
409
|
@staticmethod
|
481
410
|
def _get_retriever(
|
482
411
|
declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
|
@@ -489,10 +418,21 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
489
418
|
# Also a temporary hack. In the legacy Stream implementation, as part of the read,
|
490
419
|
# set_initial_state() is called to instantiate incoming state on the cursor. Although we no
|
491
420
|
# longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
|
492
|
-
# like StopConditionPaginationStrategyDecorator
|
493
|
-
#
|
421
|
+
# like StopConditionPaginationStrategyDecorator still rely on a DatetimeBasedCursor that is
|
422
|
+
# properly initialized with state.
|
494
423
|
if retriever.cursor:
|
495
424
|
retriever.cursor.set_initial_state(stream_state=stream_state)
|
425
|
+
|
426
|
+
# Similar to above, the ClientSideIncrementalRecordFilterDecorator cursor is a separate instance
|
427
|
+
# from the one initialized on the SimpleRetriever, so it must also have state initialized
|
428
|
+
# for semi-incremental streams using is_client_side_incremental to filter properly
|
429
|
+
if isinstance(retriever.record_selector, RecordSelector) and isinstance(
|
430
|
+
retriever.record_selector.record_filter, ClientSideIncrementalRecordFilterDecorator
|
431
|
+
):
|
432
|
+
retriever.record_selector.record_filter._cursor.set_initial_state(
|
433
|
+
stream_state=stream_state
|
434
|
+
) # type: ignore # After non-concurrent cursors are deprecated we can remove these cursor workarounds
|
435
|
+
|
496
436
|
# We zero it out here, but since this is a cursor reference, the state is still properly
|
497
437
|
# instantiated for the other components that reference it
|
498
438
|
retriever.cursor = None
|
@@ -524,3 +464,14 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
524
464
|
if stream.stream.name not in concurrent_stream_names
|
525
465
|
]
|
526
466
|
)
|
467
|
+
|
468
|
+
@staticmethod
|
469
|
+
def _migrate_state(
|
470
|
+
declarative_stream: DeclarativeStream, stream_state: MutableMapping[str, Any]
|
471
|
+
) -> MutableMapping[str, Any]:
|
472
|
+
for state_migration in declarative_stream.state_migrations:
|
473
|
+
if state_migration.should_migrate(stream_state):
|
474
|
+
# The state variable is expected to be mutable but the migrate method returns an immutable mapping.
|
475
|
+
stream_state = dict(state_migration.migrate(stream_state))
|
476
|
+
|
477
|
+
return stream_state
|